├── maint
    ├── __init__.py
    ├── remove_blacklisted.py
    ├── update_best_raw.py
    ├── update_sentences.py
    ├── assign_scores_pt.py
    ├── set_visible.py
    ├── conceptnet_fixes
    │   ├── 003_bedume_is_silly.py
    │   ├── 002_are_for.py
    │   ├── 001_is_like.py
    │   ├── 004_bedume_is_still_silly.py
    │   └── 000_is_for.py
    ├── fix_stray_spaces.py
    ├── fix_concept_counts.py
    ├── undo_globalmind.py
    ├── nerf_a_user.py
    ├── fix_stray_spaces2.py
    ├── update_scores.py
    ├── fix_people_person.py
    ├── ratings_to_votes_to_events.py
    ├── fix_abnormal_concepts.py
    ├── fix_dup_frames.py
    ├── check_best_frame.py
    ├── generalize_dependencies.py
    ├── count_surfaceforms.py
    ├── count_assertions.py
    ├── simple_update_rawassertion_assertion_fkey.py
    ├── import_conceptnet_zh.py
    ├── update_rawassertion_assertion_fkey.py
    ├── fix_raw_duplicates.py
    ├── extract_concepts.py
    ├── reconcile_assertions.py
    ├── compare_sentences.py
    └── dump_csv.py
├── tools
    ├── __init__.py
    ├── make_sqlite.sh
    ├── make_sqlite.py
    ├── stats.py
    ├── load_autocorrector.py
    ├── cnet_rdf.py
    ├── dump_to_sqlite.py
    ├── create_placeholder_users.py
    └── cnet_n3.py
├── serialize
    ├── __init__.py
    └── pyyaml.py
├── conceptnet
    ├── corpus
    │   ├── parse
    │   │   ├── admin.py
    │   │   ├── __init__.py
    │   │   ├── to-be-fixed.txt
    │   │   ├── patterns.pcfg
    │   │   ├── offline_parser.py
    │   │   ├── adverbs.py
    │   │   ├── migrate_templated_qs4e.py
    │   │   ├── models.py
    │   │   ├── try_patterns.py
    │   │   ├── migrate_templated.py
    │   │   ├── build.py
    │   │   └── run_parser.py
    │   ├── migrations
    │   │   ├── __init__.py
    │   │   └── 0002_rename_tables.py
    │   ├── views.py
    │   ├── __init__.py
    │   └── admin.py
    ├── lib
    │   ├── events
    │   │   ├── __init__.py
    │   │   ├── migrations
    │   │   │   ├── __init__.py
    │   │   │   └── 0001_initial.py
    │   │   └── models.py
    │   ├── voting
    │   │   ├── templatetags
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── admin.py
    │   │   ├── models.py
    │   │   └── views.py
    │   └── __init__.py
    ├── migrations
    │   ├── __init__.py
    │   └── 0002_rename_tables.py
    ├── webapi
    │   ├── __init__.py
    │   ├── templatetags
    │   │   ├── __init__.py
    │   │   └── rst.py
    │   ├── api.wsgi
    │   ├── docs.py
    │   ├── templates
    │   │   └── documentation.txt
    │   └── urls.py
    ├── concepttools
    │   ├── __init__.py
    │   ├── models.py
    │   ├── test.txt
    │   ├── lightning.txt
    │   ├── context
    │   │   ├── pink.txt
    │   │   ├── red.txt
    │   │   ├── black.txt
    │   │   ├── grey.txt
    │   │   ├── purple.txt
    │   │   ├── brown.txt
    │   │   ├── yellow.txt
    │   │   ├── white.txt
    │   │   ├── blue.txt
    │   │   ├── orange.txt
    │   │   └── green.txt
    │   ├── urls.py
    │   ├── ocean.txt
    │   ├── test.txt.html
    │   ├── amsterdam.txt
    │   ├── lightning.txt.html
    │   ├── testwords.html
    │   └── ConceptNetGUI.py
    ├── pseudo_auth
    │   ├── __init__.py
    │   ├── models.py
    │   └── backends.py
    ├── __init__.py
    ├── analogyspace.py
    ├── django_settings
    │   ├── default_db_config.py
    │   ├── db_downloader.py
    │   └── __init__.py
    ├── admin.py
    ├── network.py
    └── analogyspace2.py
├── setup.cfg
├── models.pdf
├── doc
    ├── source
    │   ├── others.rst
    │   ├── corpus.rst
    │   ├── _static
    │   │   └── graph
    │   │   │   ├── corpus.pdf
    │   │   │   ├── others.pdf
    │   │   │   ├── conceptnet4.pdf
    │   │   │   ├── conceptnet4.png
    │   │   │   ├── conceptnet_all.pdf
    │   │   │   ├── conceptnet_all.png
    │   │   │   └── others.dot
    │   ├── install.rst
    │   ├── index.rst
    │   └── conf.py
    ├── Makefile
    ├── bzr-howto.txt
    └── zero-to-conceptnet-on-xvm.txt
├── MANIFEST.in
├── test
    ├── test_normalize.py
    ├── test_users.py
    ├── test_analogyspace.py
    ├── test_conceptnet_queries.py
    ├── test_denormalized.py
    └── test_ja_harness.py
├── .gitignore
├── urls.py
├── README.rst
├── manage.py
├── setup.py
└── conf
    └── db_config.py.orig


/maint/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/serialize/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/admin.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/lib/events/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/pseudo_auth/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/templatetags/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/lib/events/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conceptnet/lib/voting/templatetags/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [egg_info]
2 | tag_svn_revision = 1
3 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/views.py:
--------------------------------------------------------------------------------
1 | # Create your views here.
2 | 


--------------------------------------------------------------------------------
/conceptnet/lib/voting/__init__.py:
--------------------------------------------------------------------------------
1 | # Version of this voting package; presumably (major, minor, micro) -- TODO confirm.
2 | VERSION = (0, 1, None)


--------------------------------------------------------------------------------
/models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/models.pdf


--------------------------------------------------------------------------------
/doc/source/others.rst:
--------------------------------------------------------------------------------
1 | .. _others:
2 | 
3 | Other modules
4 | =============
5 | 
6 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include manage.py
2 | include tools
3 | include test
4 | include serialize
5 | 


--------------------------------------------------------------------------------
/tools/make_sqlite.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Usage: make_sqlite.sh DB_NAME
3 | # Runs make_sqlite.py and then, only if that succeeds, dump_to_sqlite.py,
4 | # passing the same database name to both.
5 | # "$1" is quoted so a name containing spaces or glob characters stays one argument.
6 | python make_sqlite.py "$1" && python dump_to_sqlite.py "$1"
7 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | 
3 | # Create your models here.
4 | 


--------------------------------------------------------------------------------
/doc/source/corpus.rst:
--------------------------------------------------------------------------------
1 | .. _corpus:
2 | 
3 | The :mod:`corpus` module
4 | ========================
5 | 
6 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/__init__.py:
--------------------------------------------------------------------------------
1 | # Default DJANGO_SETTINGS_MODULE to ConceptNet's settings unless the environment
2 | # already sets it. __import__('os') is used inline, so no `os` name is bound in
3 | # this package's namespace.
4 | __import__('os').environ.setdefault('DJANGO_SETTINGS_MODULE', 'conceptnet.django_settings')
5 | 


--------------------------------------------------------------------------------
/doc/source/_static/graph/corpus.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/corpus.pdf


--------------------------------------------------------------------------------
/doc/source/_static/graph/others.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/others.pdf


--------------------------------------------------------------------------------
/conceptnet/lib/voting/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from voting.models import Vote
3 | 
4 | # Register Vote with the default admin site (no custom ModelAdmin is supplied).
5 | admin.site.register(Vote)
6 | 


--------------------------------------------------------------------------------
/doc/source/_static/graph/conceptnet4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet4.pdf


--------------------------------------------------------------------------------
/doc/source/_static/graph/conceptnet4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet4.png


--------------------------------------------------------------------------------
/conceptnet/__init__.py:
--------------------------------------------------------------------------------
1 | # Default DJANGO_SETTINGS_MODULE to ConceptNet's settings unless the environment
2 | # already sets it; __import__('os') avoids binding `os` in this namespace.
3 | __import__('os').environ.setdefault('DJANGO_SETTINGS_MODULE', 'conceptnet.django_settings')
4 | # Imported for its side effect: conceptnet/lib/__init__.py inserts its own
5 | # directory at the front of sys.path.
6 | import conceptnet.lib
7 | 


--------------------------------------------------------------------------------
/doc/source/_static/graph/conceptnet_all.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet_all.pdf


--------------------------------------------------------------------------------
/doc/source/_static/graph/conceptnet_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsense/conceptnet/HEAD/doc/source/_static/graph/conceptnet_all.png


--------------------------------------------------------------------------------
/conceptnet/analogyspace.py:
--------------------------------------------------------------------------------
1 | # Tombstone module: importing it always fails with the pointer below.
2 | raise ImportError("conceptnet.analogyspace is deprecated. See http://csc.media.mit.edu/docs/divisi2/tutorial_aspace.html for how to use Divisi2 to run AnalogySpace.")
3 | 


--------------------------------------------------------------------------------
/test/test_normalize.py:
--------------------------------------------------------------------------------
1 | from csc.conceptnet4.models import *
2 | def test_normalize():
3 |     assert en.nl.normalize('they are running') == 'run'
4 |     assert en.nl.normalize('went') == 'go'
5 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from conceptnet.corpus.models import Language, Sentence
3 | 
4 | admin.site.register(Language)
5 | admin.site.register(Sentence)
6 | 
7 | 


--------------------------------------------------------------------------------
/conceptnet/lib/__init__.py:
--------------------------------------------------------------------------------
1 | # Add this directory to the Python path.
2 | import sys, os.path
3 | _path = os.path.dirname(__file__)  # directory containing this __init__.py
4 | joinpath = os.path.join  # module-level alias for os.path.join
5 | sys.path.insert(0, _path)  # index 0: searched before the existing sys.path entries
6 | 
7 | __test__ = False  # NOTE(review): presumably opts this package out of test collection -- confirm
8 | 


--------------------------------------------------------------------------------
/test/test_users.py:
--------------------------------------------------------------------------------
1 | from csc.conceptnet4.models import *
2 | 
3 | def test_users_do_not_explode():
4 |     a = RawAssertion.objects.filter(language=en)[0]
5 |     a.sentence.creator
6 |     a.sentence.creator.username
7 | 


--------------------------------------------------------------------------------
/maint/remove_blacklisted.py:
--------------------------------------------------------------------------------
1 | from csc.conceptnet.models import *
2 | 
3 | for concept in Concept.objects.all():
4 |     if concept.language.nl.is_blacklisted(concept.text):
5 |         concept.useful = False
6 |         concept.save()
7 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/test.txt:
--------------------------------------------------------------------------------
1 | I am feeling happy today because the sun is shining. Also because I watched a
2 | movie last night which was hilariously bad, called "Giant Octopus Versus Mega
3 | Shark". Colorless green ideas sleep furiously.
4 | 


--------------------------------------------------------------------------------
/maint/update_best_raw.py:
--------------------------------------------------------------------------------
1 | from csc.conceptnet4.models import Sentence, Assertion, RawAssertion
2 | from csc.util import queryset_foreach
3 | 
4 | queryset_foreach(Assertion.objects.all(), lambda a: a.update_raw_cache(),
5 | batch_size=100)
6 | 
7 | 


--------------------------------------------------------------------------------
/maint/update_sentences.py:
--------------------------------------------------------------------------------
1 | from csc.util import queryset_foreach
2 | from csc.corpus.models import Sentence
3 | 
4 | queryset_foreach(Sentence.objects.filter(id__lt=1367900).order_by('-id'),
5 |   lambda x: x.update_consistency(),
6 |   batch_size=100)
7 | 
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | db_config.py
 2 | db_password.py
 3 | *.pyc
 4 | *.pyd
 5 | *.so
 6 | build
 7 | *.egg-info
 8 | dist
 9 | pip-log.txt
10 | .achievements
11 | .coverage
12 | .bzr
13 | .bzrignore
14 | *.train
15 | *.psql
16 | *.pickle.gz
17 | conceptnet_zh_*.txt
18 | db
19 | *~
20 | 


--------------------------------------------------------------------------------
/doc/source/install.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | How to install
 4 | ==============
 5 | If only we knew...
 6 | 
 7 | Actually, if you're even seeing this document, you're probably in contact with
 8 | someone in the Commonsense Computing group. Ask them, and they'll be able to
 9 | tell you how to install ConceptNet. At least, the way it works this week.
10 | 
11 | 


--------------------------------------------------------------------------------
/urls.py:
--------------------------------------------------------------------------------
 1 | from django.conf.urls.defaults import *
 2 | from django.contrib import admin
 3 | 
 4 | # Let the admin discover the admin modules of the installed apps.
 5 | admin.autodiscover()
 6 | 
 7 | # The web API URLconf is mounted twice: under /api/ and, through the empty
 8 | # regex (which matches any path), at the site root.
 9 | urlpatterns = patterns('',
10 |      # Web API (REST)
11 |      (r'^api/', include('csc.webapi.urls')),
12 |      (r'', include('csc.webapi.urls')),
13 | 
14 | #     # ConceptTools (realm)
15 | #     (r'^api/', include('realm.urls')),
16 | )
17 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/templatetags/rst.py:
--------------------------------------------------------------------------------
 1 | from django.template.defaultfilters import stringfilter
 2 | from django import template
 3 | 
 4 | register = template.Library()
 5 | 
 6 | @stringfilter
 7 | def indent(value, spaces):
 8 |     indentation = ' '*int(spaces)
 9 |     return '\n'.join(indentation+line for line in value.split('\n')).strip()
10 | register.filter('indent', indent)


--------------------------------------------------------------------------------
/conceptnet/pseudo_auth/models.py:
--------------------------------------------------------------------------------
 1 | from django.db import models
 2 | 
 3 | class LegacyUser(models.Model):
 4 |     """Model bound to the ``auth_user`` table, exposing username, password and salt."""
 5 |     username = models.CharField(max_length=30)
 6 |     password = models.CharField(max_length=128)
 7 |     # salt is the only nullable column of the three.
 8 |     salt = models.CharField(max_length=128,null=True)
 9 | 
10 |     def __unicode__(self):
11 |         """Return the username as the unicode representation."""
12 |         return self.username
13 |     class Meta:
14 |         # Use the 'auth_user' table name instead of the default generated one.
15 |         db_table = 'auth_user'
16 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/to-be-fixed.txt:
--------------------------------------------------------------------------------
 1 | Things to fix:
 2 | * "children" stems to "childran"
 3 | * is {P} makes the {P} show up as a frequency
 4 | * "of", "near"/"around" special cases for AtLocation
 5 | 
 6 | Steps that still need to be done:
 7 | * count frequencies for concepts
 8 | * Blacklist
 9 | * When you {1} you do the following: 1. {2}
10 | * merge frequencies into a few classes
11 | 
12 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/lightning.txt:
--------------------------------------------------------------------------------
 1 | Standing on a building I am a lightning rod
 2 | And all these clouds are so familiar
 3 | Descending from the mountain tops the gods are threatening.
 4 | I will return an honest soldier
 5 | 
 6 | Steady on this high rise like every lightning rod
 7 | And all these clouds are boiling over
 8 | Swimming in adrenaline the sky is caving in
 9 | but I will remain the honest soldier. 
10 | 


--------------------------------------------------------------------------------
/test/test_analogyspace.py:
--------------------------------------------------------------------------------
 1 | from nose.tools import *
 2 | from csc.conceptnet4.analogyspace import *
 3 | 
 4 | def test_basic_analogyspace():
 5 |     mat = conceptnet_2d_from_db('en', cutoff=15)
 6 |     item = mat.iteritems().next()
 7 |     key, value = item
 8 |     concept1, feature = key
 9 |     filled_side, relation, concept2 = feature
10 |     assert filled_side in ['left', 'right']
11 |     assert relation[0] == relation[0].upper()
12 |     
13 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/pink.txt:
--------------------------------------------------------------------------------
1 | admiration, affection, appreciation, bisexuality, calming, caring, delicacy, emotional healing, emotional maturity, ethereal, femininity, friendship, good will, gratitude, happiness, health, homosexuality, joy, june, kindness, love, lust, marriage, nurturing, passivity, peace, romance, sex, spring, sweet smelling, sweet tasting, sweetness, sympathy, truth,
2 | 
3 | bunny, cosmetics, dress, elephant, flamingo, flower, makeup, pink panther, rose, tulip


--------------------------------------------------------------------------------
/conceptnet/django_settings/default_db_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # Don't use a "dot" directory on Windows. It might make Windows sad.
 4 | if os.name == 'nt':
 5 |     user_data_dir = os.path.expanduser('~/conceptnet/')
 6 | else:
 7 |     user_data_dir = os.path.expanduser('~/.conceptnet/')
 8 | 
 9 | DB_ENGINE = "sqlite3"
10 | DB_NAME = user_data_dir + "ConceptNet.db"
11 | DB_HOST = ""
12 | DB_PORT = ""
13 | DB_USER = ""
14 | DB_PASSWORD = ""
15 | DB_SCHEMAS = ""
16 | 
17 | DEBUG = True
18 | SERVE_API = True
19 | 


--------------------------------------------------------------------------------
/maint/assign_scores_pt.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet4.models import Sentence, Assertion, RawAssertion, Language, Vote
 3 | 
 4 | pt = Language.get('pt')
 5 | def process(raw):
 6 |     if pt.nl.is_blacklisted(raw.surface1.text) or pt.nl.is_blacklisted(raw.surface2.text):
 7 |         raw.votes.delete()
 8 |     else:
 9 |         Vote.objects.record_vote(raw, raw.sentence.creator, 1)
10 | 
11 | queryset_foreach(RawAssertion.objects.filter(language=pt), process, batch_size=100)
12 | 
13 | 


--------------------------------------------------------------------------------
/maint/set_visible.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet.models import Concept, Language
 3 | 
 4 | def set_visible(concept):
 5 |     if not concept.language.nl.is_blacklisted(concept.text):
 6 |         concept.visible=True
 7 |         concept.save()
 8 | 
 9 | def set_invisible(concept):
10 |     if concept.language.nl.is_blacklisted(concept.text):
11 |         concept.visible=False
12 |         concept.save()
13 |         
14 | queryset_foreach(Concept.objects.filter(visible=False), set_visible)
15 | 
16 | 


--------------------------------------------------------------------------------
/maint/conceptnet_fixes/003_bedume_is_silly.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | bedume = User.objects.get(username='bedume')
 5 | activity = Activity.objects.get(name='administrative fiat')
 6 | braw = [r for r in bedume.vote_set.all() if isinstance(r.object, RawAssertion)]
 7 | for b in braw:
 8 |     if b.object.assertion.relation.name == 'HasProperty':
 9 |         print b.object
10 |         b.object.set_rating(bedume, 0, activity)
11 |         b.object.assertion.set_rating(bedume, 0, activity)
12 | 
13 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ConceptNet
 2 | ==========
 3 | 
 4 | ConceptNet aims to give computers access to common-sense knowledge, the kind of information that ordinary
 5 | people know but usually leave unstated.
 6 | 
 7 | The new version of ConceptNet, **ConceptNet 5**, is now maintained in a separate repository:
 8 | 
 9 | http://github.com/commonsense/conceptnet5/
10 | 
11 | See http://conceptnet5.media.mit.edu for more information.
12 | 
13 | If you're interested in ConceptNet, please join the conceptnet-users Google group:
14 | http://groups.google.com/group/conceptnet-users?hl=en
15 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from django.core.management import execute_manager
 3 | try:
 4 |     from csc import django_settings as settings 
 5 | except ImportError:
 6 |     import sys
 7 |     sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
 8 |     sys.exit(1)
 9 | 
10 | if __name__ == "__main__":
11 |     execute_manager(settings)
12 | 


--------------------------------------------------------------------------------
/tools/make_sqlite.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | db_name = sys.argv[1]
 4 | 
 5 | from django.conf import settings
 6 | settings.configure(
 7 |     DATABASE_ENGINE = 'sqlite3',
 8 |     DATABASE_NAME = db_name,
 9 |     INSTALLED_APPS=(
10 |         'django.contrib.auth',
11 |         'django.contrib.contenttypes',
12 |         'conceptnet.corpus',
13 |         'conceptnet',
14 |         'simplenlp',
15 |         'voting',
16 |         'events',
17 |         'south'))
18 | 
19 | from django.core.management import call_command
20 | call_command('syncdb')
21 | call_command('migrate')
22 | 
23 | 


--------------------------------------------------------------------------------
/maint/fix_stray_spaces.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | def fix_spaces(s):
 5 |     if (s.surface1.text.startswith(' ') or s.surface2.text.startswith(' ')):
 6 |         print s
 7 |         newsurf1 = SurfaceForm.get(s.surface1.text.strip(), s.language,
 8 |           auto_create=True)
 9 |         newsurf2 = SurfaceForm.get(s.surface2.text.strip(), s.language,
10 |           auto_create=True)
11 |         print "=>",
12 |         print s.correct_assertion(s.frame, newsurf1, newsurf2)
13 |         s.save()
14 | 
15 | foreach(RawAssertion.objects.filter(language__id='zh-Hant'), fix_spaces)
16 | 
17 | 


--------------------------------------------------------------------------------
/maint/fix_concept_counts.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | Concepts keep track of their number of words. Or, they should.
 5 | '''
 6 | 
 7 | from csc.util.batch import queryset_foreach
 8 | from csc.conceptnet4.models import Concept
 9 | from django.db.models.query import Q
10 | 
11 | def fix_concept_counts():
12 |     def fix_concept(concept):
13 |         if concept.words: return
14 |         concept.words = len(concept.text.split())
15 |         concept.save()
16 | 
17 |     return queryset_foreach(
18 |         Concept.objects.filter(Q(words=0) | Q(words__isnull=True)), fix_concept)
19 | 
20 | if __name__ == '__main__':
21 |     fix_concept_counts()
22 | 


--------------------------------------------------------------------------------
/maint/undo_globalmind.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet4.models import *
 2 | from events.models import Event, Activity
 3 | from voting.models import Vote
 4 | from csc.util import queryset_foreach
 5 | 
 6 | def nuke_it(event):
 7 |     object = event.object
 8 |     if object is None: return
 9 |     for vote in object.votes.all():
10 |         vote.delete()
11 |     object.delete()
12 | 
13 | #queryset_foreach(Event.objects.filter(content_type__id=92, activity__id=41),
14 | #nuke_it, 50)
15 | queryset_foreach(Event.objects.filter(content_type__id=90, activity__id=41),
16 | nuke_it, 50)
17 | queryset_foreach(Event.objects.filter(content_type__id=20, activity__id=41),
18 | nuke_it, 50)
19 | 
20 | 


--------------------------------------------------------------------------------
/maint/nerf_a_user.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet4.models import *
 2 | from django.db import transaction
 3 | 
 4 | def nerf(user):
 5 |     for vote in Vote.objects.filter(user=user):
 6 |         badass = vote.object
 7 |         vote.delete()
 8 |         badass.update_score()
 9 |         print badass
10 | 
11 | @transaction.commit_on_success
12 | def nerf_bobman():
13 |     bobman = User.objects.get(username='bobMan')
14 |     crap = bobman.rawassertion_set.all()[0]
15 |     lusers = [vote.user for vote in crap.votes.all() if vote.vote == 1]
16 |     
17 |     for luser in lusers:
18 |         print
19 |         print luser
20 |         nerf(luser)
21 |         
22 | if __name__ == '__main__': nerf_bobman()


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/red.txt:
--------------------------------------------------------------------------------
1 | aggression, ambition, anger, arrogance, attention, autumn, blood, bravery, career goals, cheerfulness, christmas, communism, courage, danger, debt, december, desire, determination, devil, drama, driving forces, dynamic, emergency, emotional intensity, energy, eroticism, excitement, fame, fast action, fire, force, gaudiness, gemini, generosity, good fortune, good-tasting, happiness, heat, intense passion, leadership, love, lust, mars, masculinity, passion, power, provoking, radicalism, rage, respect, revolution, risk, romance, sex, shame, socialism, speed, stimulating, stop, strength, summer, survival, urgency, vibrancy, vigor, violence, war, warmth, heart, kill, bleed
2 | 


--------------------------------------------------------------------------------
/maint/conceptnet_fixes/002_are_for.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | target_frame = Frame.objects.get(language=en, relation__name='UsedFor', text='{1} is for {2}')
 5 | 
 6 | def queryset():
 7 |     frame = Frame.objects.get(text='{1} are {2}', language=en, relation__name='IsA')
 8 |     got = RawAssertion.objects.filter(language=en, frame=frame)
 9 |     return got
10 | 
11 | def fix(s):
12 |     if s.surface2.text.startswith('for '):
13 |         print s
14 |         newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True)
15 |         print "=>",
16 |         print s.correct_assertion(target_frame, s.surface1, newsurf)
17 | 
18 | foreach(queryset(), fix)
19 | 
20 | 


--------------------------------------------------------------------------------
/maint/fix_stray_spaces2.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | def fix_spaces(s):
 5 |     if (s.surface1.text.startswith(' ') or s.surface2.text.startswith(' ')):
 6 |         print s
 7 |         newsurf1 = SurfaceForm.get(s.surface1.text.strip(), s.language,
 8 |           auto_create=True)
 9 |         newsurf2 = SurfaceForm.get(s.surface2.text.strip(), s.language,
10 |           auto_create=True)
11 |         #print s.correct_assertion(s.frame, newsurf1, newsurf2)
12 |         s.surface1=newsurf1
13 |         s.surface2=newsurf2
14 |         s.save()
15 |         print "=>",
16 |         print s
17 | 
18 | foreach(RawAssertion.objects.filter(language__id='zh-Hant'), fix_spaces)
19 | 
20 | 


--------------------------------------------------------------------------------
/maint/update_scores.py:
--------------------------------------------------------------------------------
 1 | from csc_utils.batch import queryset_foreach
 2 | from conceptnet.models import Sentence, Assertion, RawAssertion
 3 | 
 4 | 
 5 | def update_scores():
 6 |     queryset_foreach(Assertion, lambda x: x.update_score(),
 7 |     batch_size=100)
 8 |     queryset_foreach(RawAssertion, lambda x: x.update_score(),
 9 |     batch_size=100)
10 |     # queryset_foreach(Sentence.objects.exclude(language__id='en'), lambda x: x.update_score(), batch_size=100)
11 | 
12 | def fix_raw_assertion_vote(raw):
13 |     for vote in raw.votes.all():
14 |         raw.assertion.set_rating(vote.user, vote.vote)
15 | 
16 | def update_votes():
17 |     queryset_foreach(RawAssertion, lambda x: fix_raw_assertion_vote(x), batch_size=100)
18 | 
19 | 


--------------------------------------------------------------------------------
/maint/fix_people_person.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet4.models import RawAssertion, Concept, Assertion,\
 2 | SurfaceForm
 3 | from django.db import transaction
 4 | 
 5 | people = Concept.get('people', 'en')
 6 | person = Concept.get('person', 'en')
 7 | 
 8 | @transaction.commit_on_success
 9 | def fix_all():
10 |     for peopleform in people.surfaceform_set.all():
11 |         print peopleform
12 |         peopleform.concept = person
13 |         peopleform.save()
14 |         for raw in RawAssertion.objects.filter(surface1=peopleform):
15 |             print raw.update_assertion()
16 |         for raw in RawAssertion.objects.filter(surface2=peopleform):
17 |             print raw.update_assertion()
18 | 
19 | if __name__ == '__main__': fix_all()
20 | 
21 | 


--------------------------------------------------------------------------------
/maint/conceptnet_fixes/001_is_like.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | target_frame = Frame.objects.get(language=en, relation__name='ConceptuallyRelatedTo', text='{1} is like {2}')
 5 | 
 6 | def queryset():
 7 |     frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='HasProperty')
 8 |     got = RawAssertion.objects.filter(language=en, frame=frame)
 9 |     return got
10 | 
11 | def fix(s):
12 |     if s.surface2.text.startswith('like '):
13 |         print s
14 |         newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True)
15 |         print "=>",
16 |         print s.correct_assertion(target_frame, s.surface1, newsurf)
17 | 
18 | foreach(queryset(), fix)
19 | 
20 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from setuptools import setup, find_packages
 3 | 
 4 | packages = find_packages(exclude=['maint'])
 5 | version_str = '4.0.1'
 6 | 
 7 | setup(  name='ConceptNet',
 8 |         version=version_str,
 9 |         description='A Python API to a Semantic Network Representation of the Open Mind Common Sense Project',
10 |         author='Catherine Havasi, Robert Speer, Jason Alonso, and Kenneth Arnold',
11 |         author_email='conceptnet@media.mit.edu',
12 |         url='http://conceptnet.media.mit.edu/',
13 |         packages=packages,
14 |         include_package_data=False,
15 |         install_requires=['csc-utils >= 0.6', 'django', 'simplenlp'],
16 |         # Metadata
17 |         license = "GPL v3",
18 |         )
19 | 


--------------------------------------------------------------------------------
/conceptnet/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | from conceptnet.models import Frequency, Frame, RawAssertion, Concept,\
 3 | Assertion, Relation
 4 | 
 5 | for model in (RawAssertion, Concept, Assertion, Relation):
 6 |     admin.site.register(model)
 7 | 
 8 | class FrequencyAdmin(admin.ModelAdmin):
 9 |     list_display = ('language', 'text', 'value')
10 |     list_filter = ('language',)
11 | admin.site.register(Frequency, FrequencyAdmin)
12 | 
13 | class FrameAdmin(admin.ModelAdmin):
14 |     list_display = ('id', 'language','relation','text','preferred')
15 |     list_filter = ('language','relation')
16 |     list_per_page = 100
17 |     fields = ('relation', 'text', 'language', 'goodness', 'frequency')
18 | admin.site.register(Frame, FrameAdmin)
19 | 


--------------------------------------------------------------------------------
/maint/ratings_to_votes_to_events.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0, '..')
 3 | import settings
 4 | from util import queryset_foreach
 5 | from events.models import Event
 6 | from voting.models import Vote
 7 | from datetime import datetime
 8 | from conceptnet4.models import Rating
 9 | 
10 | def rating_to_vote(r):
11 |     obj = r.sentence or r.raw_assertion or r.assertion
12 |     score = 0
13 |     if r.score > 0: score=1
14 |     if r.score < 0: score=-1
15 |     Vote.objects.record_vote(obj, r.user, score)
16 |     ev = Event.record_event(obj, r.user, r.activity)
17 |     ev.timestamp = r.updated
18 |     ev.save()
19 | 
20 | def progress_callback(num, den):
21 |     print num, '/', den
22 | 
23 | queryset_foreach(Rating.objects.all(), rating_to_vote)
24 | 
25 | 


--------------------------------------------------------------------------------
/conf/db_config.py.orig:
--------------------------------------------------------------------------------
 1 | try:
 2 |         # You may create a db_password.py file in your project with a PASSWORD variable.
 3 |         # This lets you keep sensitive information out of your source code repositories
 4 |         # and your installation directories.
 5 | 	from db_password import PASSWORD
 6 | except:
 7 | 	PASSWORD = ""
 8 | 	
 9 | DB_ENGINE = "postgresql_psycopg2" # 'postgresql', 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'ado_mssql'.
10 | DB_NAME = "" # Or path to database file if using sqlite3.
11 | DB_HOST = "" # Not used with sqlite3.
12 | DB_PORT = "5432" # Not used with sqlite3.
13 | DB_USER = "" # Set to empty string for localhost. Not used with sqlite3.
14 | DB_PASSWORD = PASSWORD # Set to empty string for default. Not used with sqlite3.
15 | DB_SCHEMAS = "public"
16 | 


--------------------------------------------------------------------------------
/tools/stats.py:
--------------------------------------------------------------------------------
 1 | from conceptnet4.models import *
 2 | from operator import itemgetter
 3 | 
 4 | def relations_distribution(lang):
 5 |     return sorted(
 6 |         ((relation.name, relation.assertion_set.filter(language=lang).count())
 7 |          for relation in Relation.objects.filter(description__isnull=False)),
 8 |         key=itemgetter(1))
 9 | 
10 | def sample_assertions(relation, n=10):
11 |     return [assertion.nl_repr() for assertion in
12 |             Relation.get(relation).assertion_set
13 |             .filter(score__gt=0).order_by('?')[:n]]
14 | 
15 | def oldest_assertion(lang):
16 |     return Assertion.objects.filter(language=lang).order_by('-rawassertion__created')[0]
17 | 
18 | 
19 | if __name__ == '__main__':
20 |     print relations_distribution('en')
21 |     
22 |         
23 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/api.wsgi:
--------------------------------------------------------------------------------
 1 | # WSGI entry point: builds the module-level `application` object from
 2 | # Django's WSGIHandler.
 3 | # NOTE(review): the original header read "commons2.wsgi is configured to live
 4 | # in projects/commons2/deploy."; the paths below are hard-coded to
 5 | # /srv/conceptnet instead.
 6 | 
 7 | import os
 8 | import sys
 9 | 
10 | # redirect sys.stdout to sys.stderr for bad libraries like geopy that uses
11 | # print statements for optional import exceptions.
12 | sys.stdout = sys.stderr
13 | 
14 | from os.path import abspath, dirname, join
15 | from site import addsitedir
16 | 
17 | # Make the deployment's site-packages directories importable.
18 | addsitedir('/srv/conceptnet/lib/python2.6/site-packages')
19 | addsitedir('/usr/lib/pymodules/python2.6')
20 | # Each insert goes to the front of sys.path, so the path inserted last
21 | # (/srv/conceptnet/conceptnet) is searched first.
22 | sys.path.insert(0, '/srv/conceptnet')
23 | sys.path.insert(0, '/srv/conceptnet/conceptnet')
24 | from django.conf import settings
25 | # Tell Django which settings module to load.
26 | os.environ["DJANGO_SETTINGS_MODULE"] = "conceptnet.django_settings"
27 | 
28 | from django.core.handlers.wsgi import WSGIHandler
29 | application = WSGIHandler()
30 | 
31 | 


--------------------------------------------------------------------------------
/maint/fix_abnormal_concepts.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet.models import Concept, SurfaceForm, Language, Assertion
 3 | from django.db import connection
 4 | 
 5 | en = Language.get('en')
 6 | 
 7 | def fix_surface(surface):
 8 |     norm, residue = en.nl.lemma_split(surface.text)
 9 |     if norm != surface.concept.text:
10 |         print
11 |         print "surface:", surface.text.encode('utf-8')
12 |         print "concept:", surface.concept.text.encode('utf-8')
13 |         print "normal:", norm.encode('utf-8')
14 |         surface.update(norm, residue)
15 | 
16 | queryset_foreach(SurfaceForm.objects.filter(language=en),
17 |   fix_surface,
18 |   batch_size=100)
19 | 
20 | 
21 | # plan:
22 | #  fix surface form -> concept mapping
23 | #  remove obsolete concepts
24 | 


--------------------------------------------------------------------------------
/maint/fix_dup_frames.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet4.models import Frame
 3 | from django.db import connection
 4 | def fix_dups(frame):
 5 |     dups = Frame.objects.filter(language=frame.language, text=frame.text,
 6 |                                 relation=frame.relation)
 7 |     for dup in dups:
 8 |         if dup.id == frame.id:
 9 |             continue
10 |         print dup
11 |         cursor = connection.cursor()
12 |         print("UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" % (frame.id, dup.id))
13 |         cursor.execute("UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" % (frame.id, dup.id))
14 |         dup.delete()
15 |         print
16 | 
17 | queryset_foreach(Frame.objects.all().order_by('-goodness', 'id'),
18 |   fix_dups,
19 |   batch_size=100)
20 | 
21 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/urls.py:
--------------------------------------------------------------------------------
 1 | from django.conf.urls.defaults import *
 2 | from csamoa.representation.presentation.models import Predicate
 3 | 
 4 | urlpatterns = patterns('csamoa.realm.views',
 5 |     url(r'^concept/', 'get_stemid'),
 6 |     url(r'^concept/(?P<id>\d+)/all', 'get_stem_allforms'),
 7 | )
 8 | 
 9 | # URLs:
10 | # GET /concept/?text={text,...}&language={language}
11 | #  -> gets concept id(s) for text(s)
12 | # GET /concept/{id}/canonical/ -> gets canonical form for concept
13 | # GET /concept/{id}/all/ -> gets all forms for concept
14 | # GET /concept/{id,...}/context -> gets context for the concept(s)
15 | 
16 | 
17 | # # Programmatically define the API
18 | # api = {
19 | #     'concept': {
20 | #         '__required': {
21 | #             'language': TextField,
22 | #             },
23 | #         'id': Function(get_stemid,
24 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/black.txt:
--------------------------------------------------------------------------------
1 | anger, anonymity, anti-establishment, authority, bad luck, binding, classic, classy, conventionality, dark, death, depth, devil, dignity, dirt, dirtiness, drama, dramatic, elegance, emptiness, evil, fear, fear of the unknown, formality, gloomy, grief, high quality, january, modern music, modernism, modernity, mourning, mysterious, mystery, night, power, prestige, protection, rebellion, reliability, remorse, repelling negativity, reservedness, sadness, secretiveness, seduction, serious, seriousness, sex, sexuality, shapeshifting, sophistication, sorrow, space, strength, strength of character, strong, style, underground, unhappiness, wealth, wicked,
2 | 
3 | bear, beetle, berry, boots, cape, cat, clay, goth, hole, ink, leather, oak, panther, pearl, raven, robes, screen, spider, suit, text, universe, void, person, deep, die, sleep
4 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/grey.txt:
--------------------------------------------------------------------------------
1 | ambivalence, anachronism, astral energies, balance, boredom, clairaudience, clairvoyance, communication, confusion, decay, decrepitude, depression, distinguishment, dreams, dullness, dust, elegance, fear, female power, formality, fright, glamous, goddess, graceful aging, high tech, humility, ill health, industrial, intuition, march, modernity, mourning, neutrality, ornate riches, pollution, psychometry, respect, reverence, sleekness, stability, strong emotions, subtlety, telepathy, urban sprawl, wisdom,
2 | 
3 | alien, ash, cardboard, castle, confederate, concrete, battleship, beard, brain,  cloud, chrome, computer, dolphin, duality, dust, engine, fighter plane, fish, hair, garbage can, goblin, grave, gui, metal, motor, mountain, mouse, office, pigeon, screw, rabbit, rock, seal, silver, sky, steel, stone, suit, wallflower, wehrmacht, wolf


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/purple.txt:
--------------------------------------------------------------------------------
1 | ambition, arrogance, big profits, ceremony, compassion, conceit, confusion, contemplation, creativity, cruelty, deeper truth, deja vu, delicacy, devotion, dignity, dreams, ego, elegance, empathy, energy, enlightenment, envy, exaggeration, extravagance, fame, fantasy, flamboyance, gaudiness, hidden knowledge, high aspirations, homosexuality, imagination, independence, influence, intuition, invisible, lesbianism, libra, luxury, magic, majesty, may, meditation, mourning, mystery, nausea, new age spirituality, nobility, november, paganism, pride, profanity, psychic ability, religion, riches, richness, romanticism, royalty, self assurance, self-confidence, sensuality, shadow, sophistication, spiritual connection, spiritual power, spirituality, telepathy, third eye, transformation, unconscious, universal spirit, war, wealth, wicca, wisdom
2 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/brown.txt:
--------------------------------------------------------------------------------
1 | anachronism, appetite, autumn, basic, boorishness, calm, capricorn, coffee, comfort, conservatism, contentment, credibility, dependability, depth, dirt, down-to-earth, dullness, earth, endurance, fertility, filth, friendliness, friendships, generosity, ground, hard work, hearth, heaviness, home, inexpensive, intimacy, longevity, masculine, materialistic thoughts, natural organisms, nature, nurturing, october, outdoors, passivity, poverty, practicality, productivity, reliability, richness, roughness, rustic, rusticism, scorpio, sensuality, simplicity, special events, stability, steadfastness, strength, substance, tradition, tranquility, wholesomeness,
2 | 
3 | bark, bear, beer, bread, branch, chocolate, cocoa, dog, faeces, fox, hair, horse, monk, mud, paper bag, ppop, puddle, rabbit, shoes, soil, squirrel, sugar, tree, trunk, wolf, person
4 | 


--------------------------------------------------------------------------------
/tools/load_autocorrector.py:
--------------------------------------------------------------------------------
 1 | from csc.corpus.models import AutocorrectRule, Language
 2 | from django.db import transaction
 3 | 
 4 | print "Loading table..."
 5 | autocorrect_file = './autocorrect.txt'
 6 | autocorrect_kb = {}
 7 | items = filter(lambda line:line.strip()!='',open(autocorrect_file,'r').read().split('\n'))
 8 | lang_en = Language.objects.get(pk='EN')
 9 | 
10 | def bulk_commit(lst):
11 |     for obj in lst: obj.save()
12 | bulk_commit_wrapped = transaction.commit_on_success(bulk_commit)
13 | 
14 | print "Building entries..."
15 | ars = []
16 | for entry in items:
17 |     match = entry.split()[0]
18 |     replace_with = ' '.join(entry.split()[1:])
19 |     ar = AutocorrectRule()
20 |     ar.language = lang_en
21 |     ar.match = match
22 |     ar.replace_with = replace_with
23 |     ars.append(ar)
24 | 
25 | print "Bulk committing..."
26 | bulk_commit_wrapped(ars)
27 | 


--------------------------------------------------------------------------------
/maint/check_best_frame.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet4.models import Frame, Assertion, RawAssertion, SurfaceForm
 3 | from django.db import connection
 4 | 
 5 | def check_frame(assertion):
 6 |     try:
 7 |         assertion.best_frame
 8 |     except Frame.DoesNotExist:
 9 |         print "No frame for:", assertion
10 |         assertion.best_frame = None
11 |         assertion.save()
12 |     
13 |     try:
14 |         assertion.best_raw
15 |         assertion.best_surface1
16 |         assertion.best_surface2
17 |     except (RawAssertion.DoesNotExist, SurfaceForm.DoesNotExist):
18 |         print "No raw assertion for:", assertion
19 |         assertion.best_raw = None
20 |         assertion.best_surface1 = None
21 |         assertion.best_surface2 = None
22 |         assertion.save()
23 | 
24 | queryset_foreach(Assertion.objects.all(), check_frame,
25 |   batch_size=100)
26 | 
27 | 


--------------------------------------------------------------------------------
/maint/generalize_dependencies.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0, '..')
 3 | import settings
 4 | 
 5 | from util import queryset_foreach
 6 | from corpus.models import DependencyParse
 7 | 
 8 | def generalize_dep(dep):
 9 |     if dep.linktype.startswith('prep_') or dep.linktype.startswith('prepc_'):
10 |         newlt = 'prep'
11 |     elif dep.linktype.startswith('conj_'):
12 |         newlt = 'conj'
13 |     else: return
14 | 
15 |     newdep = DependencyParse(sentence_id=dep.sentence_id,
16 |                              linktype=newlt,
17 |                              word1=dep.word1,
18 |                              word2=dep.word2,
19 |                              index1=dep.index1,
20 |                              index2=dep.index2)
21 |     newdep.save()
22 | 
23 | def progress_callback(num, den):
24 |     print num, '/', den
25 | 
26 | queryset_foreach(DependencyParse.objects.all(), generalize_dep)
27 | 
28 | 


--------------------------------------------------------------------------------
/maint/conceptnet_fixes/004_bedume_is_still_silly.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.conceptnet.analogyspace import *
 3 | from csc.util import foreach
 4 | 
 5 | cnet = conceptnet_2d_from_db('en')
 6 | aspace = cnet.svd()
 7 | 
 8 | bedume = User.objects.get(username='bedume')
 9 | activity = Activity.objects.get(name='administrative fiat')
10 | braw = [r for r in bedume.vote_set.all() if isinstance(r.object, RawAssertion)]
11 | for b in braw:
12 |     if b.object.assertion.relation.name == 'IsA':
13 |         print b.object
14 |         concept = b.object.assertion.concept1.text
15 |         if concept in aspace.u.label_list(0):
16 |             sim = aspace.u[concept,:].hat() * aspace.u['debbie',:].hat()
17 |             if sim > 0.9:
18 |                 print sim, b.object
19 |                 #b.object.set_rating(bedume, 0, activity)
20 |                 #b.object.assertion.set_rating(bedume, 0, activity)
21 | 
22 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/yellow.txt:
--------------------------------------------------------------------------------
1 | accelerated learning, action, agility, air, april, avarice, betrayal, breaking mental blocks, brightness, caution, cheerful, cheerfulness, clarity, confidence, cooperation, courage, covetousness, cowardice, cowardness, creativity, curiosity, deceit, desire to improve, devil, disease, dishonesty, dreams, earth, energy, enlightenment, enthusiasm, femininity, follower, friendship, gladness, glory, greed, happiness, harmony, hazard signs, hazards, heat, honour, hope, humour, idealism, illness, imagination, intellect, intelligence, jaundiced, jealousy, joy, leo, liberalism, light, logical imagination, loyalty, mellowness, memory, mental force, mourning, optimism, organisation, perception, philosophy, purity, quarantine, radiance, restlessness, september, sickness, sociability, social energy, summer, sun, sunlight, sunshine, taurus, uncertainty, understanding, warmth, weakness, wealth, wisdom,
2 | person, gold, golden
3 | 


--------------------------------------------------------------------------------
/maint/count_surfaceforms.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from csc.conceptnet4.models import SurfaceForm, RawAssertion
 4 | from csc.util import queryset_foreach
 5 | from django.db.models import Q
 6 | 
 7 | fixed = 0
 8 | 
 9 | def update_count(surface):
10 |     global fixed
11 |     num_raws = RawAssertion.objects.filter(Q(surface1=surface) | Q(surface2=surface)).count()
12 |     if num_raws != surface.use_count:
13 |         fixed += 1
14 |         surface.use_count = num_raws
15 |         surface.save()
16 | 
17 | def update_surfaceform_usecounts(lang):
18 |     '''Fix the num_assertions count for each concept'''
19 |     status = queryset_foreach(SurfaceForm.objects.filter(language=lang), update_count)
20 |     print 'Updated counts on %d of %d surface forms' % (fixed, status.total)
21 |     return status
22 | 
23 | if __name__=='__main__':
24 |     import sys
25 |     lang = sys.argv[1]
26 |     status = update_surfaceform_usecounts(lang)
27 | 


--------------------------------------------------------------------------------
/maint/conceptnet_fixes/000_is_for.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | from csc.util import foreach
 3 | 
 4 | target_frame = Frame.objects.get(language=en, relation__name='UsedFor', text='{1} is for {2}')
 5 | 
 6 | def queryset1():
 7 |     frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='HasProperty')
 8 |     got = RawAssertion.objects.filter(language=en, frame=frame)
 9 |     return got
10 | 
11 | def queryset2():
12 |     frame = Frame.objects.get(text='{1} is {2}', language=en, relation__name='ReceivesAction')
13 |     got = RawAssertion.objects.filter(language=en, frame=frame)
14 |     return got
15 | 
16 | def fix_is_for(s):
17 |     if s.surface2.text.startswith('for '):
18 |         print s
19 |         newsurf = SurfaceForm.get(s.surface2.text[4:], 'en', auto_create=True)
20 |         print "=>",
21 |         print s.correct_assertion(target_frame, s.surface1, newsurf)
22 | 
23 | foreach(queryset1(), fix_is_for)
24 | 
25 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/white.txt:
--------------------------------------------------------------------------------
1 | air, aries, awareness, birth, charity, chastity, clarity, clean, cleanliness, clinical, clinicism, cold, coldness, coolness, cowardice, dignity, emptiness, fairness, faith, fearfulness, feminine divinity, fire, glory, goddess, good, goodness, harsh, higher self, holiness, hope, humility, innocence, january, light, pale, peace, perfection, pisces, positivity, precision, pristine, purity, reverence, safety, security, self-sacrifice, simplicity, snow, spirituality, sterility, successful innovations, surrender, unimaginative, union, virgin, virginity, void, weak, winter, youth,
2 | 
3 | airplane, background, black, bread, bride, cat, chocolate, cloud, cocaine, cream, doctor, dove, feather, flag, flour, flower, flutter, fog, ghost, glacier, hair, horse, ice, light, macbook, marble, milk, mist,  noise, nurse, paper, panty, polar bear, porcelain, rabbit, racism, rose, salt, sand, silk, socks, snow, swan, wedding, white house, wolf, person
4 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/blue.txt:
--------------------------------------------------------------------------------
1 | acceptance, affection, air, aquarius, artistry, calm, cleanliness, cold, coldness, comfort, communication, compassion, confidence, consciousness, conservatism, contemplation, coolness, cooperation, creativity, dependability, depression, depth, devotion, distance, earth, empathy, eternity, faith, february, fluidity, formality, freedom. betterment of humanity, friendliness, friendship, gentleness, good fortune, harmony, heaven, heavy, higher thoughts, ice, idealism, ideas, infinity, inner strength, inspiration, intellect, intelligence, july, krishna, light, love, loyalty, masculinity, melancholy, messages, mourning, mystery, non-threatening, obscenity, openness, order, patience, peace, piety, pisces, precision, progress, protection, quiet wisdom, reassurance, relaxation, reliability, religion, rest, sadness, sea, security, serenity, sharing, sincerity, sky, speech, spiritual inspiration, stability, steadfastness, strength, tackiness, technology, tranquility, travel, trust, truth, truthfulness, understanding, unity, virgo, water, winter, wisdom
2 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/orange.txt:
--------------------------------------------------------------------------------
1 | action, aggression, ambition, amiability, anxiety, appetite, arrogance, assurance, attention-grabbing, attraction, attractiveness, autumn, balance, beginnings, buddhism, business goals, career, charity, cheap, cheer, cheerfulness, constructiveness, courage, courtesy, creativity, danger, desire, determination, earth, emotional lift, encouragement, endurance, energy, enthusiasm, excitement, expansiveness, fascination, fire, flamboyance, friendliness, friendly, fun kids colour, gaudiness, general success, goals, growing things, happiness, health, heat, hinduism, independence, intense, inviting, joy, justice, knowledge, legal matters, life, lively, low-budget, mental and appetite stimulant, mind, mood-lightening, overemotion, playfulness, property deals, quick movement, sagittarius, self-assuredness, selling, september, sociability, social force, steadfastness, strength, success, tension, the sun, tropics, uninhibited, vibrancy, vibrant, vitality, warmth, warning, youth,
2 | 
3 | bee, brick, bronze, cake, carrot, copper, dragonfly, fire, flame, flower, goldfish, honey, leaf, marmalade, melon, netherlands, orange, pumpkin, rust, smiley, 


--------------------------------------------------------------------------------
/maint/count_assertions.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from csc.conceptnet4.models import Concept
 4 | from csc.util import queryset_foreach
 5 | 
 6 | concepts_fixed = 0
 7 | significant = 0
 8 | 
 9 | def fix_concept(concept):
10 |     global concepts_fixed, significant
11 |     rels = concept.get_assertions(useful_only=True).count()
12 |     if rels != concept.num_assertions:
13 |         # print '%s: %d->%d' % (concept.canonical_name, concept.num_assertions, rels)
14 |         concepts_fixed += 1
15 |         if rels > 2:
16 |             significant += 1
17 |         concept.num_assertions = rels
18 |         concept.save()
19 |     if not concept.words:
20 |         concept.words = len(concept.text.split())
21 |         concept.save()
22 | 
23 | def update_assertion_counts(lang):
24 |     '''Fix the num_assertions count for each concept'''
25 |     status = queryset_foreach(Concept.objects.filter(language=lang), fix_concept)
26 |     print 'Fixed %s of %s concepts (%s with >2 rels).' % (concepts_fixed, status.total, significant)
27 |     return status
28 | 
29 | if __name__=='__main__':
30 |     import sys
31 |     lang = sys.argv[1]
32 |     status = update_assertion_counts(lang)
33 | 


--------------------------------------------------------------------------------
/conceptnet/lib/voting/models.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.contenttypes import generic
 2 | from django.contrib.contenttypes.models import ContentType
 3 | from django.contrib.auth.models import User
 4 | from django.db import models
 5 | 
 6 | from voting.managers import VoteManager
 7 | 
 8 | SCORES = (
 9 |     (u'+1', +1),
10 |     (u'-1', -1),
11 | )
12 | 
13 | class Vote(models.Model):
14 |     """
15 |     A vote on an object by a User.
16 |     """
17 |     user         = models.ForeignKey(User)
18 |     content_type = models.ForeignKey(ContentType)
19 |     object_id    = models.PositiveIntegerField()
20 |     object       = generic.GenericForeignKey('content_type', 'object_id')
21 |     vote         = models.SmallIntegerField(choices=SCORES)
22 | 
23 |     objects = VoteManager()
24 | 
25 |     class Meta:
26 |         db_table = 'votes'
27 |         # One vote per user per object
28 |         unique_together = (('user', 'content_type', 'object_id'),)
29 | 
30 |     def __unicode__(self):
31 |         return u'%s: %s on %s' % (self.user, self.vote, self.object)
32 | 
33 |     def is_upvote(self):
34 |         return self.vote == 1
35 | 
36 |     def is_downvote(self):
37 |         return self.vote == -1
38 | 


--------------------------------------------------------------------------------
/maint/simple_update_rawassertion_assertion_fkey.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import RawAssertion, Concept, Assertion
 2 | from django.db import transaction
 3 | import sys
 4 | 
 5 | no_assertion = set()
 6 | nonunique = set()
 7 | 
 8 | @transaction.commit_on_success
 9 | def main():
10 |     updated_count = 0
11 | 
12 |     for raw in RawAssertion.objects.filter(predicate__id__isnull=True).iterator():
13 |         assertions = list(Assertion.objects.filter(sentence__id=raw.sentence_id))
14 |         if len(assertions) == 0:
15 |             no_assertion.add(raw.id)
16 |         elif len(assertions)==1:
17 |             updated_count += 1
18 |             if updated_count % 1000 == 1:
19 |                 sys.stderr.write('\r'+str(updated_count))
20 |                 sys.stderr.flush()
21 |                 transaction.commit_if_managed()
22 |             raw.predicate = assertions[0]
23 |             raw.save()
24 |         else:
25 |             nonunique.add(raw.id)
26 | 
27 |     print 'Updated', updated_count, 'assertions'
28 |     print 'No assertion for', len(no_assertion), 'assertions'
29 |     print 'Non-unique assertion for', len(nonunique), 'assertions'
30 | 
31 | if __name__ == '__main__':
32 |     main()
33 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/ocean.txt:
--------------------------------------------------------------------------------
 1 | All day the city's selling something
 2 | Always, the busy people spinning 'round
 3 | Busier
 4 | Dizzier
 5 | 'Til they go back home to somewhere
 6 | 
 7 | And taxies stop to say "hello"
 8 | "Want a ride? I'll take you there"
 9 | "To anywhere, just tell my driver"
10 | 
11 | The sun is casting shadows
12 | An afternoon is fading
13 | I ask, but no one knows
14 | The answer to the question
15 | My life is like an island
16 | Where does this ocean go?
17 | 
18 | Shyly, a wino sips his wine
19 | Slowly, cause to him that is all that matters
20 | He sees a cat he knows so well
21 | Now sleeping on a bench together
22 | 
23 | A woman waiting by herself, selling flowers
24 | "Please buy some, so I can help my daughter, will you?"
25 | 
26 | The man with spider eyebrows
27 | Is standing on a corner
28 | "Who wants to see a show?"
29 | His head looks like a melon
30 | He turns into an alley
31 | Then stops to blow his nose
32 | Sky is filled with neon
33 | The buildings stand electric
34 | And almost seem to glow
35 | Want answers to the question
36 | My life is like an island
37 | Where does the ocean go?
38 | I really want to know
39 | My life is like an island
40 | It's time for me now to fly
41 | Where does the ocean go?
42 | 


--------------------------------------------------------------------------------
/maint/import_conceptnet_zh.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import *
 2 | import codecs
# Activity passed to RawAssertion.make for every imported line
# (created in the database if it does not exist yet)
activity, _ = Activity.objects.get_or_create(name='Pet game')
# Language every imported frame is asserted to belong to
zh = Language.get('zh-Hant')
 5 | def run(filename):
 6 |     f = codecs.open(filename, encoding='utf-8')
 7 |     count = 0
 8 |     for line in f:
 9 |         if filename.endswith('1.txt') and count < 77600:
10 |             count += 1
11 |             continue
12 |         line = line.strip()
13 |         if not line: continue
14 |         username, frame_id, text1, text2 = line.split(', ')
15 |         user, _ = User.objects.get_or_create(username=username,
16 |             defaults=dict(
17 |                 first_name='',
18 |                 last_name='',
19 |                 email='',
20 |                 password='-'
21 |             )
22 |         )
23 |         frame = Frame.objects.get(id=int(frame_id))
24 |         assert frame.language == zh
25 |         try:
26 |             got = RawAssertion.make(user, frame, text1, text2, activity)
27 |             print got
28 |         except RawAssertion.MultipleObjectsReturned:
29 |             print "got multiple"
30 |     f.close()
31 | 
32 | run('conceptnet_zh_part9.txt')
33 | run('conceptnet_zh_part10.txt')
34 | run('conceptnet_zh_api.txt')
35 | 
36 | 


--------------------------------------------------------------------------------
/conceptnet/network.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tools for working with ConceptNet as a generalized semantic network.
 3 | 
 4 | Requires the NetworkX library.
 5 | """
 6 | import networkx as nx
 7 | import codecs
 8 | from conceptnet.models import Assertion
 9 | 
def make_network(lang):
    """
    Build the ConceptNet graph for one language.

    `lang` is the language ID as a string. The result is a NetworkX
    MultiDiGraph with one edge per useful assertion, carrying the
    attributes `rel`, `score` and `freq`. Assertions with an empty concept
    text, or with the same text on both sides, are left out.
    """
    columns = ('concept1__text', 'concept2__text', 'relation__name',
               'score', 'frequency__value')
    rows = Assertion.useful.filter(language__id=lang).values_list(*columns)

    graph = nx.MultiDiGraph()
    for source, target, relation, score, frequency in rows.iterator():
        if not source or not target or source == target:
            continue
        graph.add_edge(source, target, rel=relation, score=score,
                       freq=frequency)
    return graph
23 | 
def export_gml(lang, filename):
    """
    Write the ConceptNet graph for language `lang` to `filename` as GML.

    The output file is closed even if building or writing the graph fails.
    """
    # NOTE(review): this uses 'utf-7' while export_edgelist uses 'utf-8';
    # presumably chosen to keep the GML output ASCII-only -- confirm
    f = codecs.open(filename, 'w', encoding='utf-7')
    try:
        graph = make_network(lang)
        nx.write_gml(graph, f)
    finally:
        f.close()
29 | 
def export_edgelist(lang, filename):
    """
    Write the ConceptNet graph for language `lang` to `filename` as a
    tab-delimited edge list (UTF-8) that includes the edge data.

    The output file is closed even if building or writing the graph fails.
    """
    f = codecs.open(filename, 'w', encoding='utf-8')
    try:
        graph = make_network(lang)
        nx.write_edgelist(graph, f, data=True, delimiter='\t')
    finally:
        f.close()
35 | 
36 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/context/green.txt:
--------------------------------------------------------------------------------
1 | abundance, aggression, air, august, bad spirits, balance, calm, cancer, change, christmas, coldness, compassion, contemplation, corruption, dependability, devil, diplomacy, disgrace, earth, earth mother, endurance, environment, envy, fauna, fertility, food, fresh, freshness, friendliness, generosity, gentle, go, good luck, good omens, grass, greed, growth, harmony, healing, health, hope, illness, immortality, inexperience, islam, jealousy, joy, life eternal, luck, misfortune, moderation, mold, monetary success, money, natural abundance, nature, nausea, nurturing, pastoral, peace, personal goals, physical healing, prosperity, quiet, quietude, refreshing, renewal, responsiveness, resurrection, safety, self-control, serenity, sharing, sincerity, soothing, spring, stability, tranquil, magic, trees, vegetation, vigor, vigour, vitality, wealth, youth,
2 | 
3 | acid, absinth, algae, apple, army, beryl, brown, camouflage, chemical, chemistry, cucumber, bush, caterpillar, earth, eco, emerald, forest, grass, grasshopper, goblin, ireland, jade, jungle, lawn, leaf, leprechaun, lime, lizard, matrix, moss, nature, new, nuclear waste, ok, olive, organic, park, pea, peter pan, plant, pine, radioactivity, rainforest, recycle, salad, sap, shrub, slime, snake, surgery, tea, turtle, venom, woods
4 | 


--------------------------------------------------------------------------------
/conceptnet/pseudo_auth/backends.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.auth.models import User as DjangoUser, check_password
 2 | from conceptnet.pseudo_auth.models import LegacyUser
 3 | 
 4 | class LegacyBackend:
 5 |     def authenticate(self, username=None, password=None):
 6 |         try:
 7 |             # Load user object
 8 |             u = LegacyUser.objects.get(username=username)
 9 | 
10 |             # Abort if Django should handle this
11 |             if u.password.startswith('sha1$'): return None
12 |             salt = u.salt
13 | 
14 |             # Build Django-compatible password string
15 |             enc_password = 'sha1$--' + u.salt + '--$' + u.password
16 | 
17 |             # Check password
18 |             if check_password(password+'--',enc_password):
19 |                 # Migrate them to new passwords.
20 |                 u.salt = None
21 |                 u.save()
22 |                 user = self.get_user(u.id)
23 |                 user.set_password(password)
24 |                 user.save()
25 |                 return user
26 |         except LegacyUser.DoesNotExist:
27 |             return None
28 | 
29 |         # Operation Complete!
30 |         return None
31 | 
32 |     def get_user(self, user_id):
33 |         try:
34 |             return DjangoUser.objects.get(pk=user_id)
35 |         except DjangoUser.DoesNotExist:
36 |             return None
37 | 


--------------------------------------------------------------------------------
/maint/update_rawassertion_assertion_fkey.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import RawAssertion, Concept, Assertion
 2 | from django.db import transaction
 3 | 
# Ids of raw assertions for which no matching assertion was found
no_assertion = set()
# Ids of raw assertions that matched more than one assertion
nonunique = set()
# Ids of raw assertions whose processing raised an exception
failed = set()
 7 | 
 8 | @transaction.commit_on_success
 9 | def main():
10 |     updated_count = 0
11 |     for raw in RawAssertion.objects.filter(predicate__id__isnull=True)[:1000].iterator():
12 |         try:
13 |             concept1 = Concept.get(raw.text1, raw.language_id)
14 |             concept2 = Concept.get(raw.text2, raw.language_id)
15 |             assertions = list(Assertion.objects.filter(stem1=concept1,
16 |                                                        stem2=concept2,
17 |                                                        predtype__id=raw.predtype_id))
18 |             if len(assertions) == 0:
19 |                 no_assertion.add(raw.id)
20 |             elif len(assertions) == 1:
21 |                 updated_count += 1
22 |                 raw.predicate = assertions[0]
23 |                 raw.save()
24 |             else:
25 |                 nonunique.add(raw.id)
26 |         except:
27 |             failed.add(raw.id)
28 | 
29 |     print 'Updated', updated_count, 'assertions'
30 |     print 'No assertion for', len(no_assertion), 'assertions'
31 |     print 'Non-unique assertion for', len(nonunique), 'assertions'
32 |     print len(failed), 'failed.'
33 | 
34 | if __name__ == '__main__':
35 |     main()
36 | 


--------------------------------------------------------------------------------
/test/test_conceptnet_queries.py:
--------------------------------------------------------------------------------
 1 | from nose.tools import *
 2 | from conceptnet.models import *
 3 | from nose.plugins.attrib import *
 4 | def setup():
 5 |     en = Language.get('en')
 6 | 
 7 | def test_assertions_exist():
 8 |     Assertion.objects.filter(language=en)[0]
 9 |     Assertion.objects.filter(language=Language.get('pt'))[0]
10 |     Assertion.objects.filter(language=Language.get('ja'))[0]
11 |     Assertion.objects.filter(language=Language.get('ko'))[0]
12 |     Assertion.objects.filter(language=Language.get('zh-Hant'))[0]
13 | 
def test_relations():
    """Among the English dog/bark assertions, one has the relation CapableOf."""
    dog_bark = Assertion.objects.filter(concept1__text='dog',
                                        concept2__text='bark',
                                        language=en)
    relation_names = [assertion.relation.name for assertion in dog_bark]
    assert u'CapableOf' in relation_names
17 | 
def test_get():
    """Concept.get accepts several surface variants; get_raw accepts 'dog'."""
    for phrase in ('dog', 'the dog', 'dogs'):
        Concept.get(phrase, 'en')
    Concept.get_raw('dog', 'en')
23 | 
@raises(Concept.DoesNotExist)
def test_normalize():
    """Concept.get_raw('the dog', 'en') is expected to raise Concept.DoesNotExist."""
    Concept.get_raw('the dog', 'en')
27 | 
def test_surface_forms():
    """The English concept 'run' has the surface forms run, to run and running."""
    forms = SurfaceForm.objects.filter(concept__text='run', language=en)
    surfaces = [form.text for form in forms]
    for expected in (u'run', u'to run', u'running'):
        assert expected in surfaces
33 | 
@attr('postgres')
def test_raw_assertion_search():
    """At least one English raw assertion links the concepts couch and sit."""
    found = RawAssertion.objects.filter(
        surface1__concept__text='couch',
        surface2__concept__text='sit',
        language=en)
    assert len(found) > 0
39 | 
40 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/test.txt.html:
--------------------------------------------------------------------------------
1 | <!doctype html>
2 | <html><body style="background-color: #bbb; color: #444;">
3 | <span style="background-color: rgb(253,231,26); padding: 0.5ex;">I am <span style="background-color: rgb(253,104,179)">feeling happy</span> <span style="background-color: rgb(0,236,246)">today</span> <span style="background-color: rgb(246,100,178)">because</span> the <span style="background-color: rgb(60,59,59)">sun is shining</span> .</span>
4 | <span style="background-color: rgb(12,4,8); padding: 0.5ex;"><span style="background-color: rgb(210,75,165)">Also</span> <span style="background-color: rgb(246,100,178)">because</span> I <span style="background-color: rgb(229,109,196)">watched a movie</span> <span style="background-color: rgb(76,31,54)">last</span> <span style="background-color: rgb(0,0,0)">night</span> which was hilariously <span style="background-color: rgb(242,87,147)">bad ,</span> <span style="background-color: rgb(254,104,179)">called</span> `` <span style="background-color: rgb(79,37,9)">Giant</span> <span style="background-color: rgb(0,0,242)">Octopus</span> Versus Mega <span style="background-color: rgb(22,22,209)">Shark</span> '' .</span>
5 | <span style="background-color: rgb(3,251,2); padding: 0.5ex;"><span style="background-color: rgb(70,69,146)">Colorless</span> <span style="background-color: rgb(4,251,2)">green</span> <span style="background-color: rgb(230,93,163)">ideas</span> <span style="background-color: rgb(13,0,0)">sleep</span> furiously <span style="background-color: rgb(253,230,243)">.</span> .</span></body></html>
6 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/amsterdam.txt:
--------------------------------------------------------------------------------
 1 | I threw away your greatest hits
 2 | You left them here the day you split
 3 | Your bass guitar and Shagg's CD
 4 | Well they don't mean that much to me right now
 5 | I'm going through your things
 6 | These days, I'm changing all my strings
 7 | 
 8 | Chorus 1:
 9 | I'm gonna write you a letter
10 | I'm gonna write you a book
11 | I wanna see your reaction
12 | I wanna see how it looks
13 | 
14 | Chorus 2:
15 | From way up on your cloud
16 | Where you've been hiding out
17 | Are you getting somewhere?
18 | Or did you get lost in Amsterdam?
19 | 
20 | You won't get too far from me
21 | believing everything you read
22 | You're wasted in the great unknown
23 | and I am finally ready to dispose
24 | of all your vintage clothes
25 | Your drugs and every secret code
26 | 
27 | Chorus 1:
28 | I'm gonna write you a letter
29 | I'm gonna write you a book
30 | I wanna see your reaction
31 | I wanna see how it looks
32 | 
33 | Chorus 2:
34 | From way up on your cloud
35 | Where you've been hiding out
36 | Are you getting somewhere?
37 | Or did you get lost in Amsterdam?
38 | 
39 | From your red balloon you were
40 | a super high tech jet fighter
41 | Floating over planet earth
42 | Come back down here, I'll show you where it hurts
43 | Take this bitter pill
44 | Is it easy to swallow?
45 | 
46 | Chorus 1:
47 | I'm gonna write you a letter
48 | I'm gonna write you a book
49 | I wanna see your reaction
50 | I wanna see how it looks
51 | 
52 | From way up on your cloud
53 | You're never coming down
54 | Are you getting somewhere?
55 | Or did you get lost in Amsterdam?
56 | 


--------------------------------------------------------------------------------
/maint/fix_raw_duplicates.py:
--------------------------------------------------------------------------------
 1 | from csc.util import queryset_foreach
 2 | from csc.conceptnet4.models import Sentence, Assertion, RawAssertion, Vote
 3 | 
 4 | def sort_and_check():
 5 |     all_raw = RawAssertion.objects.filter(language__id='zh-Hant').order_by('language', 'surface1__text', 'surface2__text', 'frame__id')
 6 |     print "Checking for duplicates."
 7 |     prev = None
 8 |     for raw in all_raw:
 9 |         print raw.id
10 |         if equivalent(prev, raw):
11 |             print (u"%s[%s] == %s[%s]" % (prev, prev.creator.username, raw, raw.creator.username)).encode('utf-8')
12 |             prev = switch_raw(raw, prev)
13 |         else:
14 |             prev = raw
15 | 
def equivalent(raw1, raw2):
    """
    Return True when two raw assertions share language id, both surface
    texts and frame id. A `raw1` of None is never equivalent to anything.
    """
    if raw1 is None:
        return False
    if raw1.language.id != raw2.language.id:
        return False
    if raw1.surface1.text != raw2.surface1.text:
        return False
    if raw1.surface2.text != raw2.surface2.text:
        return False
    return raw1.frame.id == raw2.frame.id
22 | 
def switch_raw(oldraw, newraw):
    """
    Merge `oldraw` into `newraw`, delete `oldraw` and return the survivor.

    When `newraw` was created by the generic 'openmind' user the roles are
    swapped so that the other raw assertion is the one that is kept. Votes
    on the deleted raw assertion are moved to the survivor unless the same
    user has already voted on it, in which case the vote is dropped. The
    survivor's score is updated and saved.
    """
    # avoid the generic username when possible
    if newraw.creator.username == 'openmind':
        oldraw, newraw = newraw, oldraw
    for vote in oldraw.votes.all():
        # Object ids are only unique within one content type, so restrict
        # the duplicate check to the content type this vote already has.
        nvotes = Vote.objects.filter(user=vote.user,
                                     content_type=vote.content_type,
                                     object_id=newraw.id).count()
        if nvotes == 0:
            vote.object = newraw
            vote.save()
        else:
            vote.delete()
    oldraw.delete()
    newraw.update_score()
    newraw.save()
    return newraw
38 | 
39 | if __name__ == '__main__':
40 |     sort_and_check()
41 | 
42 | 


--------------------------------------------------------------------------------
/conceptnet/lib/events/models.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.contenttypes import generic
 2 | from django.contrib.contenttypes.models import ContentType
 3 | from django.contrib.auth.models import User
 4 | from django.db import models
 5 | from datetime import datetime
 6 | 
 7 | class Activity(models.Model):
 8 |     name = models.TextField()
 9 |     def __unicode__(self):
10 |         return self.name
11 |     
12 |     @staticmethod
13 |     def get(name):
14 |         activity, created = Activity.objects.get_or_create(name=name)
15 |         return activity
16 | 
17 |     class Meta:
18 |         verbose_name_plural = 'Activities'
19 | 
class Event(models.Model):
    """
    Indicates that an object was created or possibly modified by an Activity.

    The object is referenced generically through `content_type` and
    `object_id`. Events are ordered newest first.
    """
    user         = models.ForeignKey(User)
    content_type = models.ForeignKey(ContentType)
    object_id    = models.PositiveIntegerField()
    object       = generic.GenericForeignKey('content_type', 'object_id')
    activity     = models.ForeignKey(Activity)
    timestamp    = models.DateTimeField(default=datetime.now)

    class Meta:
        ordering = ['-timestamp']

    def __unicode__(self):
        details = (self.timestamp, self.user, self.object, self.activity)
        return u'%s: %r/%r/%r' % details

    @classmethod
    def record_event(cls, obj, user, activity):
        """Create, save and return an event linking `obj`, `user` and `activity`."""
        return cls.objects.create(
            user=user,
            content_type=ContentType.objects.get_for_model(obj),
            object_id=obj._get_pk_val(),
            activity=activity)
44 | 


--------------------------------------------------------------------------------
/maint/extract_concepts.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from csc.conceptnet.models import Concept
 4 | 
 5 | 
 6 | from nltk import wordnet
 7 | def in_wordnet(word):
 8 |     base = wordnet.morphy(word)
 9 |     if base is None: base = word
10 |     for d in wordnet.Dictionaries.values():
11 |         if base in d: return True
12 |         if word in d: return True
13 |     return False
14 | 
15 | 
16 | if __name__=='__main__':
17 |     import sys
18 |     lang = sys.argv[1]
19 |     outfile = open(sys.argv[2], 'w')
20 | 
21 | 
22 |     # Stopword detector
23 |     from csc.representation.parsing.tools.models import FunctionFamily
24 |     is_stopword = FunctionFamily.build_function_detector(lang, 'stop')
25 | 
26 |     import cPickle as pickle
27 |     try:
28 |         concepts = pickle.load(open('concepts_dict.pickle','rb'))
29 |     except:
30 |         concepts_qs = Concept.objects.filter(language=lang, num_predicates__gt=0)
31 |         print >> sys.stderr, "Constructing concepts dictionary"
32 |         concepts = dict(((c.text, c) for c in concepts_qs.iterator()))
33 |         pickle.dump(concepts, open('concepts_dict.pickle','wb'), -1)
34 | 
35 |     print >> sys.stderr, "Filtering concepts"
36 |     skipped1 = skipped2 = 0
37 |     for stem_text, concept in concepts.iteritems():
38 |         stem_words = stem_text.split(' ')
39 |         if any(((word not in concepts) for word in stem_words)):
40 |             print >> sys.stderr, "Skipped-1: "+ stem_text
41 |             skipped1 += 1
42 |             continue
43 |         cname = concept.canonical_name
44 |         if any(((not is_stopword(word) and not in_wordnet(word)) for word in cname.split(' '))):
45 |             print >> sys.stderr, "Skipped-2: "+ stem_text
46 |             skipped2 += 1
47 |             continue
48 |         print >> outfile, cname
49 | 
50 |     print "Skipped1: %d, Skipped2: %d, total: %d" % (skipped1, skipped2, len(concepts))
51 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/patterns.pcfg:
--------------------------------------------------------------------------------
 1 | NP -> N1 [0.1] | DT N1 [0.1] | AP N1 [0.1] | DT AP N1 [0.1]
 2 | NP -> Npr [0.1] | PRP [0.05] | WP [0.05] | NP PP [0.05] | NP join NP [0.05]
 3 | NP -> VG [0.05] | VG NP [0.05] | VG ADV [0.05] | VG NP P [0.04] | VG NP VP [0.01]
 4 | NP -> QUOT NP QUOT [0.05] | NP POS NP [0.05]
 5 | N1 -> NN [0.3] | NNS [0.3] | NN N1 [0.3] | NNS N1 [0.1]
 6 | Npr -> NNP [0.5] | NNP Npr [0.5]
 7 | join -> ',' [0.4] | 'and' [0.4] | 'or' [0.2]
 8 | AP -> JJ [0.1] | VBN [0.1] | PRPp [0.1] | JJR [0.1] | JJS [0.1] | CD [0.1]
 9 | AP -> AP join AP [0.1] | AP AP [0.2]
10 | AP -> JJ PP [0.1]
11 | P  -> IN [0.5] | TO [0.5]
12 | PP -> P NP [0.5] | TO VP [0.5]
13 | V  -> VB [0.3] | VBZ [0.3] | VBP [0.3] | VB V [0.05] | V RP [0.05]
14 | VG -> VBG [0.8] | VB VBG [0.1] | VBG RP [0.1]
15 | VP -> V [0.2] | V NP [0.15] | V PP [0.15] | V NP PP [0.1]
16 | VP -> STATEVB NP [0.1] | STATEVB AP [0.1] | VP ADV [0.1]
17 | VP -> ADVP V [0.02] | ADVP V NP [0.02] | ADVP V PP [0.02] | ADVP V NP PP [0.02]
18 | VP -> ADVP STATEVB NP [0.01] | ADVP STATEVB AP [0.01]
19 | STATEVB -> BE [0.5] | CHANGE [0.5]
20 | POST -> [0.9] | VBN PP [0.03] | WDT VP [0.04] | WDT S [0.03]
21 | S -> NP VP [1.0]
22 | XP -> NP [0.4] | VP [0.3] | S [0.3]
23 | PASV -> VBN [0.4] | PASV PP [0.3] | PASV ADV [0.3]
24 | BEWORD -> 'be' [0.1] | 'is' [0.15] | 'are' [0.15] | 'was' [0.1] | 'being' [0.1]
25 | BEWORD -> 'were' [0.1] | 'been' [0.1] | "'re" [0.1] | "'m" [0.1]
26 | BE -> BEWORD [0.8] | MD BEWORD [0.1] | MD RB BEWORD [0.1]
27 | MD -> 'can' [1.0]
28 | HAVE -> 'has' [0.25] | 'have' [0.25] | 'contain' [0.25] | 'contains' [0.25]
29 | DO -> 'do' [0.4] | 'does' [0.3] | 'did' [0.3]
30 | CHANGE -> 'get' [0.25] | 'gets' [0.25] | 'become' [0.25] | 'becomes' [0.25]
31 | ADV -> RB [0.5] | RP [0.3] | RBR [0.2]
32 | ADVP -> [0.9] | RB [0.025] | RB ADVP [0.025] | MD ADVP [0.025] | DO ADVP [0.025]
33 | ADVP1 -> RB [0.25] | RB ADVP [0.25] | MD ADVP [0.25] | DO ADVP [0.25]
34 | ADVP2 -> [0.9] | RB [0.075] | RB ADVP2 [0.025]
35 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/offline_parser.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, traceback
 3 | from pcfgpattern import pattern_parse
 4 | import yaml
 5 | from conceptnet.models import Sentence, Language
 6 | from django.core.paginator import Paginator
 7 | #from django.db import transaction
 8 | 
 9 | def process_sentence(sentence):
10 |     print sentence.text.encode('utf-8')
11 |     _, frametext, reltext, matches = pattern_parse(sentence.text)
12 |     if reltext is None or reltext == 'junk': return []
13 |     else:
14 |         return [dict(id=sentence.id, frametext=frametext, reltext=reltext,
15 |         matches=matches)]
16 | 
17 | def run(file, start_page=1, end_page=1000000):
18 |     all_sentences = Sentence.objects.filter(language=Language.get('en')).order_by('id')
19 |     paginator = Paginator(all_sentences,100)
20 |     #pages = ((i,paginator.page(i)) for i in range(start_page,paginator.num_pages))
21 | 
22 |     def do_batch(sentences):
23 |         preds = []
24 |         for sentence in sentences:
25 |             try:
26 |                 preds.extend(process_sentence(sentence))
27 |             # changed to an improbable exception for now
28 |             except Exception, e:
29 |                 # Add sentence
30 |                 e.sentence = sentence
31 | 
32 |                 # Extract traceback
33 |                 e_type, e_value, e_tb = sys.exc_info()
34 |                 e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb ))
35 | 
36 |                 # Raise again
37 |                 raise e
38 |         file.write('\n--- ')
39 |         yaml.dump_all(preds, file)
40 | 
41 |     # Process sentences
42 |     page_range = [p for p in paginator.page_range if p >= start_page and p <
43 |     end_page]
44 |     for i in page_range:
45 |         sentences = paginator.page(i).object_list
46 |         do_batch(sentences)
47 | 
48 | 
if __name__ == '__main__':
    # Usage: offline_parser.py <start_page> <end_page> <output file>
    start_page = int(sys.argv[1])
    end_page = int(sys.argv[2])
    out = open(sys.argv[3], 'w+')
    try:
        run(out, start_page, end_page)
    finally:
        # Flush and close the output even when a sentence failed to parse
        out.close()
54 | 
55 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/docs.py:
--------------------------------------------------------------------------------
 1 | from conceptnet.models import *
 2 | from piston.handler import BaseHandler
 3 | from piston.doc import generate_doc
 4 | from conceptnet.webapi import handlers
 5 | 
 6 | from django.test.client import Client
 7 | from django.shortcuts import render_to_response
 8 | from django.template import RequestContext, Context, loader
 9 | from django.http import HttpResponse
10 | 
11 | from docutils.core import publish_string
12 | 
# Base URL handed to the documentation template as 'API_BASE'
API_BASE = 'http://openmind.media.mit.edu'

# Django test client used to fetch the example results shown in the docs
client = Client()
def documentation_view(request):
    """
    Render the API documentation as plain text.

    Collects a piston documentation object for every BaseHandler subclass
    found in the `handlers` module that either has a resource URI
    template or declares `example_uri`, attaches an example URL and the
    response the test client gets for it, and renders the result with the
    'documentation.txt' template.
    """
    docs = []
    for klass in handlers.__dict__.values():
        if isinstance(klass, type) and issubclass(klass, BaseHandler):
            doc = generate_doc(klass)
            if doc.get_resource_uri_template():
                # Only methods that carry documentation are listed
                doc.useful_methods = [m for m in doc.get_all_methods() if m.get_doc()]
                if hasattr(klass, 'example_args'):
                    # Fill the '{name}' placeholders of the URI template
                    # with the handler's example values
                    args = klass.example_args
                    example_url = doc.get_resource_uri_template()
                    for arg, value in args.items():
                        example_url = example_url.replace('{%s}' % arg, str(value))
                    doc.example_url = example_url+'query.yaml'
                    doc.example_result = client.get(doc.example_url).content
                doc.uri_template = doc.get_resource_uri_template()
                docs.append(doc)
            elif hasattr(klass, 'example_uri'):
                # No URI template: use the example URI and template the
                # handler declares itself
                doc = generate_doc(klass)
                example_url = klass.example_uri
                doc.example_url = example_url+'query.yaml'
                doc.example_result = client.get(doc.example_url).content
                doc.uri_template = klass.example_uri_template
                docs.append(doc)
    # Sorting makes the output independent of the dict's iteration order
    docs.sort(key=lambda doc: doc.uri_template)
    t = loader.get_template('documentation.txt')
    rst = t.render(Context({'docs': docs, 'API_BASE': API_BASE}))
    return HttpResponse(rst, mimetype='text/plain')
43 | 


--------------------------------------------------------------------------------
/maint/reconcile_assertions.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from csc.conceptnet.models import *
 3 | from csc.corpus.models import *
 4 | from django.contrib.auth import *
 5 | from django.db import transaction
 6 | 
# Number of assertions that have no raw assertion; used as the batch's
# progress denominator. Note that this runs at module level.
den = Assertion.objects.filter(raw__isnull=True).count()

# Add raw assertions to predicates created on Ruby Commons.
if den > 0:
    # NOTE(review): the owner is the hard-coded user id 20003 -- confirm
    # it exists in the target database
    batch = Batch(owner=User.objects.get(id=20003),
        remarks="creating raw assertions for ruby commons",
        progress_den=den)
    batch.save()
    
    num = 0
    for a in Assertion.objects.filter(raw__isnull=True):
        # Copy the assertion's fields into a new raw assertion and link
        # the two in both directions
        raw = RawAssertion(batch=batch, frame=a.frame, predtype=a.predtype,
                           text1=a.text1, text2=a.text2, polarity=a.polarity,
                           modality=a.modality, sentence=a.sentence,
                           language=a.language, predicate=a)
        raw.save()
        a.raw = raw
        a.save()
        # The batch is saved after every assertion so its progress
        # counter stays current
        num += 1
        batch.progress_num = num
        batch.save()
        print num, '/', den, raw
29 | 
30 | # Some raw assertions have text1 and text2 switched, and this was fixed after
31 | # the fact in their predicates. Fix that.
32 | @transaction.commit_on_success
33 | def switch_raw():
34 |     i = 0
35 |     for a in Assertion.objects.all().select_related('raw'):
36 |         if i % 1000 == 0: print i
37 |         i += 1
38 |         if (a.language.nl.normalize(a.text1) == a.language.nl.normalize(a.raw.text2) and
39 |             a.language.nl.normalize(a.text2) == a.language.nl.normalize(a.raw.text1) and
40 |             a.stem1.text != a.stem2.text):
41 |             t1, t2 = a.raw.text2, a.raw.text1
42 |             a.raw.text1 = t1
43 |             a.raw.text2 = t2
44 |             a.raw.save()
45 |             print a
46 |             print a.raw
47 |             print
48 | 
49 | switch_raw()
50 | 
51 | #for a in Assertion.objects.all():
52 | #    if a.text1 != a.raw.text1 or a.text2 != a.raw.text2:
53 | #        print a.text1, '/', a.text2, a
54 | #        print a.raw
55 | #        print
56 | 


--------------------------------------------------------------------------------
/test/test_denormalized.py:
--------------------------------------------------------------------------------
 1 | from nose.tools import *
 2 | from csc.conceptnet.models import *
 3 | from nose.plugins.attrib import *
 4 | 
# Shared fixtures, looked up at import time: the activity used for every
# change this test makes, and two existing users (the lookups raise if the
# users are missing from the database).
activity = Activity.objects.get_or_create(name="nosetests")[0]
user1 = User.objects.get(username='rspeer')
user2 = User.objects.get(username='kcarnold')
 8 | 
def test_denormalized():
    """
    Check that the stored `score` of a raw assertion and the stored
    `num_assertions` of a concept follow ratings as they are set.
    """
    testconcept = Concept.get('test', 'en')

    # Start from a clean slate: create the raw assertion, set both users'
    # ratings to 0, then delete it and its assertion.
    raw = RawAssertion.make(
      user=user1,
      frame=Frame.objects.get(language=en, relation__name='HasProperty',
                              text='{1} is {2}'),
      text1='the test',
      text2='successful',
      activity=activity)
    raw.set_rating(user2, 0, activity)
    raw.set_rating(user1, 0, activity)
    raw.delete()
    raw.assertion.delete()

    # Baseline assertion count for the concept
    testconcept.update_num_assertions()
    num = testconcept.num_assertions

    # Creating the raw assertion again is expected to give it score 1 and
    # add one assertion to the concept.
    raw = RawAssertion.make(
      user=user1,
      frame=Frame.objects.get(language=en, relation__name='HasProperty',
                              text='{1} is {2}'),
      text1='the test',
      text2='successful',
      activity=activity)
    raw_id = raw.id 

    # Reload from the database so the stored values are checked
    raw = RawAssertion.objects.get(id=raw_id)
    assert raw.score == 1
    
    testconcept = Concept.get('test', 'en')
    assert testconcept.num_assertions == (num + 1)

    # A rating of 1 from a second user raises the score to 2 and leaves
    # the assertion count unchanged.
    raw.set_rating(user2, 1, activity)

    raw = RawAssertion.objects.get(id=raw_id)
    assert raw.score == 2
    
    testconcept = Concept.get('test', 'en')
    assert testconcept.num_assertions == (num + 1)

    # Setting every rating on the raw assertion and on its assertion to 0
    # brings the count back to the baseline and the score to 0.
    raw.set_rating(user2, 0, activity)
    raw.set_rating(user1, 0, activity)
    raw.assertion.set_rating(user2, 0, activity)
    raw.assertion.set_rating(user1, 0, activity)

    testconcept = Concept.get('test', 'en')
    assert testconcept.num_assertions == num
    
    raw = RawAssertion.objects.get(id=raw_id)
    assert raw.score == 0
60 | 
61 | if __name__ == '__main__':
62 |     test_denormalized()
63 | 


--------------------------------------------------------------------------------
/maint/compare_sentences.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from csc.conceptnet.models import *
 4 | from csc.corpus.models import *
 5 | #from django.contrib.auth import *
 6 | from django.db import transaction
 7 | 
 8 | def check_polarity():
 9 |     for a in Assertion.objects.all().select_related('raw'):
10 |         if a.polarity != a.raw.polarity:
11 |             print a.sentence
12 |             print a.raw.sentence
13 |             print a
14 |             print a.raw
15 |             print a.rating_set.all()
16 |             print
17 | 
18 | #check_polarity()
19 | 
20 | # conclusion: not worth fixing. The cases where they conflict are all generally
21 | # ugly, but the raw assertions (which we're keeping) are closer to correct.
22 | #
23 | # other conclusion: do not use the old csamoa ratings.
24 | 
def basically_the_same(s1, s2):
    """
    Return True if two sentences differ only in spacing and end punctuation.

    Both strings are canonicalized by collapsing every run of spaces into a
    single space and then stripping periods and spaces from both ends.
    """
    def canonical(s):
        # A single replace('  ', ' ') pass turns three spaces into two, so
        # repeat until no double space is left.
        while '  ' in s:
            s = s.replace('  ', ' ')
        return s.strip('. ')
    return canonical(s1) == canonical(s2)
29 | 
30 | def check_raw_mistakes():
31 |     for ra in RawAssertion.objects.all().select_related('sentence'):
32 |         rawsent = ra.nl_repr()
33 |         sent = ra.sentence.text
34 |         if not basically_the_same(rawsent, sent):
35 |             print ra
36 |             print repr(rawsent)
37 |             print repr(sent)
38 |             print "batch:", ra.batch
39 |             print "predicate:", ra.predicate
40 |             print "frame:", ra.frame.id, ra.frame
41 |             betterone = False
42 |             for r2 in ra.sentence.rawassertion_set.all():
43 |                 if basically_the_same(rawsent, r2.nl_repr()):
44 |                     betterone = True
45 |                 break
46 |             if ra.predicate is None and betterone:
47 |                 print "This raw predicate should be deleted."
48 |             print
49 | 
50 | @transaction.commit_on_success
51 | def unswitch_raw():
52 |     evilbatch = Batch.objects.get(id=136)
53 |     for ra in RawAssertion.objects.filter(batch=evilbatch).select_related('frame'):
54 |         if ra.predicate is None and ra.frame.id in [1384, 1387, 1420]:
55 |             text1 = ra.text2
56 |             text2 = ra.text1
57 |             ra.text1 = text1
58 |             ra.text2 = text2
59 |             ra.save()
60 |             print ra
61 |             
62 | unswitch_raw()


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/adverbs.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | from corpus.models import Language
 3 | from conceptnet4.models import Frequency
 4 | frequencies = {
 5 |     'never': -10,
 6 |     "n't": -5,
 7 |     "doesn't": -5,
 8 |     "not": -5,
 9 |     "no": -5,
10 |     "can't": -5,
11 |     "won't": -5,
12 |     "don't": -5,
13 |     "couldn't": -5,
14 |     "wouldn't": -5,
15 |     "didn't": -5,
16 |     "shouldn't": -5,
17 |     "cannot": -5,
18 |     "isn't": -5,
19 |     "wasn't": -5,
20 |     "aren't": -5,
21 |     "weren't": -5,
22 |     'rarely': -2,
23 |     'infrequently': -2,
24 |     'few': -2,
25 |     'seldom': -2,
26 |     'hardly': -2,
27 |     'occasionally': 2,
28 |     'sometimes': 4,
29 |     'possibly': 4,
30 |     'some': 4,
31 |     'generally': 6,
32 |     'typically': 6,
33 |     'likely': 6,
34 |     'probably': 6,
35 |     'often': 6,
36 |     'oftentimes': 6,
37 |     'frequently': 6,
38 |     'usually': 8,
39 |     'most': 8,
40 |     'mostly': 8,
41 |     'almost': 9,
42 |     'always': 10,
43 |     'every': 10,
44 |     'all': 10,
45 | }
en = Language.get('en')
# Maps each frequency value to the English Frequency row with the given text.
# Value 5 (empty text) is what map_adverb falls back to.
dbfreqs = dict(
    (value, Frequency.objects.get(language=en, text=text))
    for value, text in [
        (-10, u"never"),
        (-5, u"not"),
        (-2, u"rarely"),
        (2, u"occasionally"),
        (4, u"sometimes"),
        (5, u""),
        (6, u"generally"),
        (8, u"usually"),
        (9, u"almost always"),
        (10, u"always"),
    ]
)
59 | 
def map_adverb(adv):
    """
    Return the dbfreqs entry for the lowest-valued known word in adv.

    adv is split on whitespace and lowercased; words missing from
    frequencies are ignored. If no word is known, the entry for value 5
    is returned.
    """
    values = [frequencies[word]
              for word in (w.lower() for w in adv.split())
              if word in frequencies]
    if not values:
        return dbfreqs[5]
    return dbfreqs[min(values)]
68 | 
69 | def demo():
70 |     adverbs = set()
71 |     for entry in yaml.load_all(open('delayed_sentences.yaml')):
72 |         if entry is None: continue
73 |         matches = entry.get('matches', {})
74 |         adv = matches.get('a')
75 |         if adv and adv not in adverbs:
76 |             print adv,
77 |             print map_adverb(adv)
78 |             adverbs.add(adv)
79 | 
80 | 


--------------------------------------------------------------------------------
/conceptnet/django_settings/db_downloader.py:
--------------------------------------------------------------------------------
 1 | import urllib, os, sys
 2 | import tarfile
 3 | SQLITE_URL = "http://conceptnet.media.mit.edu/dist/ConceptNet-sqlite.tar.gz"
 4 | 
 5 | def prompt_for_download(filename):
 6 |     print """
 7 | You don't seem to have the ConceptNet database installed. (If you do,
 8 | I couldn't find the db_config.py file that says where it is.)
 9 | 
10 | If you want, I can download the current database for you and save it as:
11 | """
12 |     print '\t'+filename
13 |     print
14 |     print "This will be a large download -- around 450 megabytes."
15 |     response = raw_input("Do you want to download the database? [Y/n] ")
16 |     if response == '' or response.lower().startswith('y'):
17 |         return download(SQLITE_URL, filename)
18 |     else:
19 |         print """
20 | Not downloading the database.
21 | The program will have to exit now. For information on setting up ConceptNet,
22 | go to: http://csc.media.mit.edu/docs/conceptnet/install.html
23 | """
24 |         return False
25 | 
26 | def _mkdir(newdir):
27 |     """
28 |     http://code.activestate.com/recipes/82465/
29 |     
30 |     works the way a good mkdir should :)
31 |         - already exists, silently complete
32 |         - regular file in the way, raise an exception
33 |         - parent directory(ies) does not exist, make them as well
34 |     """
35 |     if os.path.isdir(newdir):
36 |         pass
37 |     elif os.path.isfile(newdir):
38 |         raise OSError("A file with the same name as the desired " \
39 |                       "directory, '%s', already exists." % newdir)
40 |     else:
41 |         head, tail = os.path.split(newdir)
42 |         if head and not os.path.isdir(head):
43 |             _mkdir(head)
44 |         if tail:
45 |             os.mkdir(newdir)
46 | 
47 | 
48 | def download(rem_filename, dest_filename):
49 |     dir = os.path.dirname(dest_filename)
50 |     member = os.path.basename(dest_filename)
51 |     _mkdir(dir)
52 |     tar_filename = dir + os.path.sep + 'ConceptNet-sqlite.tar.gz'
53 |     def dlProgress(count, blockSize, totalSize):
54 |         percent = int(count*blockSize*100/totalSize)
55 |         sys.stdout.write("\r" + rem_filename + "... %2d%%" % percent)
56 |         sys.stdout.flush()
57 |     urllib.urlretrieve(rem_filename, tar_filename, reporthook=dlProgress)
58 |     tar_obj = tarfile.open(tar_filename)
59 |     print
60 |     print "Extracting."
61 |     tar_obj.extract(member, path=dir)
62 |     return True
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/tools/cnet_rdf.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | PREFIX = 'http://conceptnet.media.mit.edu/'
 4 | 
 5 | from conceptnet.models import Assertion, Frame
 6 | 
 7 | from rdflib.Graph import Graph
 8 | from rdflib.store import Store
 9 | from rdflib import Namespace, Literal, BNode, RDF, plugin, URIRef
10 | 
# Open the rdflib store provided by the 'SQLite' plugin at 'db' and wrap it in
# a graph identified as <PREFIX>graph/en.
store = plugin.get('SQLite', Store)()
store.open('db')
g = Graph(store, identifier=URIRef(PREFIX+'graph/en'))

# Namespaces used to build the URIs of the dumped resources.
base = Namespace(PREFIX)
concept = Namespace(PREFIX+'concepts/')
reltype = Namespace(PREFIX+'reltypes/')
frame = Namespace(PREFIX+'frames/')
user = Namespace(PREFIX+'users/')
language = Namespace(PREFIX+'language/')


#surface_form_ = base['SurfaceForm']
# NOTE(review): these two names are not referenced anywhere else in this
# script; the dump loop below builds the same properties via b().
left_text_ = base['LeftText']
right_text = base['RightText']
26 | 
def b(thing):
    """Return the URI for thing inside the base namespace."""
    return base[thing]
28 | 
class SuperNode(BNode):
    """A blank node that registers itself in the graph g as an RDF.Statement."""

    def __init__(self):
        # Record this node's type in the module-level graph on creation.
        g.add((self, RDF.type, RDF.Statement))

    def say(self, type, obj):
        """Add the triple (self, type, obj) to the graph g."""
        g.add((self, type, obj))
35 | 
def add(subj, type, obj):
    """
    Add a reified statement (subj, type, obj) to the graph and return its node.

    A new SuperNode is created and given RDF.subject, RDF.predicate and
    RDF.object properties, in that order.
    """
    stmt = SuperNode()
    for role, value in ((RDF.subject, subj),
                        (RDF.predicate, type),
                        (RDF.object, obj)):
        stmt.say(role, value)
    return stmt
42 | 
43 | print 'Dumping assertions.'
44 | for stem1, predtype, stem2, text1, text2, frame_id, language_id, creator_id, score, sentence in Assertion.useful.filter(language='en').values_list('stem1__text', 'predtype__name', 'stem2__text',
45 |                                                                                                  'text1', 'text2', 'frame_id', 'language_id', 'creator_id', 'score', 'sentence__text').iterator():
46 |     stmt = add(concept[stem1], reltype[predtype], concept[stem2])
47 |     stmt.say(b('LeftText'), Literal(text1))
48 |     stmt.say(b('RightText'), Literal(text2))
49 |     stmt.say(b('FrameId'), frame[str(frame_id)])
50 |     stmt.say(b('Language'), language[str(language_id)])
51 |     stmt.say(b('Creator'), user[str(creator_id)])
52 |     stmt.say(b('Score'), Literal(score))
53 |     stmt.say(b('Sentence'), Literal(sentence))
54 | 
55 | g.commit()
56 | print 'Dumping frames.'
57 | for id, predtype, text, goodness in Frame.objects.filter(language='en').values_list('id', 'predtype__name', 'text', 'goodness').iterator():
58 |     ff = frame[str(id)]
59 |     g.add((ff, b('RelationType'), reltype[predtype]))
60 |     g.add((ff, b('FrameText'), Literal(text)))
61 |     g.add((ff, b('FrameGoodness'), Literal(str(goodness))))
62 | 
63 | 
64 | g.commit()
65 | 


--------------------------------------------------------------------------------
/tools/dump_to_sqlite.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # This one should run in the ConceptNet Django environment.
 3 | from conceptnet.models import Concept # just for the environment setup.
 4 | from django.db.models import get_models
 5 | from django.db.models.query import QuerySet
 6 | from csc_utils.batch import Status
 7 | import sys, sqlite3
 8 | 
 9 | models_to_dump = '''
10 | Vote RawAssertion Frame SurfaceForm Assertion
11 | Relation Frequency Concept Language
12 | Sentence User ContentType Activity Batch
13 | '''.strip().split()
14 | 
15 | models = dict((model.__name__, model) for model in get_models()
16 |               if model.__name__ in models_to_dump)
17 | 
18 | def dump_to_sqlite(conn):
19 |     cursor = conn.cursor()
20 | 
21 |     for idx, model_name in enumerate(models_to_dump):
22 |         model = models[model_name]
23 |         print >> sys.stderr, '(%2d/%2d) dumping %s' % (idx+1, len(models_to_dump), model_name)
24 |         meta = model._meta
25 |         db_table = meta.db_table
26 | 
27 |         truncate = 'DELETE FROM %s' % db_table
28 |         print truncate
29 |         cursor.execute(truncate)
30 | 
31 |         if model_name == 'User':
32 |             # User is special because we don't want to dump private info.
33 |             placeholder_timestamp = '1969-12-31 19:00:00'
34 |             sql = 'INSERT INTO %s (id, username, last_login, date_joined, first_name, last_name, email, password, is_staff, is_active, is_superuser) VALUES (?, ?, %r, %r, "", "", "", "X", 0, 1, 0)' % (db_table, placeholder_timestamp, placeholder_timestamp)
35 |             queryset = QuerySet(model).values_list('id', 'username')
36 |         else:
37 |             # Okay, so a field has a .serialize parameter on it. But the auto
38 |             # id field has this set to False. Fail. Just serialize all the
39 |             # local fields.
40 |             fields = meta.local_fields
41 |             field_names = [f.column for f in fields]
42 | 
43 |             sql = 'INSERT INTO %s (%s) VALUES (%s)' % (
44 |                 db_table,
45 |                 ', '.join(field_names),
46 |                 ', '.join('?'*len(fields)))
47 |             queryset = QuerySet(model).values_list(*(field_names)) # hm, this might not work if the db names are different.
48 | 
49 |         print sql
50 |         cursor.executemany(sql, Status.reporter(queryset, report_interval=1000))
51 |         conn.commit()
52 | 
53 |     cursor.close()
54 | 
if __name__ == '__main__':
    # The first command-line argument is the path of the SQLite database file
    # to fill.
    db_name = sys.argv[1]
    conn = sqlite3.connect(db_name)
    dump_to_sqlite(conn)
59 | 
60 | 


--------------------------------------------------------------------------------
/tools/create_placeholder_users.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, os
 3 | if 'DJANGO_SETTINGS_MODULE' not in os.environ:
 4 |     print "Setting DJANGO_SETTINGS_MODULE=csamoa.settings temporarily."
 5 |     print "You may want to set that more permanently in your environment."
 6 |     print
 7 |     os.environ['DJANGO_SETTINGS_MODULE'] = 'csc.django_settings'
 8 | 
 9 | from csc.conceptnet.models import User
10 | from csc.corpus.models import Sentence
11 | from votes.models import Vote
12 | from django.db import transaction, connection
13 | from django.conf import settings
14 | 
15 | try:
16 |     cursor = connection.cursor()
17 | except:
18 |     print "Problem while connecting to the database. Check your db_config.py."
19 |     print "Original error:"
20 |     raise
21 | 
22 | users_table_error = """
23 | Use this script ONLY if you have just created a fresh ConceptNet
24 | database, imported the dump from the website, and ran
25 | `./manage.py syncdb` to add the Django tables.
26 | 
27 | When running `syncdb`, DO NOT create an admin user. It will conflict
28 | with a user that this script will add.
29 | """
30 | 
31 | try:
32 |     if User.objects.all().count() > 0:
33 |         print "Refusing to run because you already have users in the database."
34 |         print
35 |         print users_table_error
36 |         print "Original error:"
37 |         sys.exit(1)
38 | except:
39 |     print """
40 | Encountered a problem checking the users table (auth_user). Maybe it
41 | doesn't exist?"""
42 |     print
43 |     print users_table_error
44 |     print "Original error:"
45 |     raise
46 | 
47 | 
48 | ## Now the real work.
49 | 
50 | print "Getting all known uids... ",
51 | # All Assertions have Sentences, which have the same creator. So the Sentences
52 | # is the most complete list of users.
53 | print "(users...) ",
54 | uids = set(Sentence.objects.all().values_list('creator__id', flat=True).iterator())
55 | # But some users may have been raters only.
56 | print "(ratings...) ",
57 | for uid in Vote.objects.all().values_list('user__id', flat=True).iterator():
58 |     uids.add(uid)
59 | print
60 | 
@transaction.commit_on_success
def make_users(uids):
    """
    Create one placeholder User for every id in uids.

    Each user gets the given id and the username 'user_<id>'.
    """
    # NOTE(review): the '%d' format assumes every uid is an integer; a None
    # id would raise TypeError here -- confirm the source columns cannot be
    # null.
    for uid in uids:
        User.objects.create(id=uid, username='user_%d' % uid)
65 | 
66 | print "Creating %d placeholder users..." % len(uids)
67 | make_users(uids)
68 | 
69 | if settings.DATABASE_ENGINE in ('postgresql_psycopg2', 'postgresql'):
70 |     print "Resetting id sequence for PostgreSQL..."
71 |     seq = 'auth_user_id_seq'
72 |     cursor.execute('ALTER SEQUENCE %s RESTART WITH %d;' % (seq, max(uids)+1))
73 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/lightning.txt.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html><body style="background-color: rgb(160,99,165); color: #444;">
 3 | <div style="background-color: rgb(103,93,87); padding: 1ex;"><span style="background-color: rgb(166,123,124)">Standing</span> on a <span style="background-color: rgb(97,92,89)">building</span> I am a <span style="background-color: rgb(220,130,141)">lightning</span> <span style="background-color: rgb(140,112,92)">rod</span> </div>
 4 | <div style="background-color: rgb(139,118,208); padding: 1ex;">And all these <span style="background-color: rgb(136,118,208)">clouds</span> are so <span style="background-color: rgb(188,91,96)">familiar</span> </div>
 5 | <div style="background-color: rgb(143,66,80); padding: 1ex;">Descending from the <span style="background-color: rgb(92,83,155)">mountain tops</span> the <span style="background-color: rgb(214,90,187)">gods</span> are <span style="background-color: rgb(208,178,4)">threatening .</span> </div>
 6 | <div style="background-color: rgb(219,61,89); padding: 1ex;">I will <span style="background-color: rgb(177,78,123)">return</span> an <span style="background-color: rgb(232,107,183)">honest</span> <span style="background-color: rgb(217,29,64)">soldier</span> </div>
 7 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
 8 | <div style="background-color: rgb(245,102,172); padding: 1ex;">Steady on this <span style="background-color: rgb(136,69,19)">high rise</span> <span style="background-color: rgb(252,102,176)">like</span> <span style="background-color: rgb(13,241,14)">every</span> <span style="background-color: rgb(220,130,141)">lightning</span> <span style="background-color: rgb(140,112,92)">rod</span> </div>
 9 | <div style="background-color: rgb(134,113,205); padding: 1ex;">And all these <span style="background-color: rgb(136,118,208)">clouds</span> are <span style="background-color: rgb(162,101,150)">boiling</span> <span style="background-color: rgb(100,86,197)">over</span> </div>
10 | <div style="background-color: rgb(110,92,151); padding: 1ex;"><span style="background-color: rgb(202,105,163)">Swimming</span> in adrenaline the <span style="background-color: rgb(103,92,129)">sky</span> is <span style="background-color: rgb(46,23,15)">caving</span> in </div>
11 | <div style="background-color: rgb(211,52,88); padding: 1ex;">but I will <span style="background-color: rgb(19,7,238)">remain</span> the <span style="background-color: rgb(232,107,183)">honest</span> <span style="background-color: rgb(217,29,64)">soldier .</span> </div>
12 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div></body></html>
13 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/migrate_templated_qs4e.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import csamoa
 4 | from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\
 5 |   Frequency, Relation, SurfaceForm, Concept
 6 | import conceptnet.models as cn3
 7 | from corpus.models import Sentence, Language, Activity
 8 | from django.contrib.auth.models import User
 9 | from itertools import islice
10 | import yaml
11 | from csc_utils import queryset_foreach
12 | 
13 | csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating')
def process_predicate(pred):
    """
    Convert one old-style predicate into a RawAssertion and copy its ratings.

    A Frequency (value -5 for negative polarity, otherwise 5) and a Frame
    matching the predicate's relation, language and frame text are fetched
    or created. A RawAssertion is then made for the sentence's creator.
    Every rating on the predicate is clamped to the range -1..1 and applied
    to the sentence, the raw assertion and its assertion.

    Returns the new RawAssertion.
    """
    frametext = pred.frame.text
    relation = Relation.objects.get(id=pred.relation.id)
    sentence = pred.sentence
    lang = pred.language

    if pred.polarity < 0:
        freq_value, freq_text = -5, '[negative]'
    else:
        freq_value, freq_text = 5, ''
    freq, created = Frequency.objects.get_or_create(
        value=freq_value, language=lang, defaults=dict(text=freq_text))
    if created:
        freq.save()

    frame, created = Frame.objects.get_or_create(
        relation=relation, language=lang, text=frametext,
        defaults=dict(frequency=freq, goodness=1))
    if created:
        frame.save()

    raw_assertion = RawAssertion.make(sentence.creator, frame, pred.text1,
                                      pred.text2, csamoa4_activity, 1)
    assertion = raw_assertion.assertion

    for rating in pred.rating_set.all():
        score = rating.rating_value.deltascore
        if score < -1:
            score = -1
        elif score > 1:
            score = 1
        # Ratings recorded without an activity are filed under 'unknown'.
        if rating.activity_id is None:
            rating_activity = Activity.objects.get(name='unknown')
        else:
            rating_activity = rating.activity
        for target in (sentence, raw_assertion, assertion):
            target.set_rating(rating.user, score, rating_activity)
    return raw_assertion
48 | 
def run():
    """
    Run process_predicate over the old predicates of five languages.

    The languages 'it', 'fr', 'nl', 'es' and 'pt' are handled in that order,
    each through queryset_foreach with a batch size of 10.
    """
    languages = ['it', 'fr', 'nl', 'es', 'pt']
    for lang in languages:
        predicates = cn3.Predicate.objects.filter(language__id=lang)
        queryset_foreach(predicates, process_predicate, batch_size=10)
59 | 
60 | if __name__ == '__main__':
61 |     run()
62 | 
63 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/templates/documentation.txt:
--------------------------------------------------------------------------------
 1 | {% load rst %}
 2 | .. _webapi:
 3 | 
 4 | The ConceptNet Web API
 5 | ======================
 6 | 
 7 | You can look up information in ConceptNet using a Web-based API. The API
 8 | follows the `Representational State Transfer`_ (REST) standard, using simple
 9 | HTTP requests to interact with the server. (A prominent example of a REST API
10 | is the `Twitter API`_.)
11 | 
12 | .. _`Representational State Transfer`: http://en.wikipedia.org/wiki/Representational_State_Transfer
13 | .. _`Twitter API`: http://apiwiki.twitter.com/Twitter-API-Documentation
14 | 
15 | The URLs listed below are relative to the base URL of {{API_BASE}}. As an
16 | example, you can use the command line utility cURL to see the results of the
17 | `/api/en/concept/duck` call::
18 | 
19 |   curl {{API_BASE}}/api/en/concept/duck/
20 | 
21 | By the way, the excellent `django-piston`_ library made it much easier to write
22 | this API, its documentation, and its examples, all at the same time.
23 | 
24 | .. _`django-piston`: http://bitbucket.org/jespern/django-piston/wiki/Home
25 | 
26 | If you want to quickly get started using this Web API in Python, go to the next
27 | section, `webapi-client`_.
28 | 
29 | Output formats
30 | --------------
31 | 
32 | When the API returns an object, it will represent it as a structure of key-value
33 | mappings. This structure will, by default, be represented in JSON format.
34 | 
35 | You can request the results in a different format by adding "query.format" to
36 | the end of a URL:
37 | 
38 | - Adding `query.xml` will request the results in XML format.
39 | - Adding `query.json` will request the results in their default JSON format.
40 | - Adding `query.yaml` will request the results in YAML_ format.
41 |   
42 | .. _YAML: http://yaml.org
43 | 
For example, requesting `/api/en/concept/duck/query.xml` returns the results for the "duck" concept in XML format.
45 | 
46 | The examples shown below all use YAML format, because it is fairly readable and
47 | the most compact of all these formats.
48 | 
49 | REST requests
50 | -------------
51 | 
52 | {% for doc in docs %}
53 | 
54 | {{ doc.name }}
55 | .......................................
56 | 
57 | .. function:: {{ doc.uri_template }}
58 | 
59 |     {{ doc.get_doc|default:""|safe }}
60 |     Implemented by: :class:`conceptnet.webapi.{{ doc.name }}`
61 | 
62 |     {% for method in doc.useful_methods %}
63 |     .. method:: {{ method.name }}({{ method.signature }})
64 |     
65 |         {{ method.get_doc|indent:"8"|safe }}
66 |     {% endfor %}
67 |     {% if doc.example_url %}
68 |     **Example:** `GET {{ doc.example_url }} <{{API_BASE}}{{ doc.example_url }}>`_ ::
69 |     
70 |         {{ doc.example_result|indent:"8"|safe }}
71 |     
72 |     {% endif %}
73 | {% endfor %}
74 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | PYTHONPATH    = ..:../..
 9 | export DJANGO_SETTINGS_MODULE = settings
10 | # Internal variables.
11 | PAPEROPT_a4     = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS   = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
14 | 
15 | .PHONY: help clean html web pickle htmlhelp latex changes linkcheck
16 | 
17 | help:
18 | 	@echo "Please use \`make <target>' where <target> is one of"
19 | 	@echo "  html      to make standalone HTML files"
20 | 	@echo "  pickle    to make pickle files"
21 | 	@echo "  json      to make JSON files"
22 | 	@echo "  htmlhelp  to make HTML files and a HTML help project"
23 | 	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
24 | 	@echo "  changes   to make an overview over all changed/added/deprecated items"
25 | 	@echo "  linkcheck to check all external links for integrity"
26 | 
27 | clean:
28 | 	-rm -rf build/*
29 | 
30 | html:
31 | 	mkdir -p build/html build/doctrees
32 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html
33 | 	@echo
34 | 	@echo "Build finished. The HTML pages are in build/html."
35 | 
36 | pickle:
37 | 	mkdir -p build/pickle build/doctrees
38 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle
39 | 	@echo
40 | 	@echo "Build finished; now you can process the pickle files."
41 | 
42 | web: pickle
43 | 
44 | json:
45 | 	mkdir -p build/json build/doctrees
46 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json
47 | 	@echo
48 | 	@echo "Build finished; now you can process the JSON files."
49 | 
50 | htmlhelp:
51 | 	mkdir -p build/htmlhelp build/doctrees
52 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp
53 | 	@echo
54 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
55 | 	      ".hhp project file in build/htmlhelp."
56 | 
57 | latex:
58 | 	mkdir -p build/latex build/doctrees
59 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex
60 | 	@echo
61 | 	@echo "Build finished; the LaTeX files are in build/latex."
62 | 	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
63 | 	      "run these through (pdf)latex."
64 | 
65 | changes:
66 | 	mkdir -p build/changes build/doctrees
67 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes
68 | 	@echo
69 | 	@echo "The overview file is in build/changes."
70 | 
71 | linkcheck:
72 | 	mkdir -p build/linkcheck build/doctrees
73 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck
74 | 	@echo
75 | 	@echo "Link check complete; look for any errors in the above output " \
76 | 	      "or in build/linkcheck/output.txt."
77 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/testwords.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html><body style="background-color: rgb(150,78,91); color: #444;">
 3 | <div style="background-color: rgb(239,250,0); padding: 1ex;">Stars </div>
 4 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
 5 | <div style="background-color: rgb(130,64,20); padding: 1ex;">Dog </div>
 6 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
 7 | <div style="background-color: rgb(0,0,250); padding: 1ex;">Ocean </div>
 8 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
 9 | <div style="background-color: rgb(92,123,11); padding: 1ex;">Tree </div>
10 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
11 | <div style="background-color: rgb(68,55,146); padding: 1ex;">Sky </div>
12 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
13 | <div style="background-color: rgb(187,184,189); padding: 1ex;">Teeth </div>
14 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
15 | <div style="background-color: rgb(98,95,103); padding: 1ex;">Computer </div>
16 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
17 | <div style="background-color: rgb(146,121,105); padding: 1ex;">Person </div>
18 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
19 | <div style="background-color: rgb(210,139,108); padding: 1ex;">Flower </div>
20 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
21 | <div style="background-color: rgb(64,106,120); padding: 1ex;">Calm </div>
22 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
23 | <div style="background-color: rgb(244,243,245); padding: 1ex;">Birth </div>
24 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
25 | <div style="background-color: rgb(8,0,8); padding: 1ex;">Death </div>
26 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
27 | <div style="background-color: rgb(233,82,139); padding: 1ex;">Love </div>
28 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
29 | <div style="background-color: rgb(250,103,3); padding: 1ex;">Fire </div>
30 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
31 | <div style="background-color: rgb(247,96,66); padding: 1ex;">Happiness </div>
32 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
33 | <div style="background-color: rgb(32,15,64); padding: 1ex;">Sadness </div>
34 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
35 | <div style="background-color: rgb(123,11,7); padding: 1ex;">Anger </div>
36 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
37 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div>
38 | <div style="background-color: rgb(128,128,128); padding: 1ex;"></div></body></html>
39 | 


--------------------------------------------------------------------------------
/tools/cnet_n3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | PREFIX = 'http://conceptnet.media.mit.edu'
 4 | 
 5 | from conceptnet.models import Assertion, Frame, RelationType, Concept
 6 | 
 7 | import codecs
# Open the output file and wrap it so that unicode text is written as UTF-8.
ofile_raw = open('conceptnet_en_20080604.n3','w')
ofile = codecs.getwriter('utf-8')(ofile_raw)

# Declare the "conceptnet:" prefix that proplist() uses for property names.
print >>ofile, '@prefix conceptnet: <%s>.' % (PREFIX+'/')
12 | 
def prefixed(type, rest):
    """Return the URI reference '<PREFIX/type/rest>' as a string."""
    parts = (PREFIX, type, rest)
    return '<%s/%s/%s>' % parts
15 | 
def concept(id):
    """Return the URI reference of the concept with this id."""
    return prefixed('concept', id)
def reltype(x):
    """Return the URI reference of the relation type whose id is x."""
    name = reltype_id2name[x]
    return prefixed('reltype', name)
def literal(x):
    """
    Return the string x as a double-quoted N3 string literal.

    Double quotes inside x are replaced by underscores. Backslashes and line
    breaks are written as escape sequences, because a raw backslash starts
    an escape and a raw line break is not allowed inside a "..." literal.
    """
    escaped = (x.replace('\\', '\\\\')
                .replace('"', '_')
                .replace('\n', '\\n')
                .replace('\r', '\\r'))
    return '"' + escaped + '"'
def _frame(id):
    """Return the URI reference of the frame with this id."""
    return prefixed('frame', id)
def language(x):
    """Return the URI reference of the language x."""
    return prefixed('language', x)
def user(x):
    """Return the URI reference of the user x."""
    return prefixed('user', x)
22 | 
def proplist(p):
    """
    Format (property, value) pairs as an N3 property list.

    Each pair becomes 'conceptnet:<property> <value>'; the pairs are joined
    with '; '. An empty sequence gives an empty string.
    """
    clauses = []
    for prop, val in p:
        clauses.append(u'conceptnet:%s %s' % (prop, val))
    return u'; '.join(clauses)
26 | 
# Relation type id -> name, loaded once so reltype() needs no query per call.
reltype_id2name = dict((x.id, x.name) for x in RelationType.objects.all())
# Ids of the frames and concepts referenced by the dumped assertions; they
# are filled in by the assertion loop and dumped afterwards.
frames = set()
concepts = set()
30 | 
# Pass 1: write one N3 statement per useful English assertion, remembering
# which frames and concepts it refers to.
print 'Dumping assertions.'
for (id, stem1_id, reltype_id, stem2_id,
     text1, text2, frame_id, language_id, creator_id,
     score, sentence) in Assertion.useful.filter(language='en').values_list(
    'id', 'stem1_id', 'predtype_id', 'stem2_id',
    'text1', 'text2', 'frame_id', 'language_id', 'creator_id',
    'score', 'sentence__text').iterator():

    ofile.write('<%s/assertion/%s> ' % (PREFIX, id))
    ofile.write(proplist((
        ('LeftConcept', concept(stem1_id)),
        ('RelationType', reltype(reltype_id)),
        ('RightConcept', concept(stem2_id)),
        ('LeftText', literal(text1)),
        ('RightText', literal(text2)),
        ('FrameId', _frame(frame_id)),
        ('Language', language(language_id)),
        ('Creator', user(creator_id)),
        ('Score', score),
        ('Sentence', literal(sentence))
        )))
    ofile.write('.\n')

    frames.add(frame_id)
    concepts.add(stem1_id)
    concepts.add(stem2_id)

ofile.flush()

# Pass 2: describe every frame that an assertion referred to.
print 'Dumping frames.'
for id, frame in Frame.objects.in_bulk(list(frames)).iteritems():
    ofile.write(_frame(id)+' ')
    ofile.write(proplist((
                ('RelationType', reltype(frame.predtype_id)),
                ('FrameText', literal(frame.text)),
                ('FrameGoodness', literal(str(frame.goodness)))))
                )
    ofile.write('.\n')

ofile.flush()

# Pass 3: describe every concept that an assertion referred to.
print 'Dumping concepts.'
for id, c in Concept.objects.in_bulk(list(concepts)).iteritems():
    ofile.write(concept(id)+' ')
    ofile.write(proplist((
                ('NormalizedText', literal(c.text)),
                ('CanonicalName', literal(c.canonical_name))
                )))
    ofile.write('.\n')


print 'Done.'

ofile.close()
85 | 


--------------------------------------------------------------------------------
/serialize/pyyaml.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Improved YAML serializer by rspeer@mit.edu. Uses a stream of documents so that
 3 | it doesn't have to keep all database entries in memory.
 4 | 
 5 | Requires PyYaml (http://pyyaml.org/), but that's checked for in __init__.
 6 | 
 7 | To use it, add a line like this to your settings.py::
 8 |   
 9 |   SERIALIZATION_MODULES = {
10 |       'yaml': 'path.to.import.this.module'
11 |   }
12 | """
13 | 
14 | from StringIO import StringIO
15 | import yaml
16 | from django.utils.encoding import smart_unicode
17 | 
18 | try:
19 |     import decimal
20 | except ImportError:
21 |     from django.utils import _decimal as decimal # Python 2.3 fallback
22 | 
23 | from django.db import models
24 | from django.core.serializers.python import Serializer as PythonSerializer
25 | from django.core.serializers.python import Deserializer as PythonDeserializer
26 | 
class DjangoSafeDumper(yaml.SafeDumper):
    """SafeDumper with an added representer for ``decimal.Decimal`` values."""

    def represent_decimal(self, data):
        """Represent *data* as a YAML string scalar holding ``str(data)``."""
        text = str(data)
        return self.represent_scalar('tag:yaml.org,2002:str', text)


DjangoSafeDumper.add_representer(
    decimal.Decimal,
    DjangoSafeDumper.represent_decimal
)
32 | 
class Serializer(PythonSerializer):
    """
    Convert a queryset to YAML.

    Each object is dumped as its own YAML document (``explicit_start=True``)
    and written to the output stream as soon as it is finished.
    """

    internal_use_only = False

    def handle_field(self, obj, field):
        # A nasty special case: base YAML doesn't support serialization of time
        # types (as opposed to dates or datetimes, which it does support). Since
        # we want to use the "safe" serializer for better interoperability, we
        # need to do something with those pesky times. Converting 'em to strings
        # isn't perfect, but it's better than a "!!python/time" type which would
        # halt deserialization under any other language.
        if isinstance(field, models.TimeField) and getattr(obj, field.name) is not None:
            self._current[field.name] = str(getattr(obj, field.name))
        else:
            super(Serializer, self).handle_field(obj, field)

    def end_object(self, obj):
        """Dump the finished object as one YAML document and write it out."""
        the_object = {
            "model"  : smart_unicode(obj._meta),
            "pk"     : smart_unicode(obj._get_pk_val(), strings_only=True),
            "fields" : self._current
        }
        self._current = None
        dumpstr = yaml.dump(the_object, Dumper=DjangoSafeDumper,
        explicit_start=True, **self.options)
        self.stream.write(dumpstr)

    def start_serialization(self):
        """Prepare for a run; strip options that must not reach yaml.dump()."""
        # end_object() forwards self.options to yaml.dump(), so remove the
        # entries that are not meant for it.
        self.options.pop('stream', None)
        self.options.pop('fields', None)
        # Reset the state kept by end_serialization() in case this serializer
        # instance is used for more than one run.
        self._stream_closed = False
        self._closed_value = None
        PythonSerializer.start_serialization(self)

    def end_serialization(self):
        """Close the output stream, keeping in-memory output retrievable.

        The stream is still closed, as before. If it offers ``getvalue()``
        (e.g. a StringIO), its contents are captured first, because reading
        a StringIO after ``close()`` raises ValueError.
        """
        read_back = getattr(self.stream, 'getvalue', None)
        if callable(read_back):
            self._closed_value = read_back()
        else:
            self._closed_value = None
        self._stream_closed = True
        self.stream.close()

    def getvalue(self):
        """Return the serialized text, or None for streams without getvalue().

        After end_serialization() this returns the value captured before the
        stream was closed; before that it reads the live stream.
        """
        if getattr(self, '_stream_closed', False):
            return self._closed_value
        return self.stream.getvalue()
73 | 
def Deserializer(stream_or_string, **options):
    """
    Deserialize a stream or string of YAML data.

    A string argument is wrapped in a StringIO; anything else is used as the
    stream directly. Every YAML document in the stream is loaded and handed
    to PythonDeserializer, whose results are yielded one by one. ``options``
    is accepted but not used in this function.
    """
    if isinstance(stream_or_string, basestring):
        stream = StringIO(stream_or_string)
    else:
        stream = stream_or_string
    # NOTE(review): yaml.load_all() is called without a safe Loader, so it may
    # construct arbitrary Python objects from tagged input. Only feed this
    # trusted data, or consider yaml.safe_load_all(); the Serializer in this
    # module dumps with a SafeDumper subclass -- confirm before switching.
    for obj in PythonDeserializer(yaml.load_all(stream)):
        yield obj
84 | 
85 | 


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. ConceptNet documentation master file, created by sphinx-quickstart on Fri Feb 27 14:59:14 2009.
 2 |    You can adapt this file completely to your liking, but it should at least
 3 |    contain the root `toctree` directive.
 4 | 
 5 | .. _root:
 6 | 
 7 | ConceptNet API
 8 | ==============
 9 | 
10 | Contents:
11 | 
12 | .. toctree::
13 |    :maxdepth: 2
14 | 
15 |    install
16 |    conceptnet4
17 |    corpus
18 |    others
19 | 
20 | Overview and apology
21 | --------------------
22 | 
23 | The current ConceptNet API has the ability to access two versions of the
24 | database: ConceptNet 3 and the experimental ConceptNet 4. We call this the
25 | "ConceptNet 3.5" API, pronounced "ConceptNet three and a half".
26 | 
27 | Except this isn't quite a release of ConceptNet 3.5 yet. We're working on it.
28 | 
29 | Most of our released code, as well as our released database, takes the form of
30 | ConceptNet 3. However, ConceptNet 3 has become a bit of a mess as a result of
31 | years of research and paper deadlines.
32 | 
33 | This documentation, then, will mostly document ConceptNet 4. Much of what we
34 | say will work about the same in ConceptNet 3. But some of it won't. We're sorry
35 | about that.
36 | 
37 | How does this code work?
38 | ------------------------
39 | The answer from 30,000 feet up is simple: It's Django.
40 | 
41 | Django is a Python framework for working with databases and web applications.
42 | All of ConceptNet is represented as Django models that interact with each other
43 | and with a database. We don't use the web application part -- not here, at
44 | least -- but we provide the appropriate hooks so that ConceptNet can power a
45 | Django web application. (Because it does. It's at
46 | http://openmind.media.mit.edu.)
47 | 
48 | The code is divided into a few main modules, or *apps*:
49 | 
50 | - :mod:`corpus`, representing the sentences of glorious, ambiguous natural
51 |   language that our contributors have provided us with.
52 | - :mod:`conceptnet` (or :mod:`conceptnet4`), representing the structured
53 |   assertions that we have parsed from the corpus.
54 | - :mod:`events`, which lets us keep track of how, when, and why various objects
55 |   came into being.
56 | - (:mod:`voting`, which actually isn't by us at all; it's the `django-voting`_
57 |   package by Jonathan Buchanan.)
58 | 
59 | .. _`django-voting`: http://code.google.com/p/django-voting/
60 | 
61 | :mod:`conceptnet` and :mod:`conceptnet4` are two conflicting implementations of the
62 | same idea. In :file:`settings.py`, we refer to :mod:`conceptnet`. If you have a
63 | database of ConceptNet 4 and a desire to live on the edge, you can change it to
64 | :mod:`conceptnet4`.
65 | 
66 | 
67 | Model diagram
68 | -------------
69 | Each app contains several *models*, representing objects that are stored in a
70 | database. The information in ConceptNet is represented by these models and
71 | their relationships to each other.
72 | 
73 | .. image:: _static/graph/conceptnet_all.png
74 |    :width: 600
75 |    :alt: ConceptNet 4 model diagram
76 |    :target: _static/graph/conceptnet_all.png
77 | 
78 | (`PDF version`_)
79 | 
80 | .. _`PDF version`: _static/graph/conceptnet_all.pdf
81 | 
82 | Components
83 | ----------
84 | - :ref:`conceptnet4`
85 | - :ref:`corpus`
86 | - :ref:`others`
87 | 
88 | Indices and tables
89 | ==================
90 | 
91 | * :ref:`genindex`
92 | * :ref:`modindex`
93 | * :ref:`search`
94 | 
95 | 


--------------------------------------------------------------------------------
/maint/dump_csv.py:
--------------------------------------------------------------------------------
 1 | from csc.conceptnet.models import Concept, Assertion, Sentence, Frame
 2 | from csc.corpus.models import TaggedSentence
 3 | import csv
 4 | 
 5 | def dump_assertion_sentences(lang, f):
 6 |     writer = csv.writer(f)
 7 |     writer.writerow(('id', 'creator', 'score', 'text'))
 8 |     for id, username, score, text in Assertion.objects.filter(language=lang).values_list('id','creator__username', 'score','sentence__text').iterator():
 9 |         writer.writerow((id, username.encode('utf-8'), score, text.encode('utf-8')))
10 | 
def dump_all_sentences(lang, f):
    """Write every Sentence in *lang* to *f* as CSV.

    Columns: id, creator username, creation time, activity name and text;
    creator and text are encoded as UTF-8.
    """
    out = csv.writer(f)
    out.writerow(('id', 'creator', 'created_on', 'activity', 'text'))
    columns = ('id', 'creator__username', 'created_on', 'activity__name',
               'text')
    sentences = Sentence.objects.filter(language=lang).values_list(*columns)
    for sentence_id, creator, created_on, activity, text in sentences.iterator():
        out.writerow((sentence_id, creator.encode('utf-8'), created_on,
                      activity, text.encode('utf-8')))
17 | 
def dump_concepts(lang, f):
    """Write every Concept in *lang* to *f* as CSV.

    The ``num_assertions`` column is filled from ``Concept.num_predicates``.
    """
    out = csv.writer(f)
    out.writerow(('id', 'num_assertions', 'normalized_text', 'canonical_name'))
    for concept in Concept.objects.filter(language=lang).iterator():
        row = (
            concept.id,
            concept.num_predicates,
            concept.text.encode('utf-8'),
            concept.canonical_name.encode('utf-8'),
        )
        out.writerow(row)
24 | 
def dump_assertions(lang, f):
    """Write the full record of every Assertion in *lang* to *f* as CSV.

    Sentence, both texts and the creator name are encoded as UTF-8; the
    remaining columns are written as returned by the query.
    """
    out = csv.writer(f)
    out.writerow(('id', 'sentence', 'relation_type', 'text1', 'text2', 'stem1_id', 'stem2_id', 'frame_id', 'score', 'creator'))
    columns = (
        'id', 'sentence__text', 'predtype__name', 'text1', 'text2',
        'stem1_id', 'stem2_id', 'frame_id', 'score', 'creator__username',
    )
    records = Assertion.objects.filter(language=lang).values_list(*columns)
    for record in records.iterator():
        (assertion_id, sentence, relation_type, text1, text2,
         stem1_id, stem2_id, frame_id, score, creator) = record
        out.writerow((
            assertion_id, sentence.encode('utf-8'), relation_type,
            text1.encode('utf-8'), text2.encode('utf-8'),
            stem1_id, stem2_id, frame_id, score,
            creator.encode('utf-8'),
        ))
38 | 
def dump_frames(lang, f):
    """Write id, relation type, text and goodness of each Frame in *lang* to *f*."""
    out = csv.writer(f)
    out.writerow(('id', 'relation_type', 'text', 'goodness'))
    frame_rows = Frame.objects.filter(language=lang).values_list(
        'id', 'predtype__name', 'text', 'goodness')
    for frame_id, relation_type, text, goodness in frame_rows.iterator():
        encoded_text = text.encode('utf-8')
        out.writerow((frame_id, relation_type, encoded_text, goodness))
50 | 
def dump_tagged_sentences(lang, f):
    """Write id and UTF-8 encoded text of each TaggedSentence in *lang* to *f*."""
    out = csv.writer(f)
    out.writerow(('id', 'text'))
    tagged = TaggedSentence.objects.filter(language=lang).values_list(
        'id', 'text')
    for sentence_id, text in tagged.iterator():
        out.writerow((sentence_id, text.encode('utf-8')))
60 | 
if __name__=='__main__':
    import sys

    # Exactly one argument is expected: the language code to dump. Report a
    # usage error instead of failing on tuple unpacking.
    if len(sys.argv) != 2:
        sys.exit('Usage: %s LANGUAGE' % sys.argv[0])
    lang = sys.argv[1]

    # (dump function, output file suffix), run in this order. As before,
    # dump_tagged_sentences() is not part of the run.
    dumps = (
        (dump_assertion_sentences, '_assertion_sentences.csv'),
        (dump_all_sentences, '_all_sentences.csv'),
        (dump_concepts, '_concepts.csv'),
        (dump_assertions, '_assertions.csv'),
        (dump_frames, '_frames.csv'),
    )
    for dump, suffix in dumps:
        # Binary mode: the Python 2 csv module emits its own line endings and
        # expects a file opened with 'b' on platforms where that matters.
        out = open(lang + suffix, 'wb')
        try:
            dump(lang, out)
        finally:
            # Close each file as soon as its dump finishes or fails.
            out.close()
70 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/models.py:
--------------------------------------------------------------------------------
  1 | from django.db import models
  2 | from conceptnet.corpus.models import Language
  3 | from conceptnet.models import Relation
  4 | 
  5 | class FunctionFamilyDetector(object):
  6 |     def __init__(self,kb,language,family):
  7 |         self.language = language
  8 |         self.kb = kb
  9 |         self.family = family
 10 | 
 11 |     def __str__(self):
 12 |         return '<' + self.language.id + ': ' + \
 13 |                 'function words (family=' + self.family + ')>'
 14 | 
 15 |     def __call__(self,word):
 16 |         return (word in self.kb)
 17 | 
 18 | 
class FunctionWord(models.Model):
    """ a word of particular significance to a parser """
    language = models.ForeignKey(Language)
    word = models.TextField()
    # NOTE(review): this is a plain class attribute, not an option of the
    # inner Meta class below. Django reads unique_together from Meta, so this
    # line presumably creates no uniqueness constraint -- confirm, and move
    # it into Meta (with a schema migration) if one is intended.
    unique_together = (('language', 'word'),)

    def __str__(self):
        # Renders as "<LANGUAGE_ID:WORD>".
        return "<" + self.language.id + ":" + self.word + ">"

    class Meta:
        db_table = 'functionwords'
 30 | 
class FunctionFamily(models.Model):
    """ defines a family of function words """
    family = models.TextField()
    f_word = models.ForeignKey(FunctionWord)
    # NOTE(review): this is a plain class attribute, not an option of the
    # inner Meta class below. Django reads unique_together from Meta, so this
    # line presumably creates no uniqueness constraint -- confirm, and move
    # it into Meta (with a schema migration) if one is intended.
    unique_together = (('family', 'f_word'),)

    def __str__(self):
        # Renders as "FAMILY: " followed by str() of the linked FunctionWord.
        return self.family + ": " + str(self.f_word)

    class Meta:
        db_table = 'functionfamilies'

    @staticmethod
    def build_function_detector(language, family):
        """Return a FunctionFamilyDetector for *family* in *language*.

        The detector is given the list of words of every FunctionFamily row
        with this family name whose function word belongs to *language*.
        """
        # Prepare the kb
        words = list(FunctionFamily.objects.filter(family=family,f_word__language=language).values_list('f_word__word', flat=True))

        return FunctionFamilyDetector(words,language,family)
 49 | 
class ParsingPattern(models.Model):
    """A non-blank text pattern linked to a Relation and a Language.

    Rows are stored in the ``parsing_patterns`` table.

    NOTE(review): how ``pattern``, ``polarity`` and ``sort_order`` are
    interpreted is decided by the code that reads this table, which is not
    visible here -- confirm before relying on any particular meaning.
    """
    pattern = models.TextField(blank=False)
    predtype = models.ForeignKey(Relation)
    polarity = models.IntegerField()
    sort_order = models.IntegerField()
    language = models.ForeignKey(Language)

    class Meta:
        db_table = 'parsing_patterns'
 59 | 
 60 | 
 61 | class SecondOrderPattern(models.Model):
 62 |     regex = models.TextField()
 63 |     language = models.ForeignKey(Language)
 64 |     use_group = models.IntegerField(default=0)
 65 |     abort = models.BooleanField(default=False)
 66 | 
 67 |     def __str__(self):
 68 |         return "(" + self.language.id + ") /" + self.regex + "/"
 69 | 
 70 |     def compile(self):
 71 |         self._compiled_regex = re.compile( self.regex )
 72 | 
 73 |     def __call__(self, text):
 74 |         if not hasattr( self, '_compiled_regex' ): self.compile()
 75 |         return self._compiled_regex.search(text)
 76 | 
 77 |     class Meta:
 78 |         db_table = 'secondorderpatterns'
 79 | 
 80 |     class SecondOrderSplitter:
 81 |         def __init__(self,patterns,language):
 82 |             self.language = language
 83 |             self.patterns = patterns
 84 | 
 85 |         def __call__(self,text):
 86 |                  # FIXME: THIS IS A HIDEOUSLY USELESS ROUTINE
 87 |             for pattern in self.patterns:
 88 |                 m = pattern(text)
 89 |                 if m:
 90 |                     if pattern.abort: text = ''
 91 |                     else: text = m.groups()[pattern.use_group]
 92 |             return [text]
 93 | 
 94 |         def __str__(self):
 95 |             return "Second order splitter (" + self.language.id + ")"
 96 | 
 97 |     @staticmethod
 98 |     def build_splitter(language):
 99 |         return SecondOrderPattern.SecondOrderSplitter(language.secondorderpattern_set.all(), language)
100 | 


--------------------------------------------------------------------------------
/doc/bzr-howto.txt:
--------------------------------------------------------------------------------
 1 | Common Sense Computing and Bazaar
 2 | =================================
 3 | 
 4 | 
 5 | First-time setup
 6 | ----------------
 7 | * Install Bazaar (bazaar-vcs.org)
 8 | * Sign up for Launchpad (launchpad.net)
 9 | * Join the Commonsense Computing team (http://launchpad.net/~commonsense)
10 | 
11 | 
12 | Working on a project
13 | --------------------
14 | 
15 | Start by making a branch of the project you're working on:
16 |   bzr branch lp:conceptnet my_csamoa_branch
17 | (This gives you a local working directory called my_csamoa_branch.)
18 | 
19 | Hack on the code.
20 | 
21 | If you create new files, add them:
22 |   bzr add filename
23 | 
24 | From time to time, commit:
25 |   bzr commit -m "this is my highly informative commit message"
26 | This commits to _your_ version-controlled repository. It can't mess with anyone else. It's safe.
27 | 
28 | To incorporate new things that happen on the trunk, you need to _merge_:
29 |   bzr merge lp:conceptnet     # get your branch up to date with what's changed
30 |   bzr commit -m "Merged"
31 | 
32 | If for some reason your working copy is out of date:
33 |   bzr update
34 | 
35 | When it's ready for prime time, push it back into the trunk:
36 |   bzr push lp:conceptnet
37 |   
38 | If the trunk has changes you haven't merged, you'll need to merge before you can push.
39 | 
40 | 
41 | I don't want my own branch, I just want to use this like SVN
42 | ------------------------------------------------------------
43 | 
44 | Okay. This makes perfect sense for a quick change, but if you make a habit of this you're probably going to get in someone's way.
45 | 
46 | Instead of branching, get a _checkout_:
47 |   bzr checkout lp:conceptnet
48 | 
49 | A checkout is a working copy whose repository is somewhere else. When you commit, it commits to that repository. This is how everything worked in Subversion.
50 | 
51 | To pull in new stuff from the repository:
52 |   bzr update
53 |   
54 | To commit your changes to the repository:
55 |   bzr commit -m "extremely informative message"
56 |   
57 | 
58 | Checking out the same branch somewhere else
59 | -------------------------------------------
60 | You've made a branch on one computer, and you want to work with the same branch on another computer. No problem: make a checkout of it.
61 |   bzr checkout bzr+ssh://your.host.name/path/to/your/branch
62 |   
63 | Now you have multiple checkouts, and you can update, commit, etc. just like above.
64 | 
65 | This also makes sense if you want to work on some minor branch that's on Launchpad (like ~commonsense/conceptnet/new-caledonia) without re-branching it. Check out that branch and commit to it.
66 | 
67 | 
68 | Sharing a branch
69 | ----------------
70 | If you want someone else to be able to work with your branch, you probably want it hosted on Launchpad instead of your own computer. Here's how to do that:
71 | 
72 |   bzr push lp:~username/project/branch-name
73 |   
74 | For example, Rob might do this:
75 |   bzr push lp:~rspeer/conceptnet/speed-up-the-lemmatizer
76 |   
77 | That's right, you can just make up a URL like that and suddenly Launchpad is hosting a branch for you. Now make your branch into a checkout of that new hosted branch:
78 |   bzr bind lp:~username/project/branch-name
79 | 
80 | 
81 | I screwed up! Shit shit shit.
82 | -----------------------------
83 | If you committed something you didn't mean to, you can fix it:
84 |   bzr uncommit
85 | 
86 | If you added something you meant to be unversioned:
87 |   bzr remove --keep filename
88 | 
89 | If you want to go back to a previous revision, look up how to use bzr merge -r.
90 | 
91 | If you pushed to somewhere you didn't mean to, check out that branch and bzr merge -r it back to something sane.
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/conceptnet/webapi/urls.py:
--------------------------------------------------------------------------------
 1 | from django.conf.urls.defaults import *
 2 | from piston.resource import Resource
 3 | from conceptnet.webapi.docs import documentation_view
 4 | from conceptnet.webapi.handlers import *
 5 | 
 6 | # This gives a way to accept "query.foo" on the end of the URL to set the
 7 | # format to 'foo'. "?format=foo" works as well.
 8 | Q = r'(query\.(?P<emitter_format>.+))?$'
 9 | 
10 | urlpatterns = patterns('',
11 |     url(r'^(?P<lang>[^/]+)/'+Q,
12 |         Resource(LanguageHandler), name='language_handler'),
13 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/'+Q,
14 |         Resource(ConceptHandler), name='concept_handler'),
15 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/assertions/'+Q,
16 |         Resource(ConceptAssertionHandler), name='concept_assertion_handler_default'),
17 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/assertions/limit:(?P<limit>[0-9]+)/'+Q,
18 |         Resource(ConceptAssertionHandler), name='concept_assertion_handler'),
19 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/surfaceforms/'+Q,
20 |         Resource(ConceptSurfaceHandler), name='concept_surface_handler_default'),
21 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/surfaceforms/limit:(?P<limit>[0-9]+)/'+Q,
22 |         Resource(ConceptSurfaceHandler), name='concept_surface_handler'),
23 |     url(r'^(?P<lang>.+)/concept/(?P<concept>[^/]*)/features/'+Q,
24 |         Resource(ConceptFeatureHandler), name='concept_feature_handler'),
25 |     url(r'^(?P<lang>.+)/(?P<dir>left|right)feature/(?P<relation>[^/]+)/(?P<concept>[^/]+)/'+Q,
26 |         Resource(FeatureQueryHandler), name='feature_query_handler_default'),
27 |     url(r'^(?P<lang>.+)/(?P<dir>left|right)feature/(?P<relation>[^/]+)/(?P<concept>[^/]+)/limit:(?P<limit>[0-9]+)/'+Q,
28 |         Resource(FeatureQueryHandler), name='feature_query_handler'),
29 |     url(r'^(?P<lang>.+)/(?P<type>.+)/(?P<id>[0-9]+)/votes/'+Q,
30 |         Resource(RatedObjectHandler), name='rated_object_handler'),
31 |     url(r'^(?P<lang>.+)/surface/(?P<text>.+)/'+Q,
32 |         Resource(SurfaceFormHandler), name='surface_form_handler'),
33 |     url(r'^(?P<lang>.+)/frame/(?P<id>[0-9]+)/'+Q,
34 |         Resource(FrameHandler), name='frame_handler'),
35 |     url(r'^(?P<lang>.+)/frame/(?P<id>[0-9]+)/statements/'+Q,
36 |         Resource(RawAssertionByFrameHandler),
37 |         name='raw_assertion_by_frame_handler_default'),
38 |     url(r'^(?P<lang>.+)/frame/(?P<id>[0-9]+)/statements/limit:(?P<limit>[0-9]+)/'+Q,
39 |         Resource(RawAssertionByFrameHandler),
40 |         name='raw_assertion_by_frame_handler'),
41 |     url(r'^(?P<lang>.+)/assertion/(?P<id>[0-9]+)/'+Q,
42 |         Resource(AssertionHandler), name='assertion_handler'),
43 |     url(r'^(?P<lang>.+)/assertion/(?P<id>[0-9]+)/raw/'+Q,
44 |         Resource(AssertionToRawHandler), name='assertion_to_raw_handler'),
45 |     url(r'^(?P<lang>.+)/raw_assertion/(?P<id>[0-9]+)/'+Q,
46 |         Resource(RawAssertionHandler), name='raw_assertion_handler'),
47 |     url(r'^(?P<lang>.+)/frequency/(?P<text>[^/]*)/'+Q,
48 |         Resource(FrequencyHandler), name='frequency_handler'),
49 |     url(r'^(?P<lang>.+)/assertionfind/(?P<relation>[^/]+)/(?P<text1>[^/]+)/(?P<text2>[^/]+)/'+Q,
50 |         Resource(AssertionFindHandler), name='assertion_find_handler'),
51 |     url(r'^user/(?P<username>.+)/'+Q,
52 |         Resource(UserHandler), name='user_handler'),
53 |     url(r'^(?P<lang>.+)/similar_to/(?P<termlist>[^/]+)/limit:(?P<limit>[0-9]+)/'+Q,
54 |         Resource(SimilarityHandler), name='similarity_handler'),
55 |     url(r'^(?P<lang>.+)/similar_to/(?P<termlist>[^/]+)/'+Q,
56 |         Resource(SimilarityHandler), name='similarity_handler_default'),
57 |     url(r'docs.txt$',
58 |         documentation_view, name='documentation_view')
59 | )
60 | # :vim:tw=0:nowrap:
61 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/try_patterns.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from conceptnet.corpus.parse.pcfgpattern import *
 3 | __test__ = False
 4 | 
 5 | def textrepr(rel, matchdict):
 6 |     if rel is None: return 'None'
 7 |     return "%s(%s, %s)" % (rel, matchdict.get(1), matchdict.get(2))
 8 | 
 9 | # A selection of sentences from OMCS that we should be able to parse correctly.
10 | # This test suite does not vouch for the correctness or usefulness of the
11 | # sentences it contains.
12 | 
13 | tests = [
14 |     ("If you want to impanel a jury then you should ask questions.",
15 |      "HasPrerequisite(impanel a jury, ask questions)"),
16 |     ('"Lucy in the Sky with Diamonds" was a famous Beatles song',
17 |      'IsA("Lucy in the Sky with Diamonds", a famous Beatles song)'),
18 |     ("sound can be recorded",
19 |      "ReceivesAction(sound, recorded)"),
20 |     ("sounds can be soothing",
21 |      "HasProperty(sounds, soothing)"),
22 |     ("music can be recorded with a recording device",
23 |      "ReceivesAction(music, recorded with a recording device)"),
24 |     ("The first thing you do when you buy a shirt is try it on",
25 |      "HasFirstSubevent(buy a shirt, try it on)"),
26 |     ("One of the things you do when you water a plant is pour",
27 |      "HasSubevent(water a plant, pour)"),
28 |     ("A small sister can bug an older brother",
29 |      "CapableOf(A small sister, bug an older brother)"),
30 |     ("McDonald's hamburgers contain mayonnaise",
31 |      "HasA(McDonald's hamburgers, mayonnaise)"),
32 |     ("If you want to stab to death then you should get a knife.",
33 |      "HasPrerequisite(stab to death, get a knife)"),
34 |     ("carbon can cake hard",
35 |      "CapableOf(carbon, cake hard)"),
36 |     ("You would take a walk because your housemates were having sex in your bed.",
37 |      "MotivatedByGoal(take a walk, your housemates were having sex in your bed)"),
38 |     ("police can tail a suspect",
39 |      "CapableOf(police, tail a suspect)"),
40 |     ("people can race horses",
41 |      "CapableOf(people, race horses)"),
42 |     ("computer can mine data",
43 |      "CapableOf(computer, mine data)"),
44 |     ("to use a phone you must dial numbers",
45 |      "HasSubevent(use a phone, dial numbers)"),
46 |     ("People who are depressed are more likely to kill themselves",
47 |      "HasProperty(People who are depressed, more likely to kill themselves)"),
48 |     ("Bird eggs are good with toast and jam",
49 |      "HasProperty(Bird eggs, good with toast and jam)"),
50 |     ("housewife can can fruit",
51 |      "CapableOf(housewife, can fruit)"),
52 |     ("pictures can be showing nudity",
53 |      "CapableOf(pictures, be showing nudity)"),
54 |     ("a large house where the president of the US resides",
55 |      "junk(a large house where the president of the US resides, None)"),
56 |     ("girls are cute when they eat",
57 |      "HasProperty(girls, cute when they eat)"),
58 |     ("When books are on a bookshelf, you see only their spines.",
59 |      "HasSubevent(books are on a bookshelf, you see only their spines)"),
60 |     ("The effect of taking a phone call is finding out who is calling",
61 |      "Causes(taking a phone call, finding out who is calling)"),
62 |     ("There are 60 seconds in a minute",
63 |      "AtLocation(60 seconds, a minute)"),
64 |     ("Two wrongs don't make a right.",
65 |      "CapableOf(Two wrongs, make a right)"),
66 |     ("Somewhere someone can be is an art gallery",
67 |      "AtLocation(someone, an art gallery)"),
68 |     ("A person doesn't want war",
69 |      "Desires(A person, war)"),
70 |     ("That's weird",
71 |      "junk(That's weird, None)"),
72 | ]
73 | 
74 | def run_tests():
75 |     success = 0
76 |     ntests = 0
77 |     for testin, testout in tests:
78 |         ntests += 1
79 |         prob, frame, rel, matches = pattern_parse(testin)
80 |         if textrepr(rel, matches) == testout:
81 |             success += 1
82 |             print "Success:", testin
83 |         else:
84 |             print "Failed:", testin
85 |             print "Got:", textrepr(rel, matches)
86 |             print "Expected:", testout
87 |             pattern_parse(testin, 1)
88 |             
89 |     print "Tests complete: %d/%d" % (success, ntests)
90 | 
91 | run_tests.__test__ = False
92 | 
93 | if __name__ == '__main__':
94 |     run_tests()
95 | 
96 | 


--------------------------------------------------------------------------------
/doc/zero-to-conceptnet-on-xvm.txt:
--------------------------------------------------------------------------------
  1 | Zero to ConceptNet on XVM
  2 | by Ken Arnold (kcarnold@mit.edu)
  3 | 
  4 | These instructions tell you how to:
  5 | * Use your MIT Athena account to conjure up a new Ubuntu virtual machine that you can use
  6 | * Install ConceptNet and Divisi on that fresh Ubuntu machine
  7 | 
  8 | If you don't have an MIT account, or you have your own Ubuntu Linux machine already, you can skip to the "Getting dependencies" section.
  9 | 
 10 | Creating a new VM
 11 | =================
 12 | http://xvm.mit.edu/
 13 | Log in
 14 | Create VM: autoinstall Ubuntu Jaunty i386 (our stuff works on AMD64, but 64-bit pointers waste the precious little RAM you get)
 15 | go, wait 5 minutes, power on the new VM
 16 | 
 17 | At a terminal with Kerberos tickets (e.g., Athena; ssh linux.mit.edu first)
 18 | ssh MACHINE-NAME@xvm-console.mit.edu
 19 | Hit Enter, type 'root'
 20 | 
 21 | Making a user account to log in with ssh
 22 | ----------------------------------------
 23 | 
 24 | Now add yourself as an admin user. But first we have to fix the configuration (this should not be necessary...):
 25 | 
 26 | addgroup --gid 114 admin
 27 | cat >> /etc/sudoers <<EOF
 28 | # Members of the admin group may gain root privileges
 29 | %admin ALL=(ALL) ALL
 30 | EOF
 31 | adduser kcarnold (info doesn't matter, only password)
 32 | adduser kcarnold admin
 33 | logout
 34 | 
 35 | Yes, you need to run the 'adduser' command twice.
 36 | 
 37 | Then you need to get out of here. To do that, hit Enter, then tilde, dot. (~.).
 38 | 
 39 | Now you can reconnect to your VM with a normal ssh connection:
 40 | 
 41 | ssh kcarnold.xvm.mit.edu
 42 | 
 43 | Getting dependencies
 44 | ====================
 45 | 
 46 | sudo aptitude update
 47 | 
 48 | Now let's install some basic Python-y stuff:
 49 | sudo aptitude install python-dev python-setuptools build-essential python-virtualenv python-numpy
 50 | 
 51 | (If you're not installing Divisi, python-dev, build-essential, and python-numpy are unnecessary.)
 52 | 
 53 | If you anticipate actually working on the ConceptNet code, some additional packages will be helpful:
 54 | sudo aptitude install screen bzr
 55 | 
 56 | Making a virtual environment
 57 | ============================
 58 | 
 59 | virtualenv ~/py
 60 | source py/bin/activate
 61 | echo "source py/bin/activate" >> ~/.bashrc
 62 | easy_install ipython
 63 | 
 64 | Installing ConceptNet
 65 | =====================
 66 | 
 67 | All of this will get installed inside your virtual environment.
 68 | 
 69 | easy_install django
 70 | easy_install conceptnet
 71 | wget http://conceptnet.media.mit.edu/dist/ConceptNet-sqlite.tar.gz
 72 | tar -xvf ConceptNet-sqlite.tar.gz
 73 | 
 74 | If you want to develop ConceptNet itself, replace `easy_install conceptnet` with:
 75 | bzr branch lp:conceptnet
 76 | cd conceptnet; ./setup.py develop; cd ..
 77 | 
 78 | Try it out
 79 | ==========
 80 | 
 81 | ipython
 82 | from csc.conceptnet4.models import Concept
 83 | dog = Concept.get('dog', 'en')
 84 | for fwd in dog.get_assertions_forward()[:15]:
 85 |     print fwd
 86 | 
 87 | Documentation: http://conceptnet.media.mit.edu/doc/conceptnet/overview.html
 88 | 
 89 | Installing Divisi
 90 | =================
 91 | 
 92 | easy_install divisi
 93 | 
 94 | If you want to develop Divisi itself, do this instead:
 95 | bzr branch lp:divisi
 96 | cd divisi; ./setup.py develop; cd ..
 97 | 
 98 | Try out Divisi
 99 | ==============
100 | 
101 | You can make an AnalogySpace tensor like this:
102 | ipython
103 | from csc.conceptnet4.analogyspace import *
104 | tensor = conceptnet_2d_from_db(lang='en')
105 |  [or alternatively, follow directions at http://csc.media.mit.edu/pages/ubuntu-install/ to get it online]
106 | tensor['baseball', :].top_items()
107 | svd = tensor.svd(k=50)
108 | concept_similarity(svd, 'teach').top_items(10)
109 | 
110 | 
111 | Also, if you checked out the source, you can run our test suite:
112 | python divisi/test/tests.py
113 | 
114 | Documentation: http://divisi.media.mit.edu/doc/intro.html
115 | 
116 | Using our database server
117 | =========================
118 | sudo aptitude install python-psycopg2
119 | Then see: http://conceptnet.media.mit.edu/doc/conceptnet/install.html#optional-using-a-postgresql-database
120 | 
121 | 
122 | Some basic ConceptNet queries
123 | =============================
124 | 
125 | http://conceptnet.media.mit.edu/doc/
126 | 
127 | from csc.conceptnet4.models import *
128 | 
129 | All assertions about "dog":
130 | >>> dog = Concept.get('dog','en')
131 | >>> Assertion.objects.filter(concept1=dog)
132 | (same as dog.get_assertions_forward() if you replace `objects` by `useful`)
133 | 
134 | All sentences where "a dog" is the first item:
135 | >>> Sentence.objects.filter(rawassertion__text1__iexact='a dog')
136 | 
137 | Count of all English assertions with a score of at least 3:
138 | >>> Assertion.objects.filter(language='en', score__gte=3).count()
139 | 
140 | A useful reference: http://docs.djangoproject.com/en/dev/topics/db/queries/
141 | 
142 | 


--------------------------------------------------------------------------------
/conceptnet/lib/events/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from south.db import db
 3 | from django.db import models
 4 | from events.models import *
 5 | 
class Migration:
    """Schema migration that creates the two tables of the ``events`` app.

    ``forwards`` creates ``events_event`` and ``events_activity``;
    ``backwards`` drops both.  ``models`` is a snapshot of model definitions
    (presumably the frozen state South uses to build the ``orm`` argument --
    confirm against the South version in use).
    """
    
    def forwards(self, orm):
        """Create the ``events_event`` and ``events_activity`` tables.

        Each column definition is looked up in ``orm`` with an
        ``'app.Model:field'`` key.
        """
        
        # Adding model 'Event'
        db.create_table('events_event', (
            ('id', orm['events.Event:id']),
            ('user', orm['events.Event:user']),
            ('content_type', orm['events.Event:content_type']),
            ('object_id', orm['events.Event:object_id']),
            ('activity', orm['events.Event:activity']),
            ('timestamp', orm['events.Event:timestamp']),
        ))
        db.send_create_signal('events', ['Event'])
        
        # Adding model 'Activity'
        db.create_table('events_activity', (
            ('id', orm['events.Activity:id']),
            ('name', orm['events.Activity:name']),
        ))
        db.send_create_signal('events', ['Activity'])
        
    
    
    def backwards(self, orm):
        """Drop the two tables created by ``forwards``."""
        
        # Deleting model 'Event'
        db.delete_table('events_event')
        
        # Deleting model 'Activity'
        db.delete_table('events_activity')
        
    
    
    # Snapshot of the model definitions this migration refers to: the two
    # ``events`` models plus the auth/contenttypes models they point at.
    # Every value is a (field class path, positional args, keyword args)
    # triple whose arguments are stored as strings.
    models = {
        'auth.group': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '80', 'unique': 'True'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'unique_together': "(('content_type', 'codename'),)"},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'})
        },
        'contenttypes.contenttype': {
            'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'events.activity': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {})
        },
        'events.event': {
            'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'timestamp': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"})
        }
    }
    
    # Apps whose models are fully described by ``models`` above.
    complete_apps = ['events']
90 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/migrate_templated.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys, traceback
  3 | from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\
  4 |   Frequency, Relation, SurfaceForm, Concept, Rating
  5 | import conceptnet.models as cn3
  6 | from corpus.models import Sentence, Language, Activity
  7 | from django.contrib.auth.models import User
  8 | from django.core.paginator import Paginator
  9 | from django.db import transaction
 10 | from corpus.parse.adverbs import map_adverb
 11 | from itertools import islice
 12 | import yaml
 13 | 
# Activity attached to the +1 ratings that process_predicate() records on
# behalf of each sentence's creator.  NOTE: this is a database lookup that
# runs when the module is imported.
csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating')
# Ids of the sentence activities whose cn3.Predicate rows run() migrates.
good_acts = [ 16, 20, 22, 24, 28, 31, 32 ]
# run() only selects predicates in this language.
en = Language.get('en')
 17 | 
 18 | def process_predicate(pred, batch):
 19 |     frametext = pred.frame.text
 20 |     matches = {1: pred.text1, 2: pred.text2}
 21 |     if pred.polarity < 0: matches['a'] = 'not'
 22 |     relation = pred.relation
 23 |     sentence = pred.sentence
 24 |     lang = pred.language
 25 | 
 26 |     surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True)
 27 |                      for i in (1, 2)]
 28 |     concepts = [s.concept for s in surface_forms]
 29 |     
 30 |     # FIXME: english only so far
 31 |     freq = map_adverb(matches.get('a', ''))
 32 |     relation = Relation.objects.get(id=relation.id)
 33 |     frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
 34 |                                            text=frametext,
 35 |                                            defaults=dict(frequency=freq, 
 36 |                                                          goodness=1))
 37 |     frame.save()
 38 |     
 39 |     raw_assertion, _ = RawAssertion.objects.get_or_create(
 40 |         surface1=surface_forms[0],
 41 |         surface2=surface_forms[1],
 42 |         frame=frame,
 43 |         language=lang,
 44 |         creator=sentence.creator,
 45 |         defaults=dict(batch=batch))
 46 |     # still need to set assertion_id
 47 |     
 48 |     assertion, _ = Assertion.objects.get_or_create(
 49 |         relation=relation,
 50 |         concept1=concepts[0],
 51 |         concept2=concepts[1],
 52 |         frequency=freq,
 53 |         language=lang,
 54 |         defaults=dict(score=0)
 55 |     )
 56 |     #assertion.save()
 57 |     
 58 |     raw_assertion.assertion = assertion
 59 |     raw_assertion.sentence = sentence
 60 |     raw_assertion.save()
 61 | 
 62 |     sentence.set_rating(sentence.creator, 1, csamoa4_activity)
 63 |     raw_assertion.set_rating(sentence.creator, 1, csamoa4_activity)
 64 |     assertion.set_rating(sentence.creator, 1, csamoa4_activity)
 65 | 
 66 |     for rating in pred.rating_set.all():
 67 |         score = rating.rating_value.deltascore
 68 |         if score < -1: score = -1
 69 |         if score > 1: score = 1
 70 |         if rating.activity_id is None:
 71 |             rating_activity = Activity.objects.get(name='unknown')
 72 |         else:
 73 |             rating_activity = rating.activity
 74 |         sentence.set_rating(rating.user, score, rating_activity)
 75 |         raw_assertion.set_rating(rating.user, score, rating_activity)
 76 |         assertion.set_rating(rating.user, score, rating_activity)
 77 | 
 78 |     print '=>', unicode(assertion).encode('utf-8')
 79 |     return [assertion]
 80 | 
 81 | def run(user, start_page=1):
 82 |     batch = Batch()
 83 |     batch.owner = user
 84 |     
 85 |     #generator = yaml.load_all(open('delayed_test.yaml'))
 86 |     #all_entries = list(generator)
 87 |     all_preds = []
 88 |     for actid in good_acts:
 89 |         all_preds.extend(cn3.Predicate.objects.filter(sentence__activity__id=actid, language=en))
 90 |     paginator = Paginator(all_preds,100)
 91 |     #pages = ((i,paginator.page(i)) for i in range(start_page,paginator.num_pages))
 92 | 
 93 |     @transaction.commit_on_success
 94 |     def do_batch(entries):
 95 |         for entry in entries:
 96 |             try:
 97 |                 preds = process_predicate(entry, batch)
 98 |             # changed to an improbable exception for now
 99 |             except ZeroDivisionError, e:
100 |                 # Add entry
101 |                 e.entry = entry
102 | 
103 |                 # Extract traceback
104 |                 e_type, e_value, e_tb = sys.exc_info()
105 |                 e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb ))
106 | 
107 |                 # Raise again
108 |                 raise e
109 | 
110 |     # Process entries
111 |     page_range = [p for p in paginator.page_range if p >= start_page]
112 |     for i in page_range:
113 |         entries = paginator.page(i).object_list
114 |         
115 |         # Update progress
116 |         batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages)
117 |         batch.progress_num = i
118 |         batch.progress_den = paginator.num_pages
119 |         batch.save()
120 | 
121 |         try: do_batch(entries)
122 |         
123 |         except ZeroDivisionError, e:
124 |             batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!"
125 |             batch.remarks = str(e.entry) + "\n" + str(e) + "\n" + e.tb
126 |             print "***TRACEBACK***"
127 |             print batch.remarks
128 |             batch.save()
129 |             raise e
130 | 
if __name__ == '__main__':
    # Script entry point: migrate on behalf of the hard-coded user 'rspeer',
    # starting at the hard-coded page 164.
    run(User.objects.get(username='rspeer'), start_page=164)
134 | 
135 | 


--------------------------------------------------------------------------------
/test/test_ja_harness.py:
--------------------------------------------------------------------------------
  1 | #python-encoding: UTF-8
  2 | 
  3 | from csc.conceptnet4.models import Concept
  4 | from csc.nl.ja.system import *
  5 | from csc.corpus.models import *
  6 | import MeCab
  7 | 
  8 | def GetConcept(concept, lang):
  9 |     strings = []
 10 | 
 11 |     if not Concept.exists(concept, lang):
 12 |         print '{'
 13 |         print '\tword = "%s",' % concept
 14 |         print '\terror = "Word not found!",'
 15 |         print '}'
 16 |         return None
 17 | 
 18 |     result = Concept.get(concept, lang)
 19 | 
 20 |     lang       = result.language.name
 21 |     word       = result.text
 22 |     assertions = str(result.num_assertions)
 23 | 
 24 |     relations = {}
 25 | 
 26 |     for item in result.get_assertions():
 27 |         if not (item.relation.name in relations):
 28 |             relations[item.relation.name] = []
 29 | 
 30 |         relations[item.relation.name].append(
 31 |         {
 32 |             '-- comment': item.__str__(),
 33 |             'first':      item.concept1.text,
 34 |             'second':     item.concept2.text,
 35 |             'score':      item.score,
 36 |             'frequency':  item.frequency.value,
 37 |             'mods':       '',
 38 |         })
 39 | 
 40 |     print '{'
 41 |     print '\tword = "%s",'     % word
 42 |     print '\tlang = "%s",'     % lang
 43 |     print '\tassertions = %s,' % assertions
 44 | 
 45 |     for item.relation.name in relations:
 46 |         print '\t', item.relation.name, ' ='
 47 |         print '\t{'
 48 | 
 49 |         for v in relations[item.relation.name]:
 50 |             print '\t\t{'
 51 |             if v['first'] != word:
 52 |                 print '\t\t\tfirst = "%s",' % v['first']
 53 |             else:
 54 |                 print '\t\t\tsecond = "%s",' % v['second']
 55 | 
 56 |             if v['mods'] != '':
 57 |                 print '\t\t\tmods = "%s",' % v['mods']
 58 | 
 59 |             print '\t\t\tscore = %d,' % v['score']
 60 |             print '\t\t\tfrequency = %d,' % v['frequency']
 61 | 
 62 |             print '\t\t},'
 63 | 
 64 |         print '\t},'
 65 | 
 66 |     print '}'
 67 | 
 68 |     return result
 69 | 
 70 | ####################################################################################################
 71 | ## Main ############################################################################################
 72 | ####################################################################################################
 73 | 
# Language objects and per-language sentence querysets.  The querysets are
# not referenced again in the visible part of this file (presumably kept for
# interactive use -- confirm).
j        = Language.get('ja')
j_s      = Sentence.objects.filter(language=j)
e        = Language.get('en')
e_s      = Sentence.objects.filter(language=e)
parser   = JaParser()

# Sample utterances, each parsed once when the module is loaded.  The result
# list is what listUtterances() and dumpUtterances() index into.
u = \
[
    parser.parse_string(v) for v in \
    [
        '赤いappleが9月に生える。',
        'が',
        'は',
        'を',
        # The next two differ only in the digit: ASCII vs. full-width.
        '1月',
        '１月',
        '私の彼って、最近車買ったんだよぉ？明日は軽井沢へ連れて行ってくれるんだぁ',
        '外国人はよく社会問題の原因だとせめられ、差別されるものです。',
        'すてきな人に会いたい。',
        '大きな人に会いたい。',
        '大きい人に会いたい。',
        '赤い花は素敵。',
        'アメリカには白人がいっぱい住んでいます。',
        'テストには問題ない。',
        '夏休みに見に行った畑のいちごがとても赤かった。',
        '今すぐ行かなければならない。',
        '今日は寝てしまいました。',
        '君に今すぐ会いたい',
        'この毛布は暖かくなかった。',
        'この毛布は暖かくなるんだろう。',
        '彼女のかみが細かくて更々です。',
        '素敵な人に会いたい。',
        # The following sentence is listed twice.
        '教授が「分かった」とさけた。',
        '教授が「分かった」とさけた。',
        '事実はそうではなかった。',
        '米がやすくならなければならなくはないだろう。',
        'その帽子が綺麗です。',
        'その帽子が綺麗でした。',
        'その帽子が綺麗だ。',
        'その帽子が綺麗だった。',
        'その帽子が綺麗である。',
        'その帽子が綺麗であった。',
        '春は寒いであって寂しい時期である。',
        'この世の中じゃ、人間には説明できないことだってあるよ！',
        '赤い',
        '赤くない',
        '赤かった',
        '赤くなかった',
        '赤いです',
        '赤いではありません',
        '赤いじゃありません',
        '赤いではありませんでした',
        '顔が赤くなった',
        '顔が赤くなってしまいました',
        '顔が赤くならなかった',
        '君が面白くなりました',
        '君が結局面白くならなかった',
        'アメリカへのお客様にお知らせします。',
        '札幌には牛乳が人気である。',
        'コンピュータの世界では「モニタ」とは出力の仕方の一種だ。',
        '説明することが無理なときがある。',
        '8月にリンゴが赤くなる',
        '8月にリンゴを赤くする',
        '8月にリンゴを赤くしてやる',
        '8月にリンゴを赤くしておく',
        '人間は哺乳類の一種である',
        'あなたが会議の際にすることの一つは資料を配布するである．',
        'とうもろこしは地面でなくても育つことができる．',
        # The empty string is parsed as well.
        '',
    ]
]
145 | 
def listUtterances(start = 0, count = -1):
    """Print index and surface text of up to `count` utterances from `start`.

    A negative `count` (the default) lists everything from `start` to the end
    of `u`.  `count` is a number of items, matching dumpUtterances(), and is
    clamped so the listing never runs past the end of `u`.
    """
    if count < 0: count = len(u)

    # Clamp so that start + count never exceeds len(u).
    count = min(len(u) - start, count)

    for i in range(start, start + count):
        print('[' + str(i) + '] : ' + u[i].surface)
151 | 
def dumpUtterances(start = -1, count = -1):
    """Call ``dump(True)`` on a run of the parsed utterances in `u`.

    With no arguments every utterance is dumped.  With only `start` given,
    a single utterance is dumped.  Otherwise up to `count` utterances are
    dumped beginning at `start`, never running past the end of `u`.
    """
    total = len(u)

    if start < 0 and count < 0:
        start, count = 0, total
    elif count == -1:
        count = 1

    stop = start + min(total - start, count)

    for index in range(start, stop):
        u[index].dump(True)
164 | 
165 | listUtterances()
166 | 
def objMethods(obj):
    """Return a sorted list of the names defined directly on `obj`'s class.

    Despite the name this includes every entry of the class ``__dict__``
    (methods and other attributes alike) and excludes anything inherited
    from base classes.
    """
    # sorted() accepts any iterable of keys and always returns a new list.
    return sorted(obj.__class__.__dict__)
171 | 
172 | 
def dumpSentences(lang):
    """Write the text of every sentence in `lang` to ``/tmp/out_<lang>.txt``.

    `lang` is used both as part of the file name (so it must be a string)
    and as the ``language`` filter on Sentence.  One sentence is written per
    line, passed through ja_enc() first (presumably an encoding helper from
    csc.nl.ja.system -- confirm).  A progress line is printed after every
    1000 sentences.
    """
    div = 1000
    i   = 0

    f = open("/tmp/out_" + lang + ".txt", "w")
    # try/finally guarantees the file is flushed and closed even if the
    # query or the encoding fails part-way through.
    try:
        for s in Sentence.objects.filter(language = lang):
            i += 1
            if not (i % div):
                print(str(i) + " sentences dumped")

            f.write(ja_enc(s.text))
            f.write("\n")
    finally:
        f.close()
185 | 
186 | 


--------------------------------------------------------------------------------
/conceptnet/concepttools/ConceptNetGUI.py:
--------------------------------------------------------------------------------
  1 | from Tkinter import *
  2 | import concepttools,sys
  3 | 
__version__ = "2.0"
__author__ = "hugo@media.mit.edu"
__url__ = 'www.conceptnet.org'
# Not referenced anywhere else in this script.
config_filename = 'ConceptNet.ini'
# Shown in the results box at startup and whenever a query is run with an
# empty input box.
welcome_text = """
    ***************************************************
    Welcome to the ConceptNet v2 mini-browser!
    (for more info, please visit www.conceptnet.org)
    ***************************************************
    The purpose of this browser is to allow you to
    explore the ConceptNet API interactively!
    Instructions for browsing:
    - First, click on one of the light-green or yellow
    buttons to select a mode of browsing
    - In the red box, enter some input text
        - Light-green buttons signify "node-level" modes,
        so you may only input concepts like "apple" or
        "eat food". You'll notice that the query
        automatically executes when you press the space
        bar or the return key. In this mode, concepts
        must be given in normalized form (verbs in
        infinitive form, no plurals, no "the" or "a")
        - Yellow buttons signify "document-level" modes, so
        you can paste any amount of text into the red
        box (e.g. a sentence to a document) and the text
        doesn't have to be normalized. In this mode, you
        must press the return key to execute your query.
    - Results are displayed in the deep-green box and
    you may have to scroll to see all of the results
    - Most modes are self-explanatory, but for
    additional information, please consult the api's
    html documentation and www.conceptnet.org
    That's all! So enjoy!
"""

# ConceptTools instance that execution2() queries.
c = concepttools.ConceptTools()
root = Tk()
# Holds the value of the currently selected mode radio button.
mode_var = StringVar()

# NOTE: throughout this script several calls are chained on one line with
# commas.  Each such line just builds and discards a tuple of the calls'
# return values; the calls run left to right.
root.title("conceptnet 2.0 mini-browser"),root.option_add('*Font',('Courier', 14, 'bold'))

# frame1 holds the input box, win2 the row of mode buttons, frame3 the
# results box.
frame1,win2,frame3 = Frame(root),Frame(root,height="1",bg="#CCFF99"),Frame(root)

frame1.pack(fill=BOTH,expand=NO),win2.pack(fill=BOTH,expand=NO),frame3.pack(fill=BOTH,expand=YES)

# win: red input Text; win3: green results Text; one Scrollbar for each.
win,win3,win_scroll,win3_scroll = Text(frame1,bg="#FF3300",fg="white",height="3",wrap=WORD),Text(frame3,wrap=WORD,height="30",width="20",bg="#669933",fg="white"),Scrollbar(frame1),Scrollbar(frame3)

# win2 is packed a second time here, with the same options as above.
win_scroll.pack(side=RIGHT,fill=Y),win3_scroll.pack(side=RIGHT,fill=Y),win.pack(fill=BOTH,expand=NO),win2.pack(fill=BOTH,expand=NO),win3.pack(fill=BOTH,expand=1)

# Wire each Text widget and its Scrollbar to each other.
win.config(yscrollcommand=win_scroll.set),win3.config(yscrollcommand=win3_scroll.set),win_scroll.config(command=win.yview),win3_scroll.config(command=win3.yview)

# Mode buttons, all bound to mode_var.  The first four use the light-green
# background (#CCFF99), the last four the yellow one (#FFFF66); the welcome
# text calls these "node-level" and "document-level" modes respectively.
Radiobutton(win2,text="BROWSE",variable=mode_var,value='browse',fg="#FF3399",bg='#CCFF99',indicatoron=0).pack(side=LEFT),Radiobutton(win2,text="CONTEXT",variable=mode_var,value='context',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="PROJECTION",variable=mode_var,value='projection',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="ANALOGY",variable=mode_var,value='analogy',indicatoron=0,fg="#FF3399",bg='#CCFF99').pack(side=LEFT),Radiobutton(win2,text="GUESS CONCEPT",variable=mode_var,value='guessconcept',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="GUESS TOPIC",variable=mode_var,value='guesstopic',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="GUESS MOOD",variable=mode_var,value='guessmood',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT),Radiobutton(win2,text="SUMMARIZE",variable=mode_var,value='summarize',indicatoron=0,fg="#FF3399",bg='#FFFF66').pack(side=LEFT)

# Start with the welcome text in the results box.
win3.insert(0.0,welcome_text)
 58 | 
 59 | def execution1(x):
 60 | 	#if mode_var.get() not in ['guessmood','guesstopic','guessconcept','summarize']:
 61 | 	#	return execution2(x)
 62 | 	#else:
 63 | 		return False
 64 | 
 65 | def execution2(x):
 66 | 	win3.delete(0.0,END)
 67 | 	if win.get(0.0,END).strip()=='':
 68 | 		win3.insert(0.0,welcome_text)
 69 | 		return
 70 | 	
 71 | 	mode = mode_var.get() 
 72 | 	input = win.get(0.0,END).encode('ascii','ignore').strip()
 73 | 	concepts = [tok.strip() for tok in input.split(',')]
 74 | 	if mode == 'context':
 75 | 		result = '\n'.join(['%s (%d%%)' % (concept, weight*100) for concept, weight in c.spreading_activation(concepts)] ) +'\n\n'
 76 | 	
 77 | 	elif mode == 'projection':
 78 | 		result = '\n\n'.join([ v[0].upper() + '\n' + '\n'.join( [ z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in v[1] ] [:10]) for v in c.get_all_projections(concepts)] ) +'\n\n'
 79 | 
 80 | 	elif mode == 'analogy':
 81 | 		result = '\n\n'.join( ['[~' + match[0] + '] (' + str(match[2]) + ')\n  ' + '\n  '.join( ['==' + struct[0] + '==> ' + struct[1] + ' (' +str(struct[2]) + ') ' for struct in match[1]] ) for match in c.get_analogous_concepts(input)])
 82 | 
 83 | 	elif mode == 'guessconcept':
 84 | 		result = '\n\n'.join( [ '[is it: ' + match[0] + '?] (' + str(match[2]) + ')\n  ' + '\n  '.join([ '==' + struct[0] + '==> ' + struct[1] + ' (' + str(struct[2]) + ') ' for struct in match[1]] ) for match in c.nltools.guess_concept(input)])
 85 | 
 86 | 	elif mode == 'guesstopic':
 87 | 		result = '\n'.join( [ z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in c.nltools.guess_topic(input)[1]]) + '\n\n'
 88 | 
 89 | 	elif mode == 'guessmood':
 90 | 		result = '\n'.join([ z[0] + ' (' + str(int(z[1]*100)) + '%)' for z in c.nltools.guess_mood(input) ] ) + '\n\n'
 91 | 
 92 | 	elif mode == 'summarize':
 93 | 		result = c.nltools.summarize_document(input) + '\n\n'
 94 | 
 95 | 	elif mode == 'foo':
 96 | 		result = ''
 97 | 		
 98 | 	else:
 99 | 		result = c.display_node(input) + '\n\n'
100 | 
101 | 	win3.insert(0.0,result)
102 | 	return True
103 | 
104 | win.bind('<space>',execution1),win.bind('<Return>',execution2)
105 | root.mainloop()
106 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/migrations/0002_rename_tables.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from south.db import db
 3 | from django.db import models
 4 | from conceptnet.corpus.models import *
 5 | 
class Migration:
    """Schema migration that renames the three ``corpus`` tables.

    ``forwards`` renames ``sentences``, ``tagged_sentences`` and
    ``dependency_parses`` to ``corpus_``-prefixed names; ``backwards``
    restores the old names.  ``models`` is a snapshot of model definitions
    (presumably the frozen state South uses to build the ``orm`` argument --
    confirm against the South version in use).
    """
    
    def forwards(self, orm):
        """Rename the old table names to their ``corpus_*`` names."""
        db.rename_table('sentences', 'corpus_sentence')
        db.rename_table('tagged_sentences', 'corpus_taggedsentence')
        db.rename_table('dependency_parses', 'corpus_dependencyparse')
    
    def backwards(self, orm):
        """Rename the ``corpus_*`` tables back to their old names."""
        db.rename_table('corpus_sentence', 'sentences')
        db.rename_table('corpus_taggedsentence', 'tagged_sentences')
        db.rename_table('corpus_dependencyparse', 'dependency_parses')
    
    # Snapshot of the ``corpus`` models and of the auth, contenttypes, events
    # and voting models they refer to.  Every value is a (field class path,
    # positional args, keyword args) triple whose arguments are stored as
    # strings.
    models = {
        'auth.group': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '80', 'unique': 'True'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'unique_together': "(('content_type', 'codename'),)"},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'})
        },
        'contenttypes.contenttype': {
            'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.dependencyparse': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index1': ('django.db.models.fields.IntegerField', [], {}),
            'index2': ('django.db.models.fields.IntegerField', [], {}),
            'linktype': ('django.db.models.fields.CharField', [], {'max_length': '20'}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']"}),
            'word1': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'word2': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.language': {
            'id': ('django.db.models.fields.CharField', [], {'max_length': '16', 'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'sentence_count': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'corpus.sentence': {
            'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}),
            'created_on': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'creator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'score': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'text': ('django.db.models.fields.TextField', [], {}),
            'votes': ('django.contrib.contenttypes.generic.GenericRelation', [], {'to': "orm['voting.Vote']"})
        },
        'corpus.taggedsentence': {
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']", 'primary_key': 'True'}),
            'text': ('django.db.models.fields.TextField', [], {})
        },
        'events.activity': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {})
        },
        'voting.vote': {
            'Meta': {'unique_together': "(('user', 'content_type', 'object_id'),)", 'db_table': "'votes'"},
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'vote': ('django.db.models.fields.SmallIntegerField', [], {})
        }
    }
    
    # Apps whose models are fully described by ``models`` above.
    complete_apps = ['corpus']
97 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/build.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys, traceback
  3 | from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\
  4 |   Frequency, Relation, SurfaceForm, Concept, Rating
  5 | import conceptnet.models as cn3
  6 | from corpus.models import Sentence, Language, Activity
  7 | from django.contrib.auth.models import User
  8 | from django.core.paginator import Paginator
  9 | from django.db import transaction
 10 | from corpus.parse.adverbs import map_adverb
 11 | from itertools import islice
 12 | import yaml
 13 | 
 14 | csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating')
 15 | good_acts = [ 16, 20, 22, 24, 28, 31, 32 ]
 16 | 
 17 | def process_yaml(entry, lang, batch):
 18 |     if entry is None: return []
 19 |     frametext, id, matches, reltext = (entry['frametext'], entry['id'],
 20 |     entry['matches'], entry['reltext'])
 21 |     sentence = Sentence.objects.get(id=id)
 22 |     print sentence.text.encode('utf-8')
 23 |     if sentence.activity.id in good_acts:
 24 |         print "(we have a better parse)"
 25 |         return []
 26 |     if (sentence.text.startswith('Situation:')
 27 |         or sentence.text.startswith('The statement')
 28 |         or sentence.text.startswith('To understand')
 29 |         or sentence.text.startswith('In the event')):
 30 |             print "* skipped *"
 31 |             return []
 32 |     if matches.get(2).startswith('do the following'):
 33 |         print "** skipped **"
 34 |         return []
 35 |     
 36 |     if reltext is None or reltext == 'junk': return []
 37 | 
 38 |     # quick fixes
 39 |     if reltext == 'AtLocation' and matches.get('a') == 'of': return []
 40 |     if reltext == 'AtLocation' and matches.get('a') == 'near':
 41 |         reltext = 'LocatedNear'
 42 |     if reltext in ['IsA', 'CapableOf'] and matches.get('a') in ['in', 'on', 'at', 'by']:
 43 |         reltext = 'AtLocation'
 44 |         matches['a'] = ''
 45 |     for val in matches.values():
 46 |         if len(val.split()) > 6:
 47 |             # we'd rather wait to parse this better.
 48 |             return []
 49 | 
 50 |     relation = Relation.objects.get(name=reltext)
 51 |     
 52 |     surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True)
 53 |                      for i in (1, 2)]
 54 |     concepts = [s.concept for s in surface_forms]
 55 | 
 56 |     # FIXME: english only so far
 57 |     freq = map_adverb(matches.get('a', ''))
 58 |     
 59 |     frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
 60 |                                            text=frametext,
 61 |                                            defaults=dict(frequency=freq, 
 62 |                                                          goodness=1))
 63 |     frame.save()
 64 |     
 65 |     raw_assertion, _ = RawAssertion.objects.get_or_create(
 66 |         surface1=surface_forms[0],
 67 |         surface2=surface_forms[1],
 68 |         frame=frame,
 69 |         language=lang,
 70 |         creator=sentence.creator,
 71 |         defaults=dict(batch=batch))
 72 |     # still need to set assertion_id
 73 |     
 74 |     assertion, _ = Assertion.objects.get_or_create(
 75 |         relation=relation,
 76 |         concept1=concepts[0],
 77 |         concept2=concepts[1],
 78 |         frequency=freq,
 79 |         language=lang,
 80 |         defaults=dict(score=0)
 81 |     )
 82 |     assertion.score += 1
 83 |     #assertion.save()
 84 |     
 85 |     raw_assertion.assertion = assertion
 86 |     raw_assertion.sentence = sentence
 87 |     raw_assertion.save()
 88 | 
 89 |     sentence.set_rating(sentence.creator, 1, csamoa4_activity)
 90 |     raw_assertion.set_rating(sentence.creator, 1, csamoa4_activity)
 91 |     assertion.set_rating(sentence.creator, 1, csamoa4_activity)
 92 | 
 93 |     for old_raw in cn3.RawAssertion.objects.filter(sentence=sentence):
 94 |         pred = old_raw.predicate
 95 |         if not pred: continue
 96 |         for rating in pred.rating_set.all():
 97 |             score = rating.rating_value.deltascore
 98 |             if score > 0: score = 1
 99 |             if score < 0: score = -1
100 |             if rating.activity_id is None:
101 |                 rating_activity = Activity.objects.get(name='unknown')
102 |             else:
103 |                 rating_activity = rating.activity
104 |             sentence.set_rating(rating.user, score, rating_activity)
105 |             raw_assertion.set_rating(rating.user, score, rating_activity)
106 |             assertion.set_rating(rating.user, score, rating_activity)
107 |     
108 |     print '=>', unicode(assertion).encode('utf-8')
109 |     return [assertion]
110 | 
111 | def run(user, lang, start_page=1):
112 |     batch = Batch()
113 |     batch.owner = user
114 |     
115 |     #generator = yaml.load_all(open('delayed_test.yaml'))
116 |     #all_entries = list(generator)
117 |     all_entries = pickle.load(open('yamlparsed.pickle'))
118 |     paginator = Paginator(all_entries,100)
119 |     #pages = ((i,paginator.page(i)) for i in range(start_page,paginator.num_pages))
120 | 
121 |     @transaction.commit_on_success
122 |     def do_batch(entries):
123 |         for entry in entries:
124 |             try:
125 |                 preds = process_yaml(entry, lang, batch)
126 |             # changed to an improbable exception for now
127 |             except ZeroDivisionError, e:
128 |                 # Add entry
129 |                 e.entry = entry
130 | 
131 |                 # Extract traceback
132 |                 e_type, e_value, e_tb = sys.exc_info()
133 |                 e.tb = "\n".join(traceback.format_exception( e_type, e_value, e_tb ))
134 | 
135 |                 # Raise again
136 |                 raise e
137 | 
138 |     # Process entries
139 |     page_range = [p for p in paginator.page_range if p >= start_page]
140 |     for i in page_range:
141 |         entries = paginator.page(i).object_list
142 |         
143 |         # Update progress
144 |         batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages)
145 |         batch.progress_num = i
146 |         batch.progress_den = paginator.num_pages
147 |         batch.save()
148 | 
149 |         try: do_batch(entries)
150 |         
151 |         except ZeroDivisionError, e:
152 |             batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!"
153 |             batch.remarks = str(e.entry) + "\n" + str(e) + "\n" + e.tb
154 |             print "***TRACEBACK***"
155 |             print batch.remarks
156 |             batch.save()
157 |             raise e
158 | 
# Imported here, at the bottom of the module, rather than with the other
# imports at the top.
import migrate_templated

if __name__ == '__main__':
    # Run the parse import from page 214 on, then the templated migration
    # from its first page, both on behalf of the same user.
    user = User.objects.get(username='rspeer')
    lang = Language.get('en')
    run(user=user, lang=lang, start_page=214)
    migrate_templated.run(user, start_page=1)
165 | 
166 | 


--------------------------------------------------------------------------------
/conceptnet/lib/voting/views.py:
--------------------------------------------------------------------------------
  1 | from django.contrib.contenttypes.models import ContentType
  2 | from django.core.exceptions import ObjectDoesNotExist
  3 | from django.http import Http404, HttpResponse, HttpResponseRedirect
  4 | from django.contrib.auth.views import redirect_to_login
  5 | from django.template import loader, RequestContext
  6 | from django.utils import simplejson
  7 | 
  8 | from voting.models import Vote
  9 | 
 10 | VOTE_DIRECTIONS = (('up', 1), ('down', -1), ('clear', 0))
 11 | 
 12 | def vote_on_object(request, model, direction, post_vote_redirect=None,
 13 |         object_id=None, slug=None, slug_field=None, template_name=None,
 14 |         template_loader=loader, extra_context=None, context_processors=None,
 15 |         template_object_name='object', allow_xmlhttprequest=False):
 16 |     """
 17 |     Generic object vote function.
 18 | 
 19 |     The given template will be used to confirm the vote if this view is
 20 |     fetched using GET; vote registration will only be performed if this
 21 |     view is POSTed.
 22 | 
 23 |     If ``allow_xmlhttprequest`` is ``True`` and an XMLHttpRequest is
 24 |     detected by examining the ``HTTP_X_REQUESTED_WITH`` header, the
 25 |     ``xmlhttp_vote_on_object`` view will be used to process the
 26 |     request - this makes it trivial to implement voting via
 27 |     XMLHttpRequest with a fallback for users who don't have JavaScript
 28 |     enabled.
 29 | 
 30 |     Templates:``<app_label>/<model_name>_confirm_vote.html``
 31 |     Context:
 32 |         object
 33 |             The object being voted on.
 34 |         direction
 35 |             The type of vote which will be registered for the object.
 36 |     """
 37 |     if allow_xmlhttprequest and request.is_ajax():
 38 |         return xmlhttprequest_vote_on_object(request, model, direction,
 39 |                                              object_id=object_id, slug=slug,
 40 |                                              slug_field=slug_field)
 41 | 
 42 |     if extra_context is None: extra_context = {}
 43 |     if not request.user.is_authenticated():
 44 |         return redirect_to_login(request.path)
 45 | 
 46 |     try:
 47 |         vote = dict(VOTE_DIRECTIONS)[direction]
 48 |     except KeyError:
 49 |         raise AttributeError("'%s' is not a valid vote type." % vote_type)
 50 | 
 51 |     # Look up the object to be voted on
 52 |     lookup_kwargs = {}
 53 |     if object_id:
 54 |         lookup_kwargs['%s__exact' % model._meta.pk.name] = object_id
 55 |     elif slug and slug_field:
 56 |         lookup_kwargs['%s__exact' % slug_field] = slug
 57 |     else:
 58 |         raise AttributeError('Generic vote view must be called with either '
 59 |                              'object_id or slug and slug_field.')
 60 |     try:
 61 |         obj = model._default_manager.get(**lookup_kwargs)
 62 |     except ObjectDoesNotExist:
 63 |         raise Http404, 'No %s found for %s.' % (model._meta.app_label, lookup_kwargs)
 64 | 
 65 |     if request.method == 'POST':
 66 |         if post_vote_redirect is not None:
 67 |             next = post_vote_redirect
 68 |         elif request.REQUEST.has_key('next'):
 69 |             next = request.REQUEST['next']
 70 |         elif hasattr(obj, 'get_absolute_url'):
 71 |             if callable(getattr(obj, 'get_absolute_url')):
 72 |                 next = obj.get_absolute_url()
 73 |             else:
 74 |                 next = obj.get_absolute_url
 75 |         else:
 76 |             raise AttributeError('Generic vote view must be called with either '
 77 |                                  'post_vote_redirect, a "next" parameter in '
 78 |                                  'the request, or the object being voted on '
 79 |                                  'must define a get_absolute_url method or '
 80 |                                  'property.')
 81 |         Vote.objects.record_vote(obj, request.user, vote)
 82 |         return HttpResponseRedirect(next)
 83 |     else:
 84 |         if not template_name:
 85 |             template_name = '%s/%s_confirm_vote.html' % (
 86 |                 model._meta.app_label, model._meta.object_name.lower())
 87 |         t = template_loader.get_template(template_name)
 88 |         c = RequestContext(request, {
 89 |             template_object_name: obj,
 90 |             'direction': direction,
 91 |         }, context_processors)
 92 |         for key, value in extra_context.items():
 93 |             if callable(value):
 94 |                 c[key] = value()
 95 |             else:
 96 |                 c[key] = value
 97 |         response = HttpResponse(t.render(c))
 98 |         return response
 99 | 
def json_error_response(error_message):
    """
    Return an HttpResponse whose body is a JSON object with ``success``
    set to false and ``error_message`` set to the given text.
    """
    payload = dict(success=False, error_message=error_message)
    body = simplejson.dumps(payload)
    return HttpResponse(body)
103 | 
def xmlhttprequest_vote_on_object(request, model, direction,
    object_id=None, slug=None, slug_field=None):
    """
    Generic object vote function for use via XMLHttpRequest.

    Every outcome, including errors, is reported as an ``HttpResponse``
    carrying a JSON object; nothing is raised for invalid input.

    Properties of the resulting JSON object:
        success
            ``true`` if the vote was successfully processed, ``false``
            otherwise.
        score
            The object's updated score and number of votes if the vote
            was successfully processed.
        error_message
            Contains an error message if the vote was not successfully
            processed.
    """
    # Votes change state, so only POST may register one (not just "not GET").
    if request.method != 'POST':
        return json_error_response(
            'XMLHttpRequest votes can only be made using POST.')
    if not request.user.is_authenticated():
        return json_error_response('Not authenticated.')

    try:
        vote = dict(VOTE_DIRECTIONS)[direction]
    except KeyError:
        return json_error_response(
            '\'%s\' is not a valid vote type.' % direction)

    # Look up the object to be voted on
    lookup_kwargs = {}
    if object_id:
        lookup_kwargs['%s__exact' % model._meta.pk.name] = object_id
    elif slug and slug_field:
        lookup_kwargs['%s__exact' % slug_field] = slug
    else:
        return json_error_response('Generic XMLHttpRequest vote view must be '
                                   'called with either object_id or slug and '
                                   'slug_field.')
    try:
        obj = model._default_manager.get(**lookup_kwargs)
    except ObjectDoesNotExist:
        return json_error_response(
            'No %s found for %s.' % (model._meta.verbose_name, lookup_kwargs))

    # Vote and respond
    Vote.objects.record_vote(obj, request.user, vote)
    return HttpResponse(simplejson.dumps({
        'success': True,
        'score': Vote.objects.get_score(obj),
    }))
154 | 


--------------------------------------------------------------------------------
/doc/source/_static/graph/others.dot:
--------------------------------------------------------------------------------
  1 | 
  2 | digraph name {
  3 |   fontname = "Helvetica"
  4 |   fontsize = 8
  5 | 
  6 |   node [
  7 |     fontname = "Helvetica"
  8 |     fontsize = 8
  9 |     shape = "plaintext"
 10 |   ]
 11 |   edge [
 12 |     fontname = "Helvetica"
 13 |     fontsize = 8
 14 |   ]
 15 | 
 16 | 
 17 | 
 18 | 
 19 | 
 20 |   
 21 |     voting_models_Vote [label=<
 22 |     <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
 23 |      <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
 24 |      ><FONT FACE="Helvetica Bold" COLOR="white"
 25 |      >Vote</FONT></TD></TR>
 26 | 
 27 |     
 28 |         
 29 |         <TR><TD ALIGN="LEFT" BORDER="0"
 30 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">id</FONT
 31 |         ></TD>
 32 |         <TD ALIGN="LEFT"
 33 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">AutoField</FONT
 34 |         ></TD></TR>
 35 |         
 36 |         <TR><TD ALIGN="LEFT" BORDER="0"
 37 |         ><FONT FACE="Helvetica Bold">user</FONT
 38 |         ></TD>
 39 |         <TD ALIGN="LEFT"
 40 |         ><FONT FACE="Helvetica Bold">ForeignKey</FONT
 41 |         ></TD></TR>
 42 |         
 43 |         <TR><TD ALIGN="LEFT" BORDER="0"
 44 |         ><FONT FACE="Helvetica Bold">content_type</FONT
 45 |         ></TD>
 46 |         <TD ALIGN="LEFT"
 47 |         ><FONT FACE="Helvetica Bold">ForeignKey</FONT
 48 |         ></TD></TR>
 49 |         
 50 |         <TR><TD ALIGN="LEFT" BORDER="0"
 51 |         ><FONT FACE="Helvetica Bold">object_id</FONT
 52 |         ></TD>
 53 |         <TD ALIGN="LEFT"
 54 |         ><FONT FACE="Helvetica Bold">PositiveIntegerField</FONT
 55 |         ></TD></TR>
 56 |         
 57 |         <TR><TD ALIGN="LEFT" BORDER="0"
 58 |         ><FONT FACE="Helvetica Bold">vote</FONT
 59 |         ></TD>
 60 |         <TD ALIGN="LEFT"
 61 |         ><FONT FACE="Helvetica Bold">SmallIntegerField</FONT
 62 |         ></TD></TR>
 63 |         
 64 |     
 65 |     </TABLE>
 66 |     >]
 67 |   
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 |   
 75 |     events_models_Activity [label=<
 76 |     <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
 77 |      <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
 78 |      ><FONT FACE="Helvetica Bold" COLOR="white"
 79 |      >Activity</FONT></TD></TR>
 80 | 
 81 |     
 82 |         
 83 |         <TR><TD ALIGN="LEFT" BORDER="0"
 84 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">id</FONT
 85 |         ></TD>
 86 |         <TD ALIGN="LEFT"
 87 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">AutoField</FONT
 88 |         ></TD></TR>
 89 |         
 90 |         <TR><TD ALIGN="LEFT" BORDER="0"
 91 |         ><FONT FACE="Helvetica Bold">name</FONT
 92 |         ></TD>
 93 |         <TD ALIGN="LEFT"
 94 |         ><FONT FACE="Helvetica Bold">TextField</FONT
 95 |         ></TD></TR>
 96 |         
 97 |     
 98 |     </TABLE>
 99 |     >]
100 |   
101 |     events_models_Event [label=<
102 |     <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
103 |      <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
104 |      ><FONT FACE="Helvetica Bold" COLOR="white"
105 |      >Event</FONT></TD></TR>
106 | 
107 |     
108 |         
109 |         <TR><TD ALIGN="LEFT" BORDER="0"
110 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">id</FONT
111 |         ></TD>
112 |         <TD ALIGN="LEFT"
113 |         ><FONT COLOR="#7B7B7B" FACE="Helvetica Bold">AutoField</FONT
114 |         ></TD></TR>
115 |         
116 |         <TR><TD ALIGN="LEFT" BORDER="0"
117 |         ><FONT FACE="Helvetica Bold">user</FONT
118 |         ></TD>
119 |         <TD ALIGN="LEFT"
120 |         ><FONT FACE="Helvetica Bold">ForeignKey</FONT
121 |         ></TD></TR>
122 |         
123 |         <TR><TD ALIGN="LEFT" BORDER="0"
124 |         ><FONT FACE="Helvetica Bold">content_type</FONT
125 |         ></TD>
126 |         <TD ALIGN="LEFT"
127 |         ><FONT FACE="Helvetica Bold">ForeignKey</FONT
128 |         ></TD></TR>
129 |         
130 |         <TR><TD ALIGN="LEFT" BORDER="0"
131 |         ><FONT FACE="Helvetica Bold">object_id</FONT
132 |         ></TD>
133 |         <TD ALIGN="LEFT"
134 |         ><FONT FACE="Helvetica Bold">PositiveIntegerField</FONT
135 |         ></TD></TR>
136 |         
137 |         <TR><TD ALIGN="LEFT" BORDER="0"
138 |         ><FONT FACE="Helvetica Bold">activity</FONT
139 |         ></TD>
140 |         <TD ALIGN="LEFT"
141 |         ><FONT FACE="Helvetica Bold">ForeignKey</FONT
142 |         ></TD></TR>
143 |         
144 |         <TR><TD ALIGN="LEFT" BORDER="0"
145 |         ><FONT FACE="Helvetica Bold">timestamp</FONT
146 |         ></TD>
147 |         <TD ALIGN="LEFT"
148 |         ><FONT FACE="Helvetica Bold">DateTimeField</FONT
149 |         ></TD></TR>
150 |         
151 |     
152 |     </TABLE>
153 |     >]
154 |   
155 | 
156 | 
157 | 
158 | 
159 |   
160 |     
161 |     
162 |     django_contrib_auth_models_User [label=<
163 |         <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
164 |         <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
165 |         ><FONT FACE="Helvetica Bold" COLOR="white"
166 |         >User</FONT></TD></TR>
167 |         </TABLE>
168 |         >]
169 |     
170 |     voting_models_Vote -> django_contrib_auth_models_User
171 |     [label="user"] ;
172 |     
173 |     
174 |     django_contrib_contenttypes_models_ContentType [label=<
175 |         <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
176 |         <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
177 |         ><FONT FACE="Helvetica Bold" COLOR="white"
178 |         >ContentType</FONT></TD></TR>
179 |         </TABLE>
180 |         >]
181 |     
182 |     voting_models_Vote -> django_contrib_contenttypes_models_ContentType
183 |     [label="content_type"] ;
184 |     
185 |   
186 | 
187 | 
188 |   
189 |     
190 |   
191 |     
192 |     
193 |     django_contrib_auth_models_User [label=<
194 |         <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
195 |         <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
196 |         ><FONT FACE="Helvetica Bold" COLOR="white"
197 |         >User</FONT></TD></TR>
198 |         </TABLE>
199 |         >]
200 |     
201 |     events_models_Event -> django_contrib_auth_models_User
202 |     [label="user"] ;
203 |     
204 |     
205 |     django_contrib_contenttypes_models_ContentType [label=<
206 |         <TABLE BGCOLOR="palegoldenrod" BORDER="0" CELLBORDER="0" CELLSPACING="0">
207 |         <TR><TD COLSPAN="2" CELLPADDING="4" ALIGN="CENTER" BGCOLOR="olivedrab4"
208 |         ><FONT FACE="Helvetica Bold" COLOR="white"
209 |         >ContentType</FONT></TD></TR>
210 |         </TABLE>
211 |         >]
212 |     
213 |     events_models_Event -> django_contrib_contenttypes_models_ContentType
214 |     [label="content_type"] ;
215 |     
216 |     
217 |     events_models_Event -> events_models_Activity
218 |     [label="activity"] ;
219 |     
220 |   
221 | 
222 | 
223 | }
224 | 
225 | 


--------------------------------------------------------------------------------
/conceptnet/django_settings/__init__.py:
--------------------------------------------------------------------------------
  1 | import sys, os
  2 | 
  3 | ###
  4 | ### Database configuration
  5 | ###
  6 | 
# ConceptNet uses a database configuration file to determine how to
#    connect to the database. It's just a normal Python file (e.g.,
#    db_config.py) that contains the Django database settings (see
#    http://docs.djangoproject.com/en/dev/intro/tutorial01/#database-setup
#    or
#    http://docs.djangoproject.com/en/dev/ref/settings/#setting-DATABASE_ENGINE).
 13 | #
 14 | # You just have to tell ConceptNet how to find this file. You can put
 15 | # the full path to this file in the CONCEPTNET_DB_CONFIG environment
 16 | # variable, or you can put the file on the Python path.
 17 | #
 18 | # Added bonuses:
 19 | # 1. You can use either DATABASE_ or DB_ in your configuration variables.
 20 | # 2. If DATABASE_ENGINE is sqlite3, DATABASE_NAME will be treated as relative
 21 | #    to the database config file.
 22 | # 3. You can use '~' in the environment variable to mean your home directory,
 23 | #    like ~/commonsense/db_config.py
 24 | 
 25 | if 'CONCEPTNET_DB_CONFIG' in os.environ:
 26 |     db_config = {}
 27 |     db_config_path = os.path.expanduser(os.environ['CONCEPTNET_DB_CONFIG'])
 28 |     db_config_dir = os.path.dirname(db_config_path)
 29 |     execfile(db_config_path, db_config)
 30 | else:
 31 |     try:
 32 |         import db_config
 33 |         db_config_dir = os.path.abspath(os.path.dirname(db_config.__file__))
 34 |         db_config = db_config.__dict__
 35 |     except ImportError:
 36 |         from conceptnet.django_settings import default_db_config
 37 |         db_config = default_db_config.__dict__
 38 |         if not os.path.exists(db_config['DB_NAME']):
 39 |             from conceptnet.django_settings import db_downloader
 40 |             if not db_downloader.prompt_for_download(db_config['DB_NAME']):
 41 |                 raise SystemExit
 42 | 
 43 | def get_db_config(param, default=''):
 44 |     long_param = 'DATABASE_'+param
 45 |     short_param = 'DB_'+param
 46 |     if long_param in db_config: return db_config[long_param]
 47 |     if short_param in db_config: return db_config[short_param]
 48 |     return default
 49 | 
 50 | 
 51 | def relative_to_db_config(path):
 52 |     if not os.path.isabs(path):
 53 |         path = os.path.join(db_config_dir, path)
 54 |     return os.path.normpath(path)
 55 | 
 56 | 
 57 | # This sets the Python path to include the distributed libraries.
 58 | import conceptnet.lib
 59 | 
 60 | DEBUG = db_config.get('DEBUG', False)
 61 | TEMPLATE_DEBUG = DEBUG
 62 | 
 63 | ADMINS = ()
 64 | 
 65 | MANAGERS = ADMINS
 66 | 
 67 | DATABASE_ENGINE = get_db_config('ENGINE')    # 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
 68 | DATABASE_NAME = get_db_config('NAME')        # Or path to database file if using sqlite3.
 69 | if DATABASE_ENGINE == 'sqlite3':
 70 |     # normalize the path name
 71 |     DATABASE_NAME = relative_to_db_config(DATABASE_NAME)
 72 | DATABASE_USER = get_db_config('USER', '')        # Not used with sqlite3.
 73 | DATABASE_PASSWORD = get_db_config('PASSWORD', '') # Not used with sqlite3.
 74 | DATABASE_HOST = get_db_config('HOST', '')        # Set to empty string for localhost. Not used with sqlite3.
 75 | DATABASE_PORT = get_db_config('PORT', '')        # Set to empty string for default. Not used with sqlite3.
 76 | DATABASE_OPTIONS = get_db_config('OPTIONS', {})
 77 | 
 78 | DATABASES = {
 79 |     'default': {
 80 |         'ENGINE': 'django.db.backends.'+DATABASE_ENGINE,
 81 |         'NAME': DATABASE_NAME,
 82 |         'USER': DATABASE_USER,
 83 |         'PASSWORD': DATABASE_PASSWORD,
 84 |         'HOST': DATABASE_HOST,
 85 |         'PORT': DATABASE_PORT,
 86 |         'OPTIONS': DATABASE_OPTIONS
 87 |     }
 88 | }
 89 | 
 90 | # Local time zone for this installation. All choices can be found here:
 91 | # http://www.postgresql.org/docs/current/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
 92 | TIME_ZONE = 'America/New_York'
 93 | 
 94 | # Language code for this installation. All choices can be found here:
 95 | # http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
 96 | # http://blogs.law.harvard.edu/tech/stories/storyReader$15
 97 | LANGUAGE_CODE = 'en-us'
 98 | 
 99 | SITE_ID = 1
100 | 
101 | # If you set this to False, Django will make some optimizations so as not
102 | # to load the internationalization machinery.
103 | USE_I18N = True
104 | 
105 | # Absolute path to the directory that holds media.
106 | # Example: "/home/media/media.lawrence.com/"
107 | MEDIA_ROOT = ''
108 | 
109 | # URL that handles the media served from MEDIA_ROOT.
110 | # Example: "http://media.lawrence.com"
111 | MEDIA_URL = ''
112 | 
113 | # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
114 | # trailing slash.
115 | # Examples: "http://foo.com/media/", "/media/".
116 | ADMIN_MEDIA_PREFIX = '/media/'
117 | 
# Make this unique, and don't share it with anybody.
# NOTE(security): the fallback value below is part of the published source,
# so it is not secret.  Set SECRET_KEY in the database configuration file to
# override it.
SECRET_KEY = db_config.get(
    'SECRET_KEY', 'rebo=05i#a6^%d3m#a=0dzy)cs7(ek%!^nvhwe93n1g4rajas1')
120 | 
121 | # List of callables that know how to import templates from various sources.
122 | TEMPLATE_LOADERS = (
123 |     'django.template.loaders.filesystem.load_template_source',
124 |     'django.template.loaders.app_directories.load_template_source',
125 | #     'django.template.loaders.eggs.load_template_source',
126 | )
127 | 
128 | # Middleware necessary for the admin site.
129 | MIDDLEWARE_CLASSES = (
130 |     # URL normalization, etc.
131 |     'django.middleware.common.CommonMiddleware',
132 |     # Handle sessions.
133 |     'django.contrib.sessions.middleware.SessionMiddleware',
134 |     # Keep track of users.
135 |     'django.contrib.auth.middleware.AuthenticationMiddleware',
136 | )
137 | 
138 | AUTHENTICATION_BACKENDS = (
139 |         'conceptnet.pseudo_auth.backends.LegacyBackend',
140 |         'django.contrib.auth.backends.ModelBackend',
141 | )
142 | 
143 | ROOT_URLCONF = 'urls'
144 | 
145 | INSTALLED_APPS = (
146 |     'django.contrib.auth',
147 |     'django.contrib.contenttypes',
148 |     'django.contrib.sessions',
149 |     'django.contrib.sites',
150 |     'django.contrib.admin',
151 |     'conceptnet.pseudo_auth',
152 |     'conceptnet.corpus',
153 |     'conceptnet.webapi',
154 |     'conceptnet',
155 |     'simplenlp',
156 |     'voting',
157 |     'events',
158 | #    'south',
159 | #    'django.contrib.markup',
160 | )
161 | 
# Serve the API if we can.
SERVE_API = db_config.get('SERVE_API', False)
if SERVE_API:
    try:
        import conceptnet.webapi.handlers
    except ImportError:
        # The web API cannot be imported here; carry on without it.
        pass
    else:
        # Guard against listing the app twice when INSTALLED_APPS already
        # contains it.
        if 'conceptnet.webapi' not in INSTALLED_APPS:
            INSTALLED_APPS += ('conceptnet.webapi',)
170 | 
# Install command extensions, if available.
try:
    import django_extensions
except ImportError:
    # Optional dependency: nothing to add when it is missing.
    pass
else:
    INSTALLED_APPS += ('django_extensions',)
177 |     
# Use memcache if available.
# `memcache` ends up as a plain flag: True when either the cmemcache or the
# memcache client module can be imported, False otherwise.
try:
    import cmemcache
except ImportError:
    try:
        import memcache
    except ImportError:
        memcache = False
    else:
        memcache = True
else:
    memcache = True

if memcache:
    CACHE_BACKEND="memcached://127.0.0.1:11211"
192 | 


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # ConceptNet documentation build configuration file, created by
  4 | # sphinx-quickstart on Fri Feb 27 17:56:32 2009.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # The contents of this file are pickled, so don't put values in the namespace
  9 | # that aren't pickleable (module imports are okay, they're removed automatically).
 10 | #
 11 | # Note that not all possible configuration values are present in this
 12 | # autogenerated file.
 13 | #
 14 | # All configuration values have a default; values that are commented out
 15 | # serve to show the default.
 16 | 
 17 | import sys, os
 18 | 
 19 | # If your extensions are in another directory, add it here. If the directory
 20 | # is relative to the documentation root, use os.path.abspath to make it
 21 | # absolute, like shown here.
 22 | sys.path.append(os.path.abspath('..'))
 23 | 
 24 | # General configuration
 25 | # ---------------------
 26 | 
 27 | # Add any Sphinx extension module names here, as strings. They can be extensions
 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx']
 30 | 
 31 | # Add any paths that contain templates here, relative to this directory.
 32 | templates_path = ['_templates']
 33 | 
 34 | # The suffix of source filenames.
 35 | source_suffix = '.rst'
 36 | 
 37 | # The encoding of source files.
 38 | #source_encoding = 'utf-8'
 39 | 
 40 | # The master toctree document.
 41 | master_doc = 'index'
 42 | 
 43 | # General information about the project.
 44 | project = u'ConceptNet'
 45 | copyright = u'2009, Commonsense Computing Initiative'
 46 | 
 47 | # The version info for the project you're documenting, acts as replacement for
 48 | # |version| and |release|, also used in various other places throughout the
 49 | # built documents.
 50 | #
 51 | # The short X.Y version.
 52 | version = '3.5'
 53 | # The full version, including alpha/beta/rc tags.
 54 | release = '3.5pre'
 55 | 
 56 | # The language for content autogenerated by Sphinx. Refer to documentation
 57 | # for a list of supported languages.
 58 | #language = None
 59 | 
 60 | # There are two options for replacing |today|: either, you set today to some
 61 | # non-false value, then it is used:
 62 | #today = ''
 63 | # Else, today_fmt is used as the format for a strftime call.
 64 | #today_fmt = '%B %d, %Y'
 65 | 
 66 | # List of documents that shouldn't be included in the build.
 67 | #unused_docs = []
 68 | 
 69 | # List of directories, relative to source directory, that shouldn't be searched
 70 | # for source files.
 71 | exclude_trees = []
 72 | 
 73 | # The reST default role (used for this markup: `text`) to use for all documents.
 74 | #default_role = None
 75 | 
 76 | # If true, '()' will be appended to :func: etc. cross-reference text.
 77 | #add_function_parentheses = True
 78 | 
 79 | # If true, the current module name will be prepended to all description
 80 | # unit titles (such as .. function::).
 81 | #add_module_names = True
 82 | 
 83 | # If true, sectionauthor and moduleauthor directives will be shown in the
 84 | # output. They are ignored by default.
 85 | #show_authors = False
 86 | 
 87 | # The name of the Pygments (syntax highlighting) style to use.
 88 | pygments_style = 'sphinx'
 89 | 
 90 | 
 91 | # Options for HTML output
 92 | # -----------------------
 93 | 
# The style sheet to use for HTML and HTML Help pages. A file of that name
# must exist either in Sphinx's static/ path, or in one of the custom paths
# given in html_static_path.
 97 | html_style = 'default.css'
 98 | 
 99 | # The name for this set of Sphinx documents.  If None, it defaults to
100 | # "<project> v<release> documentation".
101 | #html_title = None
102 | 
103 | # A shorter title for the navigation bar.  Default is the same as html_title.
104 | #html_short_title = None
105 | 
106 | # The name of an image file (relative to this directory) to place at the top
107 | # of the sidebar.
108 | #html_logo = None
109 | 
110 | # The name of an image file (within the static path) to use as favicon of the
111 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
112 | # pixels large.
113 | #html_favicon = None
114 | 
115 | # Add any paths that contain custom static files (such as style sheets) here,
116 | # relative to this directory. They are copied after the builtin static files,
117 | # so a file named "default.css" will overwrite the builtin "default.css".
118 | html_static_path = ['_static']
119 | 
120 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
121 | # using the given strftime format.
122 | #html_last_updated_fmt = '%b %d, %Y'
123 | 
124 | # If true, SmartyPants will be used to convert quotes and dashes to
125 | # typographically correct entities.
126 | #html_use_smartypants = True
127 | 
128 | # Custom sidebar templates, maps document names to template names.
129 | #html_sidebars = {}
130 | 
131 | # Additional templates that should be rendered to pages, maps page names to
132 | # template names.
133 | #html_additional_pages = {}
134 | 
135 | # If false, no module index is generated.
136 | #html_use_modindex = True
137 | 
138 | # If false, no index is generated.
139 | #html_use_index = True
140 | 
141 | # If true, the index is split into individual pages for each letter.
142 | #html_split_index = False
143 | 
144 | # If true, the reST sources are included in the HTML build as _sources/<name>.
145 | #html_copy_source = True
146 | 
147 | # If true, an OpenSearch description file will be output, and all pages will
148 | # contain a <link> tag referring to it.  The value of this option must be the
149 | # base URL from which the finished HTML is served.
150 | #html_use_opensearch = ''
151 | 
152 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
153 | #html_file_suffix = ''
154 | 
155 | # Output file base name for HTML help builder.
156 | htmlhelp_basename = 'ConceptNetdoc'
157 | 
158 | 
159 | # Options for LaTeX output
160 | # ------------------------
161 | 
162 | # The paper size ('letter' or 'a4').
163 | #latex_paper_size = 'letter'
164 | 
165 | # The font size ('10pt', '11pt' or '12pt').
166 | #latex_font_size = '10pt'
167 | 
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]).
# NOTE: the ur'' string prefix used below is Python 2-only syntax.
latex_documents = [
  ('index', 'ConceptNet.tex', ur'ConceptNet Documentation',
   ur'Commonsense Computing Initiative', 'manual'),
]
174 | 
175 | # The name of an image file (relative to this directory) to place at the top of
176 | # the title page.
177 | #latex_logo = None
178 | 
179 | # For "manual" documents, if this is true, then toplevel headings are parts,
180 | # not chapters.
181 | #latex_use_parts = False
182 | 
183 | # Additional stuff for the LaTeX preamble.
184 | #latex_preamble = ''
185 | 
186 | # Documents to append as an appendix to all manuals.
187 | #latex_appendices = []
188 | 
189 | # If false, no module index is generated.
190 | #latex_use_modindex = True
191 | 
192 | 
# Example configuration for intersphinx: refer to the Python standard library.
# The key is the base URL of the external documentation; the None value
# presumably tells intersphinx to use the default inventory location under
# that URL -- confirm against the Sphinx version in use.
intersphinx_mapping = {'http://docs.python.org/dev': None}
195 | 


--------------------------------------------------------------------------------
/conceptnet/migrations/0002_rename_tables.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from south.db import db
  3 | from django.db import models
  4 | from conceptnet.corpus.models import *
  5 | 
class Migration:
    """
    Rename seven legacy database tables to `conceptnet_*` names.

    `forwards` applies the renames and `backwards` reverts each of them.
    """

    def forwards(self, orm):
        """Rename each legacy table to its new `conceptnet_*` name."""
        db.rename_table('parsing_batch', 'conceptnet_batch')
        db.rename_table('predicatetypes', 'conceptnet_relation')
        db.rename_table('conceptnet_frames', 'conceptnet_frame')
        db.rename_table('concepts', 'conceptnet_concept')
        db.rename_table('surface_forms', 'conceptnet_surfaceform')
        db.rename_table('assertions', 'conceptnet_assertion')
        db.rename_table('raw_assertions', 'conceptnet_rawassertion')

    def backwards(self, orm):
        """Undo `forwards`: give every table its legacy name back."""
        db.rename_table('conceptnet_batch', 'parsing_batch')
        db.rename_table('conceptnet_relation', 'predicatetypes')
        db.rename_table('conceptnet_frame', 'conceptnet_frames')
        db.rename_table('conceptnet_concept', 'concepts')
        db.rename_table('conceptnet_surfaceform', 'surface_forms')
        db.rename_table('conceptnet_assertion', 'assertions')
        db.rename_table('conceptnet_rawassertion', 'raw_assertions')

    # Model descriptions keyed by 'app_label.modelname'; each field maps to a
    # (field class path, positional args, keyword args) triple.
    # NOTE(review): presumably the frozen ORM snapshot generated by South --
    # regenerate it with South rather than editing it by hand.
    models = {
        'auth.group': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '80', 'unique': 'True'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'unique_together': "(('content_type', 'codename'),)"},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'max_length': '30', 'unique': 'True'})
        },
        'contenttypes.contenttype': {
            'Meta': {'unique_together': "(('app_label', 'model'),)", 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.dependencyparse': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index1': ('django.db.models.fields.IntegerField', [], {}),
            'index2': ('django.db.models.fields.IntegerField', [], {}),
            'linktype': ('django.db.models.fields.CharField', [], {'max_length': '20'}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']"}),
            'word1': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'word2': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'corpus.language': {
            'id': ('django.db.models.fields.CharField', [], {'max_length': '16', 'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'sentence_count': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'corpus.sentence': {
            'activity': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['events.Activity']"}),
            'created_on': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'creator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'score': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'text': ('django.db.models.fields.TextField', [], {}),
            'votes': ('django.contrib.contenttypes.generic.GenericRelation', [], {'to': "orm['voting.Vote']"})
        },
        'corpus.taggedsentence': {
            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Language']"}),
            'sentence': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['corpus.Sentence']", 'primary_key': 'True'}),
            'text': ('django.db.models.fields.TextField', [], {})
        },
        'events.activity': {
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.TextField', [], {})
        },
        'voting.vote': {
            'Meta': {'unique_together': "(('user', 'content_type', 'object_id'),)", 'db_table': "'votes'"},
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'object_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}),
            'vote': ('django.db.models.fields.SmallIntegerField', [], {})
        }
    }

    # NOTE(review): lists only 'corpus' although the tables renamed above are
    # conceptnet_* tables -- confirm this is the intended app label.
    complete_apps = ['corpus']
105 | 


--------------------------------------------------------------------------------
/conceptnet/analogyspace2.py:
--------------------------------------------------------------------------------
  1 | from csc import divisi2
  2 | from conceptnet.models import Assertion, Relation, RawAssertion, Feature
  3 | from conceptnet.corpus.models import Language
  4 | from math import log, sqrt
  5 | import logging
# NOTE(review): basicConfig is called at import time, so merely importing this
# module sets up root logging at INFO level (when no handlers are configured
# yet) -- confirm that this side effect is wanted by library users.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('conceptnet.analogyspace2')

# Default values for the `identity_weight` and `cutoff` keyword arguments of
# the functions in this module.
DEFAULT_IDENTITY_WEIGHT = 0
DEFAULT_CUTOFF = 5
 11 | 
 12 | log_2 = log(2)
 13 | 
 14 | def get_value(score, freq):
 15 |     """
 16 |     This function gives diminishing returns from higher scores, on a
 17 |     logarithmic scale. It also scales the resulting value according to the
 18 |     *frequency* value, which ranges from -10 to 10.
 19 |     """
 20 |     return (freq/10.0) * log(max((score+1, 1)))/log_2
 21 | 
 22 | ### Getting quads of (concept1, relation, concept2, value) from the database.
 23 | 
 24 | def conceptnet_quads(query, cutoff=DEFAULT_CUTOFF):
 25 |     '''
 26 |     Generates a sequence of ((concept, relation, concept), value)
 27 |     triples for ConceptNet.
 28 |     
 29 |     Query can be a language identifier, in which case it will construct the
 30 |     default query for that language. It can also be a Django QuerySet
 31 |     containing Assertions, which it will use directly.
 32 |     '''
 33 |     if isinstance(query, (basestring, Language)):
 34 |         queryset = conceptnet_queryset(query, cutoff=cutoff)
 35 |     else:
 36 |         queryset = query
 37 | 
 38 |     for (relation, concept1, concept2, score, freq) in queryset.values_list(
 39 |         'relation__name', 'concept1__text', 'concept2__text', 'score', 'frequency__value').iterator():
 40 |         yield (concept1, relation, concept2, get_value(score, freq))
 41 | 
 42 | def conceptnet_queryset(lang=None, cutoff=DEFAULT_CUTOFF):
 43 |     """
 44 |     Construct a typical queryset for retrieving all relevant assertions
 45 |     from ConceptNet:
 46 | 
 47 |     - Limit it to a particular language, unless lang=None
 48 |     - Ensure that the reliability score is greater than 0
 49 |     - Use Assertion.useful to discard concepts that we have marked as invalid
 50 |     - Include only concepts that appear in a minimum number of assertions
 51 |       (the *cutoff*)
 52 |     """
 53 |     queryset = Assertion.useful.filter(score__gt=0)
 54 |     if lang is not None:
 55 |         queryset = queryset.filter(language=lang)
 56 |     if cutoff:
 57 |         queryset = queryset.filter(
 58 |             concept1__num_assertions__gte=cutoff,
 59 |             concept2__num_assertions__gte=cutoff)
 60 |     return queryset
 61 | 
 62 | def rating_quads(lang, cutoff=DEFAULT_CUTOFF, filter=None):
 63 |     '''
 64 |     Generates a quad for each rating (vote) on Assertions.
 65 | 
 66 |     A django.db.models.Q object passed to filter will be applied to
 67 |     the Vote queryset.
 68 |     '''
 69 |     from conceptnet.models import AssertionVote
 70 |     ratings = AssertionVote.objects.filter(
 71 |         assertion__concept1__num_assertions__gte=cutoff,
 72 |         assertion__concept2__num_assertions__gte=cutoff)
 73 |     if filter is not None:
 74 |         ratings = ratings.filter(filter)
 75 |     for concept1, rel, concept2, vote in ratings.values_list(
 76 |         'assertion__concept1__text', 'assertion__relation__name', 'assertion__concept2__text', 'vote').iterator():
 77 |         yield (concept1, rel, concept2, vote)
 78 | 
 79 | def rawassertion_quads(lang, cutoff=DEFAULT_CUTOFF):
 80 |     # Experiment: deal with RawAssertions only.
 81 |     from conceptnet.models import RawAssertion
 82 |     queryset = RawAssertion.objects.filter(
 83 |         score__gt=0,
 84 |         surface1__concept__num_assertions__gte=cutoff,
 85 |         surface2__concept__num_assertions__gte=cutoff,
 86 |         language=lang)
 87 |     for (rel, concept1, concept2, text1, text2, frame_id, score, freq) in queryset.values_list(
 88 |         'frame__relation__name', 'surface1__concept__text',  'surface2__concept__text', 'surface1__text', 'surface2__text', 'frame__id', 'score', 'frame__frequency__value'
 89 |         ).iterator():
 90 |         value = get_value(score, freq)
 91 | 
 92 |         # Raw
 93 |         yield (text1, frame_id, text2, value)
 94 | 
 95 |         # Assertion
 96 |         yield (concept1, rel, concept2, value)
 97 | 
 98 |         ## NormalizesTo
 99 |         yield (concept1, 'NormalizesTo', text1, 1)
100 |         yield (concept2, 'NormalizesTo', text2, 1)
101 |         yield (concept1, 'NormalizesTo', concept1, 1)
102 |         yield (concept2, 'NormalizesTo', concept2, 1)
103 | 
def to_value_concept_feature(quads):
    """
    Turn each (concept1, rel, concept2, value) quad into two
    (value, concept, feature) triples.

    The first triple describes concept1 by the feature
    ('right', rel, concept2); the second describes concept2 by the feature
    ('left', rel, concept1).
    """
    for quad in quads:
        first, relation, second, weight = quad
        yield (weight, first, ('right', relation, second))
        yield (weight, second, ('left', relation, first))
112 | 
def to_value_concept_concept(quads):
    """
    Turn each (concept1, rel, concept2, value) quad into two
    (value, concept, concept) triples, one per direction.

    The relation is dropped, so every kind of edge is treated the same.
    """
    for quad in quads:
        first, _relation, second, weight = quad
        for source, target in ((first, second), (second, first)):
            yield (weight, source, target)
122 | 
def to_value_pair_relation(quads):
    """
    Convert a stream of assertion quads into a stream of
    (value, conceptPair, relation) triples.

    Each (concept1, rel, concept2, value) quad yields exactly one triple,
    whose conceptPair is the tuple (concept1, concept2).
    """
    # The quad is unpacked directly in the loop header. An earlier version
    # re-unpacked an undefined name (`triple`) here and raised NameError on
    # the first quad.
    for concept1, rel, concept2, value in quads:
        yield value, (concept1, concept2), rel
131 | 
def build_matrix(query, cutoff=DEFAULT_CUTOFF, identity_weight=DEFAULT_IDENTITY_WEIGHT, data_source=conceptnet_quads, transform=to_value_concept_feature):
    """
    Build a Divisi2 sparse matrix from relational data.

    The only required argument is `query`, which is handed to `data_source`;
    for the default data source it can be a QuerySet or just a language
    identifier.

    Optional arguments:

    - `cutoff`: passed to `data_source` and to the final `squish` call on the
      matrix. Defaults to DEFAULT_CUTOFF=5.
    - `identity_weight`: when greater than 0, extra 'InheritsFrom' quads are
      added before the matrix is built: one from every concept found in the
      first position of a quad to itself, weighted `identity_weight`, and one
      self-quad for the first concept of every 'IsA' quad, weighted with that
      quad's value.
    - `data_source`: a function that produces (concept1, rel, concept2, value)
      quads given the `query` and `cutoff`. Defaults to
      :meth:`conceptnet_quads`.
    - `transform`: the function that turns quads into
      (value, row_name, column_name) triples. Defaults to
      :meth:`to_value_concept_feature`, which yields
      (value, concept, feature) triples.
    """
    logger.info("Performing ConceptNet query")
    quads = list(data_source(query, cutoff))
    # todo: separate this out into a customizable function

    if identity_weight > 0:
        logger.info("Adding identities")
        first_concepts = set(quad[0] for quad in quads)
        extra_quads = [(concept, 'InheritsFrom', concept, identity_weight)
                       for concept in first_concepts]
        # NOTE(review): the target of this quad is c1, not c2, so an IsA edge
        # only adds another self-edge -- confirm whether (c1, ..., c2, val)
        # was intended.
        extra_quads.extend((c1, 'InheritsFrom', c1, val)
                           for c1, rel, c2, val in quads
                           if rel == 'IsA')
        quads.extend(extra_quads)

    logger.info("Creating triples")
    triples = transform(quads)

    logger.info("Building matrix")
    sparse = divisi2.make_sparse(triples)

    logger.info("Squishing underused rows")
    return sparse.squish(cutoff)
173 | 
174 | 


--------------------------------------------------------------------------------
/conceptnet/corpus/parse/run_parser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys, traceback
  3 | from conceptnet.models import Assertion, Batch, RawAssertion, Frame,\
  4 |   Frequency, Relation, SurfaceForm, Concept, Rating
  5 | from conceptnet.corpus.models import Sentence, Language, Activity
  6 | from django.contrib.auth.models import User
  7 | from pcfgpattern import pattern_parse
  8 | from django.core.paginator import Paginator
  9 | from django.db import transaction
 10 | 
# Activity under which the Ratings created by the process_sentence functions
# below are recorded. It is looked up once, at module level, so importing this
# module runs a database query.
# NOTE(review): presumably this fails at import if no Activity with this name
# exists -- confirm the row is present before running the parser.
csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating')
 12 | 
 13 | def process_sentence_delayed(entry, lang, batch):
 14 |     frametext, id, matches, reltext = (entry['frametext'], entry['id'],
 15 |     entry['matches'], entry['reltext'])
 16 |     sentence = Sentence.objects.get(id=id)
 17 |     print sentence.text.encode('utf-8')
 18 |     
 19 |     if reltext is None or reltext == 'junk': return []
 20 |     relation = Relation.objects.get(name=reltext)
 21 |     text_factors = [lang.nl.lemma_factor(matches[i]) for i in (1, 2)]
 22 |     concepts = [Concept.objects.get_or_create(language=lang, text=stem)[0]
 23 |                 for stem, residue in text_factors]
 24 |     for c in concepts: c.save()
 25 |     
 26 |     surface_forms = [SurfaceForm.objects.get_or_create(concept=concepts[i],
 27 |                                                   text=matches[i+1],
 28 |                                                   residue=text_factors[i][1],
 29 |                                                   language=lang)[0]
 30 |                      for i in (0, 1)]
 31 |     for s in surface_forms: s.save()
 32 |     
 33 |     freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''),
 34 |                                               language=lang,
 35 |                                               defaults=dict(value=50))
 36 |     freq.save()
 37 |     
 38 |     frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
 39 |                                            text=frametext, frequency=freq,
 40 |                                            defaults=dict(goodness=1))
 41 |     frame.save()
 42 |     
 43 |     raw_assertion, _ = RawAssertion.objects.get_or_create(
 44 |         surface1=surface_forms[0],
 45 |         surface2=surface_forms[1],
 46 |         frame=frame,
 47 |         language=lang,
 48 |         defaults=dict(batch=batch))
 49 |     # still need to set assertion_id
 50 |     
 51 |     assertion, _ = Assertion.objects.get_or_create(
 52 |         relation=relation,
 53 |         concept1=concepts[0],
 54 |         concept2=concepts[1],
 55 |         frequency=freq,
 56 |         language=lang,
 57 |         defaults=dict(score=0)
 58 |     )
 59 |     assertion.score += 1
 60 |     assertion.save()
 61 |     raw_assertion.assertion = assertion
 62 |     raw_assertion.save()
 63 |     
 64 |     rating1, _ = Rating.objects.get_or_create(
 65 |         user=sentence.creator, activity=csamoa4_activity,
 66 |         sentence=sentence, score=1
 67 |     )
 68 |     rating2, _ = Rating.objects.get_or_create(
 69 |         user=sentence.creator, activity=csamoa4_activity,
 70 |         raw_assertion=raw_assertion, score=1
 71 |     )
 72 |     rating1.save()
 73 |     rating2.save()
 74 | 
 75 |     print '=>', str(assertion).encode('utf-8')
 76 |     return [assertion]
 77 | 
 78 | def process_sentence(sentence, lang, batch):
 79 |     print sentence.text.encode('utf-8')
 80 |     _, frametext, reltext, matches = pattern_parse(sentence.text)
 81 |     
 82 |     if reltext is None or reltext == 'junk': return []
 83 |     relation = Relation.objects.get(name=reltext)
 84 |     text_factors = [lang.nl.lemma_factor(matches[i]) for i in (1, 2)]
 85 |     concepts = [Concept.objects.get_or_create(language=lang, text=stem)[0]
 86 |                 for stem, residue in text_factors]
 87 |     for c in concepts: c.save()
 88 |     
 89 |     surface_forms = [SurfaceForm.objects.get_or_create(concept=concepts[i],
 90 |                                                   text=matches[i+1],
 91 |                                                   residue=text_factors[i][1],
 92 |                                                   language=lang)[0]
 93 |                      for i in (0, 1)]
 94 |     for s in surface_forms: s.save()
 95 |     
 96 |     freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''),
 97 |                                               language=lang,
 98 |                                               defaults=dict(value=50))
 99 |     freq.save()
100 |     
101 |     frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
102 |                                            text=frametext, frequency=freq,
103 |                                            defaults=dict(goodness=1))
104 |     frame.save()
105 |     
106 |     raw_assertion, _ = RawAssertion.objects.get_or_create(
107 |         surface1=surface_forms[0],
108 |         surface2=surface_forms[1],
109 |         frame=frame,
110 |         language=lang,
111 |         defaults=dict(batch=batch))
112 |     # still need to set assertion_id
113 |     
114 |     assertion, _ = Assertion.objects.get_or_create(
115 |         relation=relation,
116 |         concept1=concepts[0],
117 |         concept2=concepts[1],
118 |         frequency=freq,
119 |         language=lang,
120 |         defaults=dict(score=0)
121 |     )
122 |     assertion.score += 1
123 |     assertion.save()
124 |     raw_assertion.assertion = assertion
125 |     raw_assertion.save()
126 |     
127 |     rating1, _ = Rating.objects.get_or_create(
128 |         user=sentence.creator, activity=csamoa4_activity,
129 |         sentence=sentence, score=1
130 |     )
131 |     rating2, _ = Rating.objects.get_or_create(
132 |         user=sentence.creator, activity=csamoa4_activity,
133 |         raw_assertion=raw_assertion, score=1
134 |     )
135 |     rating1.save()
136 |     rating2.save()
137 | 
138 |     print '=>', str(assertion).encode('utf-8')
139 |     return [assertion]
140 | 
def run(user, lang, start_page=1):
    """
    Parse the sentences of `lang` into assertions, ten sentences per page.

    A new Batch owned by `user` tracks the work: before each page is
    processed, the batch's status and progress fields are updated and saved.
    Every page is processed inside a function decorated with
    transaction.commit_on_success. Pages numbered below `start_page` are
    skipped.

    If a page fails, the batch status gets an " ERROR!" suffix, the failing
    sentence (when known), the error message and the traceback are stored in
    batch.remarks and printed, the batch is saved, and the exception is
    re-raised.
    """
    batch = Batch()
    batch.owner = user

    all_sentences = Sentence.objects.filter(language=lang).order_by('id')
    paginator = Paginator(all_sentences, 10)

    @transaction.commit_on_success
    def do_batch(sentences):
        for sentence in sentences:
            try:
                process_sentence(sentence, lang, batch)
            except Exception as e:
                # Attach the sentence and the formatted traceback so the
                # caller can store them in the batch remarks.
                e.sentence = sentence
                e.tb = traceback.format_exc()
                # A bare raise keeps the original traceback.
                raise

    # Process sentences
    page_range = [p for p in paginator.page_range if p >= start_page]
    for i in page_range:
        sentences = paginator.page(i).object_list

        # Update progress
        progress = "process_sentence_batch " + str(i) + "/" + str(paginator.num_pages)
        batch.status = progress
        batch.progress_num = i
        batch.progress_den = paginator.num_pages
        batch.save()

        try:
            do_batch(sentences)
        except Exception as e:
            # The failure may come from outside process_sentence (for
            # example while the transaction is committed); such exceptions
            # carry no .sentence or .tb, so fall back instead of raising
            # AttributeError and hiding the real error.
            failed_sentence = getattr(e, 'sentence', None)
            tb_text = getattr(e, 'tb', None) or traceback.format_exc()
            batch.status = progress + " ERROR!"
            batch.remarks = str(failed_sentence) + "\n" + str(e) + "\n" + tb_text
            print("***TRACEBACK***")
            print(batch.remarks)
            batch.save()
            raise
186 | 
187 | 
# Script entry point with hard-coded settings: the batch is owned by the user
# named 'rspeer', the language is looked up with the code 'en', and parsing
# starts at page 50000.
# NOTE(review): the start page looks like a leftover from resuming an earlier
# run -- confirm before reusing this script.
if __name__ == '__main__':
    user = User.objects.get(username='rspeer')
    lang = Language.get('en')
    run(user, lang, start_page=50000)
192 | 
193 | 


--------------------------------------------------------------------------------