├── .gitignore
├── README.md
├── boe
    ├── __init__.py
    ├── boe
    │   ├── __init__.py
    │   ├── settings.py
    │   ├── urls.py
    │   └── wsgi.py
    ├── boe_analisis
    │   ├── __init__.py
    │   ├── api.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   ├── getNewInfo.py
    │   │   │   └── processDocument.py
    │   ├── models.py
    │   ├── paginator.py
    │   ├── tests.py
    │   ├── urls.py
    │   └── views.py
    └── manage.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | workspace.xml
 2 | settings.py
 3 | .idea/
 4 | *.sh
 5 | schema.xml
 6 | solrconfig.xml
 7 | env/
 8 | boe/static
 9 | boe/local_settings.py
10 | boe/social_networks/migrations/0001_initial.py
11 | boe/social_networks/migrations/0002_auto__add_field_facebookaccount_pageKey.py
12 | boe/social_networks/migrations/0003_auto__add_field_subject_activated.py
13 | boe/boe_analisis/migrations/0003_initial.py
14 | boe/boe_analisis/migrations/0004_auto__add_unique_documento_identificador.py
15 | boe/boe_analisis/migrations/0005_auto__chg_field_estado_consolidacion_titulo__chg_field_nota_titulo__ch.py
16 | boe/boe_analisis/migrations/0006_auto__chg_field_estado_consolidacion_titulo__chg_field_nota_titulo__ch.py
17 | boe/boe_analisis/migrations/0007_auto__add_field_legislatura_nombre_legislatura.py
18 | boe/boe_analisis/migrations/0008_auto__chg_field_legislatura_final.py
19 | boe/boe_analisis/migrations/0009_auto__add_procedimiento__add_precio__add_tipo__add_documentoanuncio__a.py
20 | boe/boe_analisis/migrations/0012_auto__add_field_documentoanuncio_importe.py
21 | boe/boe_analisis/migrations/__init__.py
22 | boe/middleware/__init__.py
23 | boe/social_networks/__init__.py
24 | boe/middleware/crossdomainxhr.py
25 | boe/search/indexes/boe_analisis/documento_text.txt
26 | boe/templates/search/indexes/boe_analisis/documento_text.txt
27 | boe/templates/boe_analisis/footer.html
28 | boe/templates/boe_analisis/header.html
29 | boe/templates/boe_analisis/index.html
30 | boe/templates/boe_analisis/individual.html
31 | boe/templates/boe_analisis/listado_otros.html
32 | boe/social_networks/models.py
33 | boe/out.csv
34 | boe/boe/prod_settings.py
35 | boe/templates/search/search.html
36 | boe/boe_analisis/search_indexes.py
37 | boe/boe_analisis/signals.py
38 | boe/boe_analisis/management/commands/testDB.py
39 | boe/social_networks/tests.py
40 | boe/social_networks/views.py
41 | boe/boe/wsgi_dev.py
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | BOE_API
 2 | =======
 3 | BOE API is a REST API for the Boletín Oficial del Estado (Official State Bulletin) of Spain. It fetches information from www.boe.es
 4 | and stores it in a PostgreSQL database (required for performance optimizations).
 5 | 
 6 | Requirements
 7 | =======
 8 | 
 9 | Tested on Ubuntu 12.04.
10 | 
11 | - PostgreSQL 9.1
12 | - Memcache (optional, if you're not going to use it, delete it from settings)
13 | - Install the Python dependencies: ```pip install -r requirements.txt```
14 | 
15 | Use
16 | =======
17 | Synchronize the DB:
18 | ```python
19 | python manage.py syncdb
20 | ```
21 | 
22 | To execute the API:
23 | ```python
24 | python manage.py runserver
25 | ```
26 | Go to your browser and open ```http://localhost:8000/v1/?format=json```; you should see the API's endpoints.
27 | 
28 | To fetch new laws (from BOE.es) you can execute:
29 | ```python
30 | python manage.py getNewInfo 
31 | ```
32 | This fetches the documents published since the last day stored in the database, or since 1960 if the database is empty.
33 | 
34 | You can pass a date to fetch laws since that date:
35 | 
36 | ```python
37 | 
38 | python manage.py getNewInfo YYYY  
39 | python manage.py getNewInfo YYYY MM
40 | python manage.py getNewInfo YYYY MM DD
41 | 
42 | ```
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/boe/__init__.py:
--------------------------------------------------------------------------------
# Package-level author metadata.
__author__ = 'Carlos'
2 | 


--------------------------------------------------------------------------------
/boe/boe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BOE-API/BOE_API/48c4279db7706cd9fdc3b186a05fe1b507f3aa42/boe/boe/__init__.py


--------------------------------------------------------------------------------
/boe/boe/settings.py:
--------------------------------------------------------------------------------
# Django settings for boe project.

# DEBUG is off, so ALLOWED_HOSTS below is enforced and the LOGGING
# configuration mails the ADMINS on HTTP 500 errors.
DEBUG = False
TEMPLATE_DEBUG = DEBUG

# Empty by default: nobody receives the error e-mails until this is filled in.
ADMINS = (
    # ('Your Name', 'your_email@example.com'),
)

MANAGERS = ADMINS

# PostgreSQL connection. NAME, HOST, USER and PASSWORD are intentionally blank
# and must be filled in for each deployment.
DATABASES = {
  'default': {
    'ENGINE': 'django.db.backends.postgresql_psycopg2',
    'NAME': '',
    'HOST': '',
    'PORT': 5432,
    'USER': '',
    'PASSWORD': '',
    # 'OPTIONS': {'autocommit': False,}
  },

}
# Hosts/domain names that are valid for this site; required if DEBUG is False
# See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts
# NOTE(review): '*' accepts any Host header; consider listing the real
# domain names for production deployments.
ALLOWED_HOSTS = ['*']

# Local time zone for this installation. Choices can be found here:
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
# although not all choices may be available on all operating systems.
# In a Windows environment this must be set to your system time zone.
# NOTE(review): this is the Django project-template default; confirm whether
# a Spanish time zone is intended for this project.
TIME_ZONE = 'America/Chicago'

# Language code for this installation. All choices can be found here:
# http://www.i18nguy.com/unicode/language-identifiers.html
LANGUAGE_CODE = 'en-us'

SITE_ID = 1

# If you set this to False, Django will make some optimizations so as not
# to load the internationalization machinery.
USE_I18N = True

# If you set this to False, Django will not format dates, numbers and
# calendars according to the current locale.
USE_L10N = True

# If you set this to False, Django will not use timezone-aware datetimes.
USE_TZ = True

# Absolute filesystem path to the directory that will hold user-uploaded files.
# Example: "/var/www/example.com/media/"
MEDIA_ROOT = ''

# URL that handles the media served from MEDIA_ROOT. Make sure to use a
# trailing slash.
# Examples: "http://example.com/media/", "http://media.example.com/"
MEDIA_URL = ''

# Absolute path to the directory static files should be collected to.
# Don't put anything in this directory yourself; store your static files
# in apps' "static/" subdirectories and in STATICFILES_DIRS.
# Example: "/var/www/example.com/static/"
STATIC_ROOT = ''
# NOTE(review): ADMIN_MEDIA_PREFIX looks like a legacy setting from older
# Django releases -- confirm it is still needed.
ADMIN_MEDIA_PREFIX = ''
# URL prefix for static files.
# Example: "http://example.com/static/", "http://static.example.com/"
# NOTE(review): left empty here although django.contrib.staticfiles is
# installed; presumably overridden in a local settings file -- confirm.
STATIC_URL = ''

# Additional locations of static files
STATICFILES_DIRS = (
    # Put strings here, like "/home/html/static" or "C:/www/django/static".
    # Always use forward slashes, even on Windows.
    # Don't forget to use absolute paths, not relative paths.



)

# List of finder classes that know how to find static files in
# various locations.
STATICFILES_FINDERS = (
    'django.contrib.staticfiles.finders.FileSystemFinder',
    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
#    'django.contrib.staticfiles.finders.DefaultStorageFinder',
)

# Make this unique, and don't share it with anybody.
# NOTE(review): the key is hard-coded in this file; consider loading it from
# the environment or from an untracked local settings module.
SECRET_KEY = '0p^3-vyuy9ugc!&!ntoa&sy(=^*k_2)c-k^#6xr2et3_eiq4r#'

# List of callables that know how to import templates from various sources.
TEMPLATE_LOADERS = (
    'django.template.loaders.filesystem.Loader',
    'django.template.loaders.app_directories.Loader',
#     'django.template.loaders.eggs.Loader',
)
 97 | 
 98 | MIDDLEWARE_CLASSES = (
 99 |     'django.middleware.common.CommonMiddleware',
100 |     'django.contrib.sessions.middleware.SessionMiddleware',
101 |     'django.middleware.csrf.CsrfViewMiddleware',
102 |     'django.contrib.auth.middleware.AuthenticationMiddleware',
103 |     'django.contrib.messages.middleware.MessageMiddleware',
104 |     'django.middleware.cache.UpdateCacheMiddleware',
105 |     'django.middleware.common.CommonMiddleware',
106 |     'django.middleware.cache.FetchFromCacheMiddleware',
107 |     # Uncomment the next line for simple clickjacking protection:
108 |     # 'django.middleware.clickjacking.XFrameOptionsMiddleware',
109 | )
110 | 
# Dotted path of the project's root URLconf module.
ROOT_URLCONF = 'boe.urls'

# Python dotted path to the WSGI application used by Django's runserver.
WSGI_APPLICATION = 'boe.wsgi.application'

TEMPLATE_DIRS = (
    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
    # Always use forward slashes, even on Windows.
    # Don't forget to use absolute paths, not relative paths.

)

INSTALLED_APPS = (
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.sites',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # The admin app is already enabled here.
    'django.contrib.admin',

    # Project app plus the third-party apps it relies on.
    'boe_analisis',
    'south',
    'tastypie',

    # Uncomment the next line to enable admin documentation:
    # 'django.contrib.admindocs',
)




# A sample logging configuration. The only tangible logging
# performed by this configuration is to send an email to
# the site admins on every HTTP 500 error when DEBUG=False.
# See http://docs.djangoproject.com/en/dev/topics/logging for
# more details on how to customize your logging configuration.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'filters': {
        'require_debug_false': {
            '()': 'django.utils.log.RequireDebugFalse'
        }
    },
    'handlers': {
        'mail_admins': {
            'level': 'ERROR',
            'filters': ['require_debug_false'],
            'class': 'django.utils.log.AdminEmailHandler'
        }
    },
    'loggers': {
        'django.request': {
            'handlers': ['mail_admins'],
            'level': 'ERROR',
            'propagate': True,
        },
    }
}
# Default cache backend: a memcached server on localhost, port 11211.
# The cache middleware in MIDDLEWARE_CLASSES relies on a cache being configured.
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
        'LOCATION': '127.0.0.1:11211',
    }
}
179 | 
180 | 
181 | 


--------------------------------------------------------------------------------
/boe/boe/urls.py:
--------------------------------------------------------------------------------
# Root URLconf of the project: every request is delegated to the
# boe_analisis application's URL patterns.
from boe_analisis.models import Documento, Diario, Partido, Legislatura
# NOTE(review): this wildcard import runs before the django.conf.urls import
# below, so any same-named symbols are rebound by the later imports; the
# import order is therefore significant.
from boe_analisis.urls import *
from django.conf.urls import patterns, include, url
from boe_analisis import views

# The admin is imported and autodiscovered, but its URLs are not mounted
# (see the commented-out pattern below).
from django.contrib import admin
#
admin.autodiscover()
#admin.site.register(Documento)
#admin.site.register(Partido)
#admin.site.register(Legislatura)




urlpatterns = patterns('',
    # Examples:
    # url(r'^$', 'boe.views.home', name='home'),
    # The empty regex matches every path, so boe_analisis.urls handles all routing.
    url(r'', include('boe_analisis.urls')),



    # Uncomment the admin/doc line below to enable admin documentation:
    # url(r'^admin/doc/', include('django.contrib.admindocs.urls')),

    # Uncomment the next line to enable the admin:
    # url(r'^admin/', include(admin.site.urls)),
    # url(r'^search/', include('haystack.urls')),
)
31 | 


--------------------------------------------------------------------------------
/boe/boe/wsgi.py:
--------------------------------------------------------------------------------
"""
WSGI config for boe project.

This module contains the WSGI application used by Django's development server
and any production WSGI deployments. It should expose a module-level variable
named ``application``. Django's ``runserver`` and ``runfcgi`` commands discover
this application via the ``WSGI_APPLICATION`` setting.

Usually you will have the standard Django WSGI application here, but it also
might make sense to replace the whole Django WSGI application with a custom one
that later delegates to the Django one. For example, you could introduce WSGI
middleware here, or combine a Django application with an application of another
framework.

"""
import os

# We defer to a DJANGO_SETTINGS_MODULE already in the environment. This breaks
# if running multiple sites in the same mod_wsgi process. To fix this, use
# mod_wsgi daemon mode with each site in its own daemon process, or use
# os.environ["DJANGO_SETTINGS_MODULE"] = "boe.settings"

# setdefault only applies "boe.settings" when the variable is not already set.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "boe.settings")

# This application object is used by any WSGI server configured to use this
# file. This includes Django's development server, if the WSGI_APPLICATION
# setting points here.
# The import is placed after the environment variable has been set above.
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()

# Apply WSGI middleware here.
# from helloworld.wsgi import HelloWorldApplication
# application = HelloWorldApplication(application)
34 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BOE-API/BOE_API/48c4279db7706cd9fdc3b186a05fe1b507f3aa42/boe/boe_analisis/__init__.py


--------------------------------------------------------------------------------
/boe/boe_analisis/api.py:
--------------------------------------------------------------------------------
# Module-level author metadata.
__author__ = 'Carlos'
  2 | from boe_analisis.models import Materia, Documento, Diario, Origen_legislativo
  3 | from boe_analisis.models import Nota, Palabra, Referencia, Alerta
  4 | from boe_analisis.models import Departamento, Partido, Rango, Legislatura, Estado_consolidacion
  5 | from tastypie.resources import ModelResource, Bundle
  6 | from tastypie.resources import ModelResource, ALL, ALL_WITH_RELATIONS
  7 | from tastypie import fields
  8 | from tastypie.authorization import DjangoAuthorization, ReadOnlyAuthorization
  9 | from tastypie.authentication import ApiKeyAuthentication, BasicAuthentication
 10 | from django.conf.urls import url
 11 | from django.http import Http404
 12 | from tastypie.utils import trailing_slash
 13 | from tastypie import resources
 14 | from tastypie.exceptions import  ImmediateHttpResponse
 15 | from django.http import HttpResponse
 16 | from django.core.cache import cache
 17 | from tastypie.paginator import Paginator
 18 | from tastypie import http
 19 | from boe_analisis.paginator import ModelPagination
 20 | from tastypie.cache import SimpleCache
 21 | from django.db.models import Count
 22 | import json
 23 | 
 24 | from django.db import connection
 25 | 
 26 | from tastypie.paginator import Paginator
 27 | 
 28 | class BaseCorsResource(resources.Resource):
 29 |     """
 30 |     Class implementing CORS
 31 |     """
 32 |     def create_response(self, *args, **kwargs):
 33 |         response = super(BaseCorsResource, self).create_response(*args, **kwargs)
 34 |         response['Access-Control-Allow-Origin'] = '*'
 35 |         response['Access-Control-Allow-Headers'] = 'Content-Type'
 36 |         return response
 37 | 
 38 |     def method_check(self, request, allowed=None):
 39 |         if allowed is None:
 40 |             allowed = []
 41 | 
 42 |         request_method = request.method.lower()
 43 |         allows = ','.join(map(str.upper, allowed))
 44 | 
 45 |         if request_method == 'options':
 46 |             response = HttpResponse(allows)
 47 |             response['Access-Control-Allow-Origin'] = '*'
 48 |             response['Access-Control-Allow-Headers'] = 'Content-Type'
 49 |             response['Allow'] = allows
 50 |             raise ImmediateHttpResponse(response=response)
 51 | 
 52 |         if not request_method in allowed:
 53 |             response = http.HttpMethodNotAllowed(allows)
 54 |             response['Allow'] = allows
 55 |             raise ImmediateHttpResponse(response=response)
 56 | 
 57 |         return request_method
 58 | 
 59 | 
 60 | def build_content_type(format, encoding='utf-8'):
 61 |     """
 62 |     Appends character encoding to the provided format if not already present.
 63 |     """
 64 |     print format
 65 |     if 'charset' in format:
 66 |         return format
 67 | 
 68 |     return "%s; charset=%s" % (format, encoding)
 69 | 
 70 | class MyModelResource(BaseCorsResource, resources.ModelResource):
 71 | 
 72 |     def create_response(self, request, data, response_class=HttpResponse, **response_kwargs):
 73 |         """
 74 |         Extracts the common "which-format/serialize/return-response" cycle.
 75 | 
 76 |         Mostly a useful shortcut/hook.
 77 |         """
 78 |         desired_format = self.determine_format(request)
 79 |         serialized = self.serialize(request, data, desired_format)
 80 |         return response_class(content=serialized, content_type=build_content_type(desired_format), **response_kwargs)
 81 | 
 82 | class EstimatedCountPaginator(Paginator):
 83 |     def __init__(self, request_data, objects, resource_uri=None, limit=None, offset=0, max_limit=1000, collection_name='objects'):
 84 | 
 85 |         super(EstimatedCountPaginator, self).__init__(request_data, objects, resource_uri, limit, offset, max_limit, collection_name)
 86 |         self.count = self.get_estimated_count()
 87 |     def get_next(self, limit, offset, count):
 88 |         # The parent method needs an int which is higher than "limit + offset"
 89 |         # to return a url. Setting it to an unreasonably large value, so that
 90 |         # the parent method will always return the url.
 91 |         count = 2 ** 64
 92 |         return super(EstimatedCountPaginator, self).get_next(limit, offset, count)
 93 | 
 94 |     def get_count(self):
 95 |         return None
 96 |     def get_next(self, limit, offset, count):
 97 |         print "COUNT"
 98 |         print self.count
 99 |         if limit + offset > self.count and limit + offset >= self.get_max_id():
100 | 
101 |                 return None
102 |         try:
103 |             offset = self.cached[limit-1:limit][0].id + 1
104 |         except:
105 |             return None
106 |         print offset
107 |         return self._generate_uri(limit, offset)
108 |     def get_previous(self, limit, offset):
109 |         if offset == 0:
110 |             return None
111 |         if offset > limit:
112 |             offset = self.objects.filter(id__lt = offset).order_by('-id')[limit -1 :limit][0].id
113 |         return self._generate_uri(limit, offset)
114 |     # def _generate_uri(self, limit, offset):
115 |     #     pass
116 |     def get_slice(self, limit, offset):
117 |         self.cached = self.objects.filter(id__gte = offset).order_by('id')[:limit]
118 |         return self.cached
119 | 
120 | 
121 |     def get_estimated_count(self):
122 |         """Get the estimated count by using the database query planner."""
123 |         # If you do not have PostgreSQL as your DB backend, alter this method
124 |         # accordingly.
125 |         return self._get_postgres_estimated_count()
126 | 
127 |     def get_max_id(self):
128 |         cursor = connection.cursor()
129 |         query = 'SELECT id FROM "boe_analisis_documento"  where url_xml is not null ORDER BY "boe_analisis_documento"."id" desc limit 1;'
130 |         cursor.execute(query)
131 | 
132 |         self.max_id = cursor.fetchone()[0]
133 |         print self.max_id
134 |         return self.max_id
135 |     def _get_postgres_estimated_count(self):
136 | 
137 |         # This method only works with postgres >= 9.0.
138 |         # If you need postgres vesrions less than 9.0, remove "(format json)"
139 |         # below and parse the text explain output.
140 | 
141 |         def _get_postgres_version():
142 |             # Due to django connections being lazy, we need a cursor to make
143 |             # sure the connection.connection attribute is not None.
144 |             connection.cursor()
145 |             return connection.connection.server_version
146 | 
147 |         try:
148 |             if _get_postgres_version() < 90000:
149 |                 return
150 |         except AttributeError:
151 |             return
152 | 
153 |         cursor = connection.cursor()
154 |         query = "select reltuples from pg_class where relname='boe_analisis_documento';"
155 | 
156 |         # # Remove limit and offset from the query, and extract sql and params.
157 |         # query.low_mark = None
158 |         # query.high_mark = None
159 |         # query, params = self.objects.query.sql_with_params()
160 |         #
161 |         # # Fetch the estimated rowcount from EXPLAIN json output.
162 |         # query = 'explain (format json) %s' % query
163 |         cursor.execute(query)
164 |         # print query
165 |         rows = cursor.fetchone()[0]
166 |         # # Older psycopg2 versions do not convert json automatically.
167 |         # if isinstance(explain, basestring):
168 |         #     explain = json.loads(explain)
169 |         #     print explain
170 |         # rows = explain[0]['Plan']['Plan Rows']
171 |         return rows
172 | 
173 |     def page(self):
174 |         self.max_id = None
175 |         data = super(EstimatedCountPaginator, self).page()
176 |         data['meta']['estimated_count'] = self.get_estimated_count()
177 |         return data
178 | 
179 | 
180 | 
class DepartamentoResource(MyModelResource):
    """API resource over all ``Departamento`` rows (resource name ``departamento``)."""

    class Meta:
        resource_name = 'departamento'
        queryset = Departamento.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)
189 | 
class RangoResource(MyModelResource):
    """API resource over all ``Rango`` rows (resource name ``rango``)."""

    class Meta:
        resource_name = 'rango'
        queryset = Rango.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
200 | 
201 | 
class PartidoResource(MyModelResource):
    """API resource over all ``Partido`` rows (resource name ``partido``)."""

    class Meta:
        resource_name = 'partido'
        queryset = Partido.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
212 | 
213 | 
class LegislaturaResource(MyModelResource):
    """API resource over all ``Legislatura`` rows (resource name ``legislatura``)."""

    class Meta:
        resource_name = 'legislatura'
        queryset = Legislatura.objects.all()
        authorization = DjangoAuthorization()
        # Only ``presidente`` may be used as a filter.
        filtering = {'presidente': ALL_WITH_RELATIONS}
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
227 | 
228 | 
class Estado_consolidacionResource(MyModelResource):
    """API resource over all ``Estado_consolidacion`` rows (resource name ``estado_consolidacion``)."""

    class Meta:
        resource_name = 'estado_consolidacion'
        queryset = Estado_consolidacion.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
239 | 
240 | 
class Origen_legislativoResource(MyModelResource):
    """API resource over all ``Origen_legislativo`` rows (resource name ``origen_legislativo``)."""

    class Meta:
        resource_name = 'origen_legislativo'
        queryset = Origen_legislativo.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
251 | 
252 | 
253 | 
254 | 
class MateriaResource(MyModelResource):
    """API resource over all ``Materia`` rows (resource name ``materia``)."""

    class Meta:
        resource_name = 'materia'
        queryset = Materia.objects.all()
        authorization = DjangoAuthorization()
        # Only ``titulo`` may be used as a filter.
        filtering = {'titulo': ALL}
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
268 | 
269 | 
class DiarioResource(MyModelResource):
    """API resource over all ``Diario`` rows (resource name ``diario``)."""

    class Meta:
        resource_name = 'diario'
        queryset = Diario.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
281 | 
282 | 
class NotaResource(MyModelResource):
    """API resource over all ``Nota`` rows (resource name ``nota``); no cache is configured."""

    class Meta:
        resource_name = 'nota'
        queryset = Nota.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
292 | 
class AlertaResource(MyModelResource):
    """API resource over all ``Alerta`` rows (resource name ``alerta``)."""

    class Meta:
        resource_name = 'alerta'
        queryset = Alerta.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
303 | 
class PalabraResource(MyModelResource):
    """API resource over all ``Palabra`` rows (resource name ``palabra``)."""

    class Meta:
        resource_name = 'palabra'
        queryset = Palabra.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
314 | 
class ReferenciaResource(MyModelResource):
    """API resource over all ``Referencia`` rows (resource name ``referencia``)."""

    # Related-resource fields are currently disabled:
    # referencia = fields.ForeignKey('DocumentoResource', null=True, blank=True)
    # palabra = fields.ForeignKey(PalabraResource, null=True, blank=True)

    class Meta:
        resource_name = 'referencia'
        queryset = Referencia.objects.all()
        authorization = DjangoAuthorization()
        # GET and POST are accepted on both the list and the detail endpoints.
        detail_allowed_methods = ['get', 'post']
        list_allowed_methods = ['get', 'post']
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=24 * 60 * 60)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
329 | 
class DocumentoResource(MyModelResource):
    """
    Read-only API resource for ``Documento`` rows that have a ``url_xml``.

    Related objects (diario, materias, departamento, ...) are embedded in
    full. Documents are addressed by ``identificador`` in detail URIs and
    paginated with ``EstimatedCountPaginator``.
    """
    diario = fields.ForeignKey(DiarioResource,
                               'diario',
                               full=True,
                               null=True,
                               blank=True,
                               help_text="Codigo del Diario")
    materias = fields.ToManyField(MateriaResource,
                                  'materias',
                                  full=True,
                                  null=True,
                                  blank=True,
                                  help_text="Materias del documento")
    departamento = fields.ForeignKey(DepartamentoResource,
                                     'departamento',
                                     full=True,
                                     null=True,
                                     blank=True,
                                     help_text="Departamento del documento")
    origen_legislativo = fields.ForeignKey(Origen_legislativoResource,
                                           'origen_legislativo',
                                           full=True,
                                           null=True,
                                           blank=True,
                                           help_text="Origen Legislativo")
    estado_consolidacion = fields.ForeignKey(Estado_consolidacionResource,
                                             'estado_consolidacion',
                                             full=True,
                                             null=True,
                                             blank=True,
                                             help_text="Estado de consolidacion")
    rango = fields.ForeignKey(RangoResource,
                              'rango',
                              full=True,
                              null=True,
                              blank=True,
                              help_text="Rango del Documento(Ley, Real Decreto...)")
    legislatura = fields.ForeignKey(LegislaturaResource,
                                    'legislatura',
                                    full=True,
                                    null=True,
                                    blank=True,
                                    help_text="Legislatura de disposicion de la ley")

    alertas = fields.ToManyField(AlertaResource, 'alertas', full=True,
                                 null=True, blank=True)
    notas = fields.ToManyField(NotaResource, 'notas', full=True,
                               null=True, blank=True)

    # Referenced lazily by dotted path.
    referencias_anteriores = fields.ToManyField('boe_analisis.api.ReferenciaResource', 'referencias_anteriores', full=True,
                                                null=True, blank=True,
                                                related_name='ref_anteriores')
    referencias_posteriores = fields.ToManyField('boe_analisis.api.ReferenciaResource', 'referencias_posteriores', full=True,
                                                 null=True, blank=True,
                                                 related_name='ref_posteriores')

    # NOTE(review): not used anywhere in the visible code -- confirm
    # whether other modules rely on these attributes.
    search = None
    last_query = ''

    class Meta:
        last_id = 0
        # Only documents with an XML URL are exposed.
        queryset = Documento.objects.exclude(url_xml=None).order_by('fecha_publicacion')
        resource_name = 'documento'
        # Must be a plain string: a trailing comma here used to turn the
        # value into the tuple ('v1',).
        api_name = 'v1'
        detail_uri_name = 'identificador'
        list_allowed_methods = ['get', 'post']
        detail_allowed_methods = ['get', 'post']
        ordering = ['id']
        filtering = {
            'titulo': ALL,
            'identificador': ALL,
            'fecha_publicacion': ALL,
            'diario': ALL_WITH_RELATIONS,
            'seccion': ALL,
            'materias': ALL_WITH_RELATIONS,
            'legislatura': ALL_WITH_RELATIONS,
            'notas': ALL_WITH_RELATIONS,
            'referencias_anteriores': ALL_WITH_RELATIONS,
            'referencias_posteriores': ALL_WITH_RELATIONS,
        }
        # authentication = BasicAuthentication()
        authorization = ReadOnlyAuthorization()
        paginator_class = EstimatedCountPaginator
        # Cache timeout of one day, expressed in seconds.
        cache = SimpleCache(timeout=60*60*24)

    def determine_format(self, request):
        """Always answer in JSON, whatever the request asks for."""
        return 'application/json'
421 | 
422 | 
423 | 
424 | 
425 | 
426 | 
class BOEResource(DocumentoResource):
    """Document resource restricted to documents whose diario is 'BOE'.

    Fields and ``determine_format`` are inherited from ``DocumentoResource``;
    only the ``Meta`` options are redefined here.
    """

    class Meta:
        # Documents that have an XML URL and belong to the 'BOE' diario,
        # oldest publication first.
        queryset = (Documento.objects
                    .exclude(url_xml=None)
                    .filter(diario='BOE')
                    .order_by('fecha_publicacion'))
        resource_name = 'documentoboe'
        # Must be a plain string. The previous "api_name = 'v1'," had a
        # trailing comma, which made the value the tuple ('v1',).
        api_name = 'v1'
        # Detail URLs are keyed by the official identifier instead of the pk.
        detail_uri_name = 'identificador'
        list_allowed_methods = ['get', 'post']
        detail_allowed_methods = ['get', 'post']
        filtering = {
            'titulo': ALL,
            'identificador': ALL,
            'fecha_publicacion': ALL,
            'materias': ALL_WITH_RELATIONS,
            'legislatura': ALL_WITH_RELATIONS,
            'notas': ALL_WITH_RELATIONS,
            'referencias_anteriores': ALL_WITH_RELATIONS,
            'referencias_posteriores': ALL_WITH_RELATIONS,
        }
        # authentication = BasicAuthentication()
        authorization = ReadOnlyAuthorization()
        paginator_class = EstimatedCountPaginator
        # Cache timeout: 60*60*24 seconds (one day).
        cache = SimpleCache(timeout=60*60*24)
452 | 
class BORMEResource(DocumentoResource):
    """Document resource restricted to documents whose diario is 'BORME'.

    Fields and ``determine_format`` are inherited from ``DocumentoResource``;
    only the ``Meta`` options are redefined here.
    """

    class Meta:
        # Documents that have an XML URL and belong to the 'BORME' diario,
        # oldest publication first.
        queryset = (Documento.objects
                    .exclude(url_xml=None)
                    .filter(diario='BORME')
                    .order_by('fecha_publicacion'))
        resource_name = 'documentoborme'
        # Must be a plain string. The previous "api_name = 'v1'," had a
        # trailing comma, which made the value the tuple ('v1',).
        api_name = 'v1'
        # Detail URLs are keyed by the official identifier instead of the pk.
        detail_uri_name = 'identificador'
        list_allowed_methods = ['get', 'post']
        detail_allowed_methods = ['get', 'post']
        filtering = {
            'titulo': ALL,
            'identificador': ALL,
            'fecha_publicacion': ALL,
            'diario': ALL_WITH_RELATIONS,
        }
        # authentication = BasicAuthentication()
        authorization = ReadOnlyAuthorization()
        paginator_class = EstimatedCountPaginator
        # Cache timeout: 60*60*24 seconds (one day).
        cache = SimpleCache(timeout=60*60*24)
474 | 
475 | 
476 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/management/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Carlos'
2 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Carlos'
2 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/management/commands/getNewInfo.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Carlos'
  2 | import requests
  3 | from django.core.management.base import BaseCommand, CommandError
  4 | from bs4 import BeautifulSoup
  5 | from django.db import models
  6 | from boe_analisis.models import Documento
  7 | from django.db import connection
  8 | import re
  9 | import datetime
 10 | from processDocument import  ProcessDocument
 11 | from lxml import etree
 12 | from pattern.web import URL
 13 | 
 14 | cursor = connection.cursor()
 15 | cursor.execute("SELECT max(fecha_publicacion) from boe_analisis_documento")
 16 | val = cursor.fetchone()
 17 | ultima_fecha = val[0]
 18 | if not ultima_fecha:
 19 |     ultima_fecha = datetime.date(year=1960, month=1, day=1)
 20 | 
 21 | hoy = datetime.date.today() + datetime.timedelta(days=1)
 22 | url = "http://www.boe.es/boe/dias/{0}/{1}/{2}/"
 23 | url_boe = "http://www.boe.es/diario_boe/xml.php?id={0}"
 24 | cursor.close()
 25 | 
 26 | 
 27 | class Command(BaseCommand):
 28 | 
 29 |     def handle(self, *args, **options):
 30 |         print 'Fetching data...'
 31 | 
 32 |         if len(args) > 0:
 33 |             year = int(args[0])
 34 |             month = int(args[1]) if len(args) > 1 else 1
 35 |             day = int(args[2]) if len(args) > 2 else 1
 36 | 
 37 |             if year < 0 \
 38 |                 or (month is not None and month not in range(1,12))\
 39 |                 or (day is not None and day not in range(1, 30)):
 40 |                 raise AttributeError
 41 |             global ultima_fecha
 42 | 
 43 | 
 44 |             ultima_fecha = datetime.date(year=year, month=month, day=day)
 45 | 
 46 | 
 47 | 
 48 |         for d in daterange(ultima_fecha, hoy):
 49 |             print d
 50 |             url_day = getURLDay(d)
 51 |             print url_day
 52 |             req = requests.get(url_day)
 53 |             html = BeautifulSoup(req.text)
 54 |             link = html.find(href=re.compile("xml"))
 55 |             if link:
 56 |                 url_sum =  'http://www.boe.es' + link.get('href')
 57 |                 all_docs = []
 58 |                 procesarSumario(url_sum, all_docs)
 59 |                 for doc in all_docs:
 60 |                     print doc
 61 |                     try:
 62 |                         d = ProcessDocument(doc)
 63 |                         d.saveDoc()
 64 |                     except Exception, e:
 65 |                         print "fallo " + doc
 66 | 
 67 | 
 68 | 
 69 | def daterange(start, stop, step_days=1):
 70 |     current = start
 71 |     step = datetime.timedelta(step_days)
 72 |     if step_days > 0:
 73 |         while current < stop:
 74 |             yield current
 75 |             current += step
 76 |     elif step_days < 0:
 77 |         while current > stop:
 78 |             yield current
 79 |             current += step
 80 |     else:
 81 |         raise ValueError("daterange() step_days argument must not be zero")
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | def getURLDay(d):
 88 | 
 89 |     mes = "%0*d" % (2, d.month)
 90 |     dia = "%0*d" % (2, d.day)
 91 |     url_day = url.format(d.year, mes, dia)
 92 |     return  url_day
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | def procesarSumario(url_sumario, allDocs):
 99 | 
100 |     url_sumario = url_sumario
101 |     print url_sumario
102 |     content = URL(url_sumario).download()
103 |     xml = etree.XML(content)
104 |     ids = etree.XPath("//item/@id")
105 |     for id in ids(xml):
106 |         url_doc = url_boe.format(id)
107 |         allDocs.append(url_doc)
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/management/commands/processDocument.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Carlos'
  2 | from django.db import models
  3 | from django.core.management.base import BaseCommand, CommandError
  4 | from boe_analisis.models import Diario, DocumentoAnuncio,Legislatura ,Documento, Departamento, Rango, Origen_legislativo
  5 | from boe_analisis.models import Estado_consolidacion, Nota, Materia, Alerta, Palabra, Referencia
  6 | from boe_analisis.models import Modalidad, Tipo, Tramitacion, Procedimiento,Precio
  7 | import os
  8 | import sys
  9 | import locale
 10 | from django.db.models import Q
 11 | import re
 12 | from datetime import datetime
 13 | from lxml import etree, objectify
 14 | 
 15 | from pattern.web import URL
 16 | 
# Legislatura whose ``final`` date is NULL, looked up once when this module is
# imported. It is None when no such row exists; get_or_none only handles
# DoesNotExist, so more than one match raises at import time.
last_legislatura = Legislatura.objects.get_or_none(final__isnull = True)
 18 | 
 19 | 
 20 | class ProcessDocument():
 21 |     url_a_pattern =  "http://www.boe.es/diario_boe/xml.php?id={0}"
 22 |     url_a_html_pattern = "http://www.boe.es/diario_boe/txt.php?id={0}"
 23 | 
 24 |     xmlDoc = None
 25 |     rootXML = None
 26 |     doc = Documento()
 27 |     metadatos = None
 28 |     def __init__(self, url_xml):
 29 |         self.url = url_xml
 30 |         self.downloadXML()
 31 |         self.xmlToObject()
 32 |         self.getMetadatos()
 33 |         self.getAnalisis()
 34 |         self.createDocument()
 35 | 
 36 | 
 37 | 
 38 |     def saveDoc(self):
 39 |         try:
 40 |             self.doc.save()
 41 |         except:
 42 |             raise Exception
 43 | 
 44 |     def isDocumentoAnuncio(self):
 45 |         seccion = self.getElement(self.metadatos, 'seccion')
 46 |         subseccion = self.getElement(self.metadatos, 'subseccion')
 47 |         return seccion == '5' and subseccion == 'A'
 48 | 
 49 | 
 50 |     def processReferencias(self, doc):
 51 |         if self.existElement(self.analisis, 'referencias'):
 52 |             ref = self.analisis.referencias
 53 |             if self.existElement(ref, 'anteriores'):
 54 |                 if self.existElement(ref.anteriores, 'anterior'):
 55 |                     ref_ant = []
 56 |                     for anterior in ref.anteriores.anterior:
 57 |                         referencia = anterior.get('referencia')
 58 |                         doc_ref = self.get_or_create(Documento, identificador=referencia)
 59 |                         palabra_codigo = anterior.palabra.get('codigo')
 60 |                         palabra_texto = anterior.palabra.text
 61 |                         texto = anterior.texto.text
 62 |                         palabra = self.get_or_create(Palabra, codigo=palabra_codigo, titulo=palabra_texto)
 63 |                         busqueda = dict(referencia=doc_ref, palabra=palabra)
 64 |                         insert = dict(texto=texto)
 65 |                         ref = self.get_or_create(Referencia, busqueda=busqueda, insert=insert)
 66 |                         ref_ant.append(ref)
 67 |                     doc.referencias_anteriores = ref_ant
 68 |             if self.existElement(ref, 'posteriores'):
 69 |                 if self.existElement(ref.posteriores, 'posterior'):
 70 |                     ref_post = []
 71 |                     for posterior in ref.posteriores.posterior:
 72 |                         referencia = posterior.get('referencia')
 73 |                         doc_ref = self.get_or_create(Documento, identificador=referencia)
 74 |                         palabra_codigo = posterior.palabra.get('codigo')
 75 |                         palabra_texto = posterior.palabra.text
 76 |                         texto = posterior.texto.text
 77 |                         palabra = self.get_or_create(Palabra, codigo=palabra_codigo, titulo=palabra_texto)
 78 |                         busqueda = dict(referencia=doc_ref, palabra=palabra)
 79 |                         insert = dict(texto=texto)
 80 |                         ref = self.get_or_create(Referencia, busqueda=busqueda, insert=insert)
 81 |                         ref_post.append(ref)
 82 |                     doc.referenicas_posteriores = ref_post
 83 | 
 84 |     def createDocument(self):
 85 |         identificador = self.getElement(self.metadatos, 'identificador')
 86 |         if not identificador:
 87 |             raise Exception
 88 |         if self.isDocumentoAnuncio():
 89 |             self.doc = self.get_or_create(DocumentoAnuncio, identificador=identificador)
 90 |             mod_codigo, mod_titulo = self.getElementCodigoTitulo(self.analisis, 'modalidad')
 91 |             self.doc.modalidad = self.get_or_create(Modalidad, codigo=mod_codigo, titulo=mod_titulo)
 92 |             tipo_codigo, tipo_titulo = self.getElementCodigoTitulo(self.analisis, 'tipo')
 93 |             self.doc.tipo = self.get_or_create(Tipo, codigo=tipo_codigo, titulo=tipo_titulo)
 94 |             tram_codigo, tram_titulo = self.getElementCodigoTitulo(self.analisis, 'tramitacion')
 95 |             self.doc.tramitacion = self.get_or_create(Tramitacion, codigo=tram_codigo, titulo=tram_titulo)
 96 |             proc_codigo, proc_titulo = self.getElementCodigoTitulo(self.analisis, 'procedimiento')
 97 |             self.doc.procedimiento = self.get_or_create(Procedimiento, codigo=proc_codigo, titulo=proc_titulo)
 98 |             self.doc.fecha_presentacion_ofertas = self.getElement(self.analisis, 'fecha_presentacion_ofertas')
 99 |             self.doc.fecha_apertura_ofertas = self.getElement(self.analisis, 'fecha_apertura_ofertas')
100 |             precio_codigo, precio_titulo =  self.getElementCodigoTitulo(self.analisis, 'precio')
101 |             self.doc.precio = self.get_or_create(Precio, codigo=precio_codigo, titulo=precio_titulo)
102 |             importe = self.getElement(self.analisis, 'importe')
103 |             if isinstance(importe, str):
104 |                 self.doc.importe = self.stringToFloat(importe)
105 |             self.doc.ambito_geografico = self.getElement(self.analisis, 'ambito_geografico')
106 |             self.doc.materias_anuncio = self.getElement(self.analisis, 'materias')
107 |             self.doc.materias_cpv = self.getElement(self.analisis, 'materias_cpv')
108 |             self.doc.observaciones = self.getElement(self.analisis, 'observaciones')
109 | 
110 |         else:
111 |             self.doc = self.get_or_create(Documento, identificador=identificador)
112 | 
113 |         doc = self.doc
114 |         doc.seccion = self.getElement(self.metadatos, 'seccion')
115 |         doc.subseccion = self.getElement(self.metadatos, 'subseccion')
116 |         doc.titulo = self.getElement(self.metadatos, 'titulo')
117 |         diario_codigo, diario_titulo = self.getElementCodigoTitulo(self.metadatos, 'diario')
118 |         doc.diario = self.get_or_create(Diario, codigo=diario_codigo, titulo=diario_titulo)
119 |         doc.diario_numero = self.getElement(self.metadatos, 'diario_numero')
120 |         dep_codigo, dep_titulo = self.getElementCodigoTitulo(self.metadatos, 'departamento')
121 |         doc.departamento = self.get_or_create(Departamento, codigo=dep_codigo, titulo=dep_titulo)
122 |         rango_codigo, rango_titulo = self.getElementCodigoTitulo(self.metadatos, 'rango')
123 |         doc.rango = self.get_or_create(Rango, codigo=rango_codigo, titulo=rango_titulo)
124 |         doc.numero_oficial = self.getElement(self.metadatos, 'numero_oficial')
125 |         doc.fecha_disposicion = self.textToDate(self.getElement(self.metadatos, 'fecha_disposicion'))
126 |         if doc.fecha_disposicion:
127 |             if (doc.fecha_disposicion.date() >= last_legislatura.inicio):
128 |                 doc.legislatura =  last_legislatura
129 |                 print doc.legislatura
130 |             else:
131 |                 legislatura = Legislatura.objects.get_or_none(inicio__lte = doc.fecha_disposicion, final__gte = doc.fecha_disposicion)
132 |                 print legislatura
133 |                 if legislatura is not None:
134 |                     print legislatura
135 |                     doc.legislatura = legislatura
136 | 
137 | 
138 |         doc.fecha_publicacion = self.textToDate(self.getElement(self.metadatos, 'fecha_publicacion'))
139 |         doc.fecha_vigencia = self.textToDate(self.getElement(self.metadatos, 'fecha_vigencia'))
140 |         doc.fecha_derogacion = self.textToDate(self.getElement(self.metadatos, 'fecha_derogacion'))
141 |         doc.letra_imagen = self.getElement(self.metadatos, 'letra_imagen')
142 |         doc.pagina_inicial = int(self.getElement(self.metadatos, 'pagina_inicial'))
143 |         doc.pagina_final = int(self.getElement(self.metadatos, 'pagina_final'))
144 |         doc.suplemento_pagina_inicial = self.getElement(self.metadatos, 'suplemento_pagina_inicial')
145 |         doc.suplemento_pagina_final = self.getElement(self.metadatos, 'suplemento_pagina_final')
146 |         doc.estatus_legislativo = self.getElement(self.metadatos, 'estatus_legislativo')
147 |         origen_leg_cod, origen_leg_titulo = self.getElementCodigoTitulo(self.metadatos, 'origen_legislativo')
148 |         doc.origen_legislativo = self.get_or_create(Origen_legislativo, codigo=origen_leg_cod, titulo=origen_leg_titulo)
149 |         est_cons_cod, est_cons_titulo = self.getElementCodigoTitulo(self.metadatos, 'estado_consolidacion')
150 |         if est_cons_cod != None and est_cons_cod != '':
151 |             doc.estado_consolidacion = self.get_or_create(Estado_consolidacion, codigo=int(est_cons_cod), titulo=est_cons_titulo)
152 |         doc.judicialmente_anulada = self.SiNoToBool(self.getElement(self.metadatos, 'judicialmente_anulada'))
153 |         doc.vigencia_agotada = self.SiNoToBool(self.getElement(self.metadatos, 'vigencia_agotada'))
154 |         doc.estatus_derogacion = self.SiNoToBool(self.getElement(self.metadatos, 'estatus_derogacion'))
155 |         doc.url_htm = self.url_a_html_pattern.format(doc.identificador)
156 |         doc.url_xml = self.url_a_pattern.format(doc.identificador)
157 |         doc.url_epub = self.getElement(self.metadatos, 'url_epub')
158 |         doc.url_pdf = self.getElement(self.metadatos, 'url_pdf')
159 |         doc.url_pdf_catalan = self.getElement(self.metadatos, 'url_pdf_catalan')
160 |         doc.url_pdf_euskera = self.getElement(self.metadatos, 'url_pdf_euskera')
161 |         doc.url_pdf_gallego = self.getElement(self.metadatos, 'url_pdf_gallego')
162 |         doc.url_pdf_valenciano = self.getElement(self.metadatos, 'url_pdf_valenciano')
163 |         doc.notas = self.getArrayOfElements(self.analisis, 'notas', 'nota', Nota)
164 |         doc.materias = self.getArrayOfElements(self.analisis, 'materias', 'materia', Materia)
165 |         doc.alertas = self.getArrayOfElements(self.analisis, 'alertas', 'alerta', Alerta)
166 |         self.processReferencias(doc)
167 |         doc.texto = etree.tostring(self.rootXML.texto, pretty_print=True)
168 | 
169 |     def getArrayOfElements(self, origin, element, subelement, model):
170 |         if self.existElement(origin, element):
171 |             subel = getattr(origin, element)
172 |             if self.existElement(subel, subelement):
173 |                 elements = []
174 |                 for el in getattr(subel, subelement):
175 |                     codigo =  el.get('codigo')
176 |                     titulo = el.text
177 |                     if codigo:
178 |                         ob = self.get_or_create(model, codigo=codigo, titulo=titulo)
179 |                         elements.append(ob)
180 |                 return elements
181 | 
182 |         return []
183 |         # codigo, titulo = self.getElementCodigoTitulo()
184 |     def getElementCodigoTitulo(self, origin, element):
185 |         codigo = self.getAttribute(origin, element, 'codigo')
186 |         titulo = self.getElement(origin, element)
187 | 
188 |         return codigo, titulo
189 | 
190 |     def getAttribute(self, origin, element, attribute):
191 |         if self.existElement(origin, element):
192 |             return getattr(origin,element).get(attribute)
193 |         return None
194 |     def downloadXML(self):
195 |         url_xml = URL(self.url)
196 |         self.xmlDoc = url_xml.download()
197 | 
198 |     def xmlToObject(self):
199 |         self.rootXML = objectify.fromstring(self.xmlDoc)
200 | 
201 |     def getMetadatos(self):
202 |         self.metadatos = self.rootXML.metadatos
203 | 
204 |     def getAnalisis(self):
205 |         self.analisis = self.rootXML.analisis
206 | 
207 | 
208 |     def existElement(self, origin, element):
209 |         return hasattr(origin, element)
210 | 
211 |     def getElement(self,origin, element):
212 |         if hasattr(origin, element):
213 |             return getattr(getattr(origin,element), 'text')
214 | 
215 | 
216 |     @staticmethod
217 |     def get_or_create(model, **kwargs):
218 |         len_items = len(kwargs)
219 |         count_items = 0
220 |         for k, v in kwargs.items():
221 |             if v is None or v is '':
222 |                 count_items += 1
223 | 
224 |         if len_items == count_items:
225 |             return None
226 | 
227 |         objeto = None
228 |         try:
229 |             if kwargs.has_key('busqueda'):
230 |                 objeto = model.objects.get(**kwargs['busqueda'])
231 |             else:
232 |                 objeto = model.objects.get(**kwargs)
233 |         except:
234 |             # print kwargs
235 |             if kwargs.has_key('busqueda') and kwargs.has_key('insert'):
236 |                 insert = dict(kwargs['busqueda'].items() + kwargs['insert'].items())
237 |                 objeto = model(**insert)
238 |                 # print objeto
239 |             else:
240 | 
241 |                 objeto = model(**kwargs)
242 | 
243 |             objeto.save()
244 |         return objeto
245 | 
246 |     @staticmethod
247 |     def stringToFloat(value):
248 | 
249 |         # Remove anything not a digit, comma or period
250 |         no_cruft = re.sub(r'[^\d,.-]', '', value)
251 |         # Split the result into parts consisting purely of digits
252 |         parts = re.split(r'[,.]', no_cruft)
253 |         # ...and sew them back together
254 |         try:
255 |             if len(parts) == 1:
256 |                 # No delimeters found
257 |                 float_str = parts[0]
258 |             elif len(parts[-1]) != 2:
259 |                 # >= 1 delimeters found. If the length of last part is not equal to 2, assume it is not a decimal part
260 |                 float_str = ''.join(parts)
261 |             else:
262 |                 float_str = '%s%s%s' % (''.join(parts[0:-1]),
263 |                                         locale.localeconv()['decimal_point'],
264 |                                         parts[-1])
265 | 
266 |             # Convert to float
267 |             return float(float_str)
268 |         except:
269 |             return None
270 | 
271 |     @staticmethod
272 |     def textToDate(texto):
273 |         regex = re.compile("(\d{4})(\d{2})(\d{2})")
274 |         if texto is not None:
275 |             match = re.match(regex, texto)
276 |             if match != None:
277 |                 year = int(match.group(1))
278 |                 month = int(match.group(2))
279 |                 day = int(match.group(3))
280 |                 d = datetime(year,month, day)
281 |                 return d
282 |         return None
283 | 
284 |     @staticmethod
285 |     def SiNoToBool(character):
286 |         return character == 'S'


--------------------------------------------------------------------------------
/boe/boe_analisis/models.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from django.db import models
  3 | import datetime
  4 | 
  5 | # Create your models here.
  6 | class GetOrNoneManager(models.Manager):
  7 |     """Adds get_or_none method to objects
  8 |     """
  9 |     def get_or_none(self, **kwargs):
 10 |         try:
 11 |             return self.get(**kwargs)
 12 |         except self.model.DoesNotExist:
 13 |             return None
 14 | 
 15 | 
 16 | class CodigoTitulo(models.Model):
 17 |     codigo = models.CharField(max_length=10)
 18 |     titulo = models.CharField(max_length=4000, null=True)
 19 | 
 20 |     class Meta:
 21 |         abstract = True
 22 |         unique_together = (('codigo', 'titulo'),)
 23 | 
 24 | class Diario(models.Model):
 25 |     codigo = models.CharField(max_length=10, primary_key=True)
 26 |     titulo = models.CharField(max_length=400, null=True)
 27 | 
 28 | class Departamento(models.Model):
 29 |     codigo = models.CharField(max_length=10, primary_key=True)
 30 |     titulo = models.CharField(max_length=4000, null=True)
 31 | 
 32 | class Rango(models.Model):
 33 |     codigo = models.IntegerField(primary_key=True)
 34 |     titulo = models.CharField(max_length=4000, null=True )
 35 | 
 36 | class Origen_legislativo(models.Model):
 37 |     codigo = models.IntegerField(primary_key=True)
 38 |     titulo = models.CharField(max_length=4000, null=True)
 39 | 
 40 | class Estado_consolidacion(models.Model):
 41 |     codigo = models.IntegerField(primary_key=True)
 42 |     titulo = models.CharField(max_length=4000, null=True)
 43 | 
 44 | class Nota(models.Model):
 45 |     codigo = models.IntegerField()
 46 |     titulo = models.CharField(max_length=4000, null=True)
 47 | 
 48 |     class Meta:
 49 |         unique_together = (('codigo', 'titulo'),)
 50 | 
 51 | class Materia(models.Model):
 52 |     codigo = models.IntegerField(primary_key=True)
 53 |     titulo = models.CharField(max_length=4000, null=True)
 54 | 
 55 | 
 56 | class Alerta(models.Model):
 57 |     codigo = models.IntegerField(primary_key=True)
 58 |     titulo = models.CharField(max_length=4000, null=True)
 59 | 
 60 | class Palabra(models.Model):
 61 |     codigo = models.IntegerField(primary_key=True)
 62 |     titulo = models.CharField(max_length=4000, null=True)
 63 | 
 64 | class Referencia(models.Model):
 65 |     referencia = models.ForeignKey('Documento')
 66 |     palabra = models.ForeignKey(Palabra)
 67 |     texto = models.TextField(max_length=4000)
 68 | 
 69 |     class Meta:
 70 |         unique_together = (('referencia','palabra'),)
 71 | 
 72 |     # def __unicode__(self):
 73 |     #     return self.palabra.codigo
 74 | 
 75 | class Partido(models.Model):
 76 | 
 77 |     nombre = models.CharField(max_length=200)
 78 | 
 79 |     def __unicode__(self):
 80 |         return self.nombre
 81 | 
 82 | class Legislatura(models.Model):
 83 |     inicio = models.DateField()
 84 |     final = models.DateField(null=True, blank=True)
 85 |     partido = models.ForeignKey(Partido)
 86 |     presidente = models.CharField(max_length=300)
 87 |     nombre_legislatura = models.CharField(max_length=600)
 88 | 
 89 | 
 90 |     objects = GetOrNoneManager()
 91 |     class Meta:
 92 |         ordering = ['inicio']
 93 | 
 94 |     def __unicode__(self):
 95 |         return self.nombre_legislatura
 96 | 
 97 | 
 98 | 
 99 | class Documento(models.Model):
100 | 
101 |     identificador = models.CharField(max_length=25,
102 |                                      db_index=True,
103 |                                      unique=True,
104 |                                      help_text="Identificador oficial del documento")
105 |     titulo = models.CharField(null=True,
106 |                               max_length=5000,
107 |                               default='',
108 |                               db_index=True,
109 |                               help_text="Titulo del documento")
110 |     diario = models.ForeignKey(Diario, null=True)
111 |     diario_numero = models.IntegerField(null=True,
112 |                                         help_text="Corresponde al número del diario"
113 |                                                   " en el que se publicó la norma. "
114 |                                                   "Es un número correlativo que comienza cada año."
115 |                                                   )
116 |     seccion = models.CharField(max_length=50,
117 |                                null=True,
118 |                                default='',
119 |                                help_text="Sección")
120 |     subseccion = models.CharField(max_length=50,
121 |                                   null=True,
122 |                                   default='',
123 |                                   help_text="Subsección")
124 |     rango = models.ForeignKey(Rango,
125 |                               null=True,
126 |                               help_text="Categoría normativa de la disposición: "
127 |                                         "Ley, Real Decreto, Orden, Directiva,"
128 |                                         " etcétera.")
129 |     departamento = models.ForeignKey(Departamento, null=True,
130 |                                      help_text="Organismo que emite la resolución.")
131 |     numero_oficial = models.CharField(max_length=50,
132 |                                       null=True,
133 |                                       default='',
134 |                                       help_text="Es el número de la norma, "
135 |                                                 "tiene estructura de número/año."
136 |                                                 " Ejemplo: 169/2008")
137 |     fecha_disposicion = models.DateField(null=True,
138 |                                          help_text="Es la fecha en la que se aprueba la norma.")
139 |     fecha_publicacion = models.DateField(null=True, db_index=True,
140 |                                          help_text="Es la fecha del BOE o DOUE"
141 |                                                    " en la que se publicó la norma.")
142 |     fecha_vigencia = models.DateField(null=True,
143 |                                       help_text="Fecha en la que entra en vigor la norma")
144 |     fecha_derogacion = models.DateField(null=True,
145 |                                         help_text="Fecha en la que la norma de deroga")
146 |     letra_imagen = models.CharField(max_length=10, null=True, default='',
147 |                                     help_text="-")
148 |     pagina_inicial = models.IntegerField(null=True,
149 |                                          help_text="Pagina inicial")
150 |     pagina_final = models.IntegerField(null=True,
151 |                                        help_text="Pagina final")
152 |     suplemento_letra_imagen = models.CharField(max_length=10,
153 |                                                null=True,
154 |                                                default='',
155 |                                                help_text="-")
156 |     suplemento_pagina_inicial = models.CharField(max_length=10,
157 |                                                  null=True,
158 |                                                  default='',
159 |                                                  help_text="-")
160 |     suplemento_pagina_final = models.CharField(max_length=10,
161 |                                                null=True,
162 |                                                default='',
163 |                                                help_text="-")
164 |     estatus_legislativo = models.CharField(max_length=10,
165 |                                            null=True,
166 |                                            default='',
167 |                                            help_text="-")
168 |     origen_legislativo = models.ForeignKey(Origen_legislativo,
169 |                                            null=True,
170 |                                            help_text="Autonomico, Estatal o Europeo")
171 |     estado_consolidacion = models.ForeignKey(Estado_consolidacion,
172 |                                              null=True,
173 |                                              help_text="Desactualizado, Finalizado o En proceso")
174 |     judicialmente_anulada = models.NullBooleanField(null=True,
175 |                                                     help_text="Anulada judicialmente")
176 |     vigencia_agotada = models.NullBooleanField(null=True,
177 |                                                help_text="Con vigencia agotada")
178 |     legislatura = models.ForeignKey(Legislatura,null=True,
179 |                                     help_text="Legislatura en la que se aprobó la norma")
180 |     url_epub = models.URLField(null=True,
181 |                                help_text="URL EPUB")
182 |     url_xml = models.URLField(null=True, db_index=True,
183 |                               help_text="URL XML")
184 |     url_htm = models.URLField(null=True,
185 |                               help_text="URL HTML")
186 |     url_pdf = models.URLField(null=True,
187 |                               help_text="URL PDF")
188 |     url_pdf_catalan = models.URLField(null=True,
189 |                                       help_text="URL PDF Catalan (No todos los documentos los tienen)")
190 |     url_pdf_euskera = models.URLField(null=True,
191 |                                        help_text="URL PDF Euskera (No todos los documentos los tienen)")
192 |     url_pdf_gallego = models.URLField(null=True,
193 |                                        help_text="URL PDF Gallego (No todos los documentos los tienen)")
194 |     url_pdf_valenciano = models.URLField(null=True,
195 |                                           help_text="URL PDF Valenciano (No todos los documentos los tienen)")
196 |     notas = models.ManyToManyField(Nota,
197 |                                    help_text="Notas del documento")
198 |     materias = models.ManyToManyField(Materia,
199 |                                       help_text="Materias del documento")
200 |     alertas = models.ManyToManyField(Alerta,
201 |                                      help_text="Alertas disponibles para el documento")
202 |     referencias_anteriores = models.ManyToManyField(Referencia,
203 |                                                     related_name='ref_anteriores',
204 |                                                     help_text="Referencias anteriores")
205 |     referencias_posteriores = models.ManyToManyField(Referencia,
206 |                                                      related_name='ref_posteriores',
207 |                                                      help_text="Referencias posteriores")
208 |     texto = models.TextField(null=True, default='',
209 |                              help_text="Texto del documento en HTML")
210 | 
211 |     def __unicode__(self):
212 |         return self.identificador
213 | 
214 |     def __eq__(self, other):
215 |         return self.__dict__ == other.__dict__
216 | 
    class Meta:
        # Default queryset ordering: by fecha_publicacion, descending (the
        # leading "-" reverses the order).
        ordering = ['-fecha_publicacion']
219 |     def get_absolute_url(self):
220 |         from django.core.urlresolvers import reverse
221 |         return reverse('individual_doc', args=[str(self.identificador)])
222 | 
223 | 
224 | 
class Modalidad(CodigoTitulo):
    """``CodigoTitulo`` subclass referenced by ``DocumentoAnuncio.modalidad``."""
    class Meta:
        # Default ordering: ascending by ``codigo``.
        ordering = ['codigo']
228 | 
229 | 
class Tipo(CodigoTitulo):
    """``CodigoTitulo`` subclass referenced by ``DocumentoAnuncio.tipo``."""
    class Meta:
        # Default ordering: ascending by ``codigo``.
        ordering = ['codigo']
233 | 
class Tramitacion(CodigoTitulo):
    """``CodigoTitulo`` subclass referenced by ``DocumentoAnuncio.tramitacion``."""
    class Meta:
        # Default ordering: ascending by ``codigo``.
        ordering = ['codigo']
237 | 
class Procedimiento(CodigoTitulo):
    """``CodigoTitulo`` subclass whose rows are ordered by ``codigo`` by default."""
    class Meta:
        # Default ordering: ascending by ``codigo``.
        ordering = ['codigo']
241 | 
class Precio(CodigoTitulo):
    """``CodigoTitulo`` subclass referenced by ``DocumentoAnuncio.precio``."""
    class Meta:
        # Default ordering: ascending by ``codigo``.
        ordering = ['codigo']
245 | 
246 | 
class DocumentoAnuncio(Documento):
    """``Documento`` subclass carrying the extra fields of an announcement.

    Besides the inherited fields it stores optional links to ``Modalidad``,
    ``Tipo``, ``Tramitacion`` and ``Precio`` rows, an optional decimal
    ``importe`` and several optional free-text columns.
    """
    modalidad = models.ForeignKey(Modalidad, null=True)
    tipo = models.ForeignKey(Tipo, null=True)
    tramitacion = models.ForeignKey(Tramitacion, null=True)
    # Both dates are stored as free text (up to 4000 characters), not as dates.
    fecha_presentacion_ofertas = models.CharField(max_length=4000, null=True, blank=True)
    fecha_apertura_ofertas = models.CharField(max_length=4000, null=True, blank=True)
    precio = models.ForeignKey(Precio, null=True)
    importe = models.DecimalField(decimal_places=2, max_digits=1000, null=True, blank=True)
    ambito_geografico = models.CharField(max_length=4000, null=True, blank=True)
    materias_anuncio = models.CharField(max_length=4000, null=True, blank=True)
    materias_cpv = models.CharField(max_length=4000, null=True, blank=True)
    observaciones = models.CharField(max_length=4000, null=True, blank=True)

    def __eq__(self, other):
        """Compare this announcement with ``other`` attribute by attribute.

        Returns ``True`` only when both instance dictionaries are equal.
        An ``other`` that has no instance dictionary (``None``, numbers,
        strings, ...) is never equal to an announcement; such a comparison
        used to raise ``AttributeError``.
        """
        # NOTE(review): the instance dictionary may also hold private
        # bookkeeping entries besides the field values, so two separately
        # loaded copies of the same row can still compare unequal -- confirm
        # what callers expect before narrowing this to field values only.
        other_attrs = getattr(other, '__dict__', None)
        if other_attrs is None:
            return False
        return self.__dict__ == other_attrs
262 | 
263 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/paginator.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Carlos'
 2 | from django.core.paginator import Paginator, InvalidPage, EmptyPage
 3 | from django.db.models import Max,Count,Q,F
 4 | 
 5 | class ModelPagination:
 6 |     model = None
 7 |     items_per_page = None
 8 |     count = None
 9 |     page_range = []
10 | 
11 |     def __init__(self, model, items_per_page):
12 |         self.model = model
13 |         self.items_per_page = items_per_page
14 |         self.count = self.model.aggregate(Max('id'))['id__max']
15 |         print self.count
16 |         self.num_pages = divmod(self.count, self.items_per_page)[0]+1
17 | 
18 |         for i in range(self.num_pages):
19 |             self.page_range.append(i+1)
20 | 
21 |     def page(self, page_number):
22 |         if page_number > self.num_pages:
23 |             raise EmptyPage, "That page contains no results"
24 | 
25 |         if page_number <= 0:
26 |             raise EmptyPage, "That page number is less than 1"
27 | 
28 |         start = self.items_per_page * (page_number-1)
29 |         end = self.items_per_page * page_number
30 | 
31 |         object_list = self.model.filter(id__gte=start, id__lt=end)
32 |         return ModelPaginationPage(object_list, page_number, self.count, start, end, self)
33 | 
class ModelPaginationPage(object):
    """A single page of results, with the usual navigation helpers.

    Attributes:
        object_list: the rows belonging to this page.
        number: 1-based number of this page.
        count: value handed over by the paginator (its highest ``id``).
        start: lower ``id`` bound of the page (inclusive).
        end: upper ``id`` bound of the page (exclusive).
        paginator: the paginator that created the page; its ``num_pages``
            attribute, when present, is the total number of pages.
    """
    object_list = None
    number = None
    count = None
    start = None
    end = None
    paginator = None

    def __unicode__(self):
        """Return ``<Page N of M>`` where ``M`` is the total number of pages."""
        return "<Page %s of %s>" % (self.number, self._total_pages())

    def __init__(self, object_list, number, count, start, end, paginator):
        """Store the page contents and its position within the paginator."""
        self.number = number
        self.count = count
        self.object_list = object_list
        self.start = start
        self.end = end
        self.paginator = paginator

    def _total_pages(self):
        """Return the paginator's ``num_pages``, or ``count`` if it has none."""
        total = getattr(self.paginator, 'num_pages', None)
        # Without a paginator that knows its page count, keep the historical
        # behaviour of treating ``count`` as the upper bound.
        return self.count if total is None else total

    def has_next(self):
        """Return ``True`` when a later page exists."""
        return self.number < self._total_pages()

    def has_previous(self):
        """Return ``True`` when an earlier page exists."""
        return self.number > 1

    def has_other_pages(self):
        """Return ``True`` when there is a page before or after this one."""
        return self.has_next() or self.has_previous()

    def next_number(self):
        """Return the number of the following page."""
        return self.number + 1

    def previous_number(self):
        """Return the number of the preceding page."""
        return self.number - 1

    def start_index(self):
        """Return the lower ``id`` bound of the page."""
        return self.start

    def end_index(self):
        """Return the upper ``id`` bound of the page."""
        return self.end
73 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/tests.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This file demonstrates writing tests using the unittest module. These will pass
 3 | when you run "manage.py test".
 4 | 
 5 | Replace this with more appropriate tests for your application.
 6 | """
 7 | 
 8 | from django.test import TestCase
 9 | 
10 | 
class SimpleTest(TestCase):
    """Placeholder test case holding a single arithmetic sanity check."""

    def test_basic_addition(self):
        """Check that adding one and one yields two."""
        total = 1 + 1
        self.assertEqual(total, 2)
17 | 


--------------------------------------------------------------------------------
/boe/boe_analisis/urls.py:
--------------------------------------------------------------------------------
 1 | from django.conf.urls import patterns, include, url
 2 | from django.contrib import admin
 3 | from boe_analisis.api import *
 4 | from django.contrib.auth.models import User
 5 | from boe_analisis import views
 6 | from tastypie.api import Api
 7 | from django.db import models
 8 | from tastypie.models import create_api_key
 9 | from django.views.decorators.cache import cache_page
10 | # admin.site.register(boe_analisis)
11 | 
# Every tastypie resource of the application is registered on the "v1" API,
# in the same order as before.
v1_api = Api(api_name='v1')
for _resource_cls in (
        MateriaResource,
        DiarioResource,
        DocumentoResource,
        BOEResource,
        BORMEResource,
        NotaResource,
        AlertaResource,
        PalabraResource,
        ReferenciaResource,
        DepartamentoResource,
        RangoResource,
        LegislaturaResource,
        Estado_consolidacionResource,
        Origen_legislativoResource,
        PartidoResource,
):
    v1_api.register(_resource_cls())
# Do not leave the loop variable behind in the module namespace.
del _resource_cls


# The tastypie URLs come first; the hand-written JSON views follow.
urlpatterns = patterns(
    '',
    # url(r'^$', 'boe_analisis.views.home', name='home_docs'),
    url(r'^', include(v1_api.urls)),
    # url(r'^documento/(?P<identificador>[\w|\-]+)$', 'boe_analisis.views.individual', name="individual_doc"),
    # url(r'^materias/(?P<materia>[\w|\-]+)/$', 'boe_analisis.views.materias' , name="individual_materia"),
    # url(r'^materias/$', 'boe_analisis.views.top_materias', name='listado_materias'),
    # url(r'^graficos/$', 'boe_analisis.views.graficos', name='graficos'),
    url(r'^v1/legislaturas/$', 'boe_analisis.views.leyes_legislatura'),
    url(r'^v1/legislaturas/meses/(?P<meses>\d+)/$', 'boe_analisis.views.leyes_meses_legislatura'),
    url(r'^v1/legislaturas/meses/$', 'boe_analisis.views.leyes_meses_legislatura'),
    url(r'^v1/legislaturas/materia/(?P<materias>\d+)$', 'boe_analisis.views.materias_legislatura'),
    url(r'^v1/legislaturas/materia/$', 'boe_analisis.views.top_materias'),
    url(r'^v1/years/$', 'boe_analisis.views.years'),
    url(r'^v1/years/materia/(?P<materia>\d+)$', 'boe_analisis.views.years'),
    # url(r'^docs/', include("tastydocs.urls"), {"api": v1_api}) # api must be a reference to the TastyPie API object.
)

# Run tastypie's create_api_key handler every time a User is saved.
models.signals.post_save.connect(create_api_key, sender=User)


--------------------------------------------------------------------------------
/boe/boe_analisis/views.py:
--------------------------------------------------------------------------------
  1 | # Create your views here.
  2 | from django.template import RequestContext
  3 | from django.shortcuts import render,render_to_response, redirect
  4 | from django.shortcuts import get_object_or_404
  5 | from boe_analisis.models import Documento, Materia, Diario
  6 | from django.http import HttpResponse
  7 | import json
  8 | 
  9 | from django.db import connection
 10 | from django.contrib.auth import authenticate, login, logout
 11 | from django.contrib.auth.forms import *
 12 | from django.contrib.auth.decorators import user_passes_test, login_required
 13 | from django.core.urlresolvers import reverse_lazy
 14 | from django.contrib.auth.models import User, Group
 15 | from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
 16 | from django.core.cache import cache
 17 | from django.views.decorators.cache import cache_page
 18 | from django.http import Http404
 19 | 
 20 | 
 21 | def leyes_meses_legislatura(request, meses=48):
 22 |     if int(meses) >= 48:
 23 |         meses = 48
 24 | 
 25 | 
 26 |     cursor = connection.cursor()
 27 |     cursor.execute("SELECT leg.nombre_legislatura, leg.id , leg.presidente, count(*)"
 28 |     "from boe_analisis_documento doc, boe_analisis_legislatura leg "
 29 |       "where doc.legislatura_id = leg.id "
 30 |       "and doc.fecha_disposicion > leg.inicio and doc.fecha_disposicion <= (leg.inicio) + (%s || ' month')::INTERVAL "
 31 |       "group by leg.nombre_legislatura, leg.presidente, leg.id order by leg.id;", [int(meses)]
 32 |       )
 33 |     legislatura_array = []
 34 |     legislatura_array.append({'meses': meses})
 35 |     for legislatura in cursor.fetchall():
 36 |         ley = {
 37 | 
 38 |             'codigo_legislatura' : legislatura [1],
 39 |             'titulo_legislatura' : legislatura[0],
 40 |             'presidente' : legislatura[2],
 41 |             'numero_leyes' : legislatura[3]
 42 |         }
 43 |         legislatura_array.append(ley)
 44 |     if len(legislatura_array) == 0:
 45 |         raise Http404
 46 | 
 47 |     return HttpResponse(json.dumps(legislatura_array), content_type="application/json")
 48 | 
 49 | def leyes_legislatura(request):
 50 |     cursor = connection.cursor()
 51 |     cursor.execute('SELECT legislatura, cod_legislatura, sum(n_leyes), presidente from materias_legislaturas '
 52 |                       'group by legislatura, cod_legislatura, presidente '
 53 |                       'order by  cod_legislatura')
 54 |     legislatura_array = []
 55 |     for legislatura in cursor.fetchall():
 56 | 
 57 |         ley = {
 58 | 
 59 |             'codigo_legislatura' : legislatura [1],
 60 |             'titulo_legislatura' : legislatura[0],
 61 |             'presidente' : legislatura[3],
 62 |             'numero_leyes' : legislatura[2]
 63 |         }
 64 |         legislatura_array.append(ley)
 65 |     if len(legislatura_array) == 0:
 66 |         raise Http404
 67 |     return HttpResponse(json.dumps(legislatura_array), content_type="application/json")
 68 | 
 69 | 
 70 | def leyes_meses_legislatura(request):
 71 |     cursor = connection.cursor()
 72 |     cursor.execute('SELECT '
 73 |                    'nombre_legislatura, presidente,'
 74 |                    'legislatura_id, anyo, mes, num_leyes from leyes_mes '
 75 | 
 76 |                       'order by  legislatura_id')
 77 |     legislatura_array = []
 78 |     for legislatura in cursor.fetchall():
 79 |         ley = {
 80 | 
 81 |             'codigo_legislatura' : legislatura [2],
 82 |             'titulo_legislatura' : legislatura[0],
 83 |             'presidente' : legislatura[1],
 84 |             'anyo' : legislatura[3],
 85 |             'mes' : legislatura[4],
 86 |             'numero_leyes' : legislatura[5]
 87 |         }
 88 |         legislatura_array.append(ley)
 89 |     if len(legislatura_array) == 0:
 90 |         raise Http404
 91 |     return HttpResponse(json.dumps(legislatura_array), content_type="application/json")
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | def materias_legislatura(request, materias):
 98 | 
 99 |     cursor = connection.cursor()
100 | 
101 |     cursor.execute('SELECT * from materias_legislaturas where codigo_materia = %s;', [materias])
102 | 
103 |     legislatura_array = []
104 | 
105 |     for legislatura in cursor.fetchall():
106 |         ley = {
107 |             'titulo_materia' : legislatura[0],
108 |             'codigo_materia' : legislatura[1],
109 |             'codigo_legislatura' : legislatura[2],
110 |             'titulo_legislatura' : legislatura[3],
111 |             'presidente' : legislatura[4],
112 |             'numero_leyes' : legislatura[5]
113 |         }
114 |         legislatura_array.append(ley)
115 |     if len(legislatura_array) == 0:
116 |         raise Http404
117 |     return HttpResponse(json.dumps(legislatura_array), content_type="application/json")
118 | 
119 | 
def top_materias(request):
    """Return, as JSON, the 50 subjects with the highest summed ``n_leyes``.

    Each entry carries the subject code, its title and the total, in
    descending order of total.

    Raises:
        Http404: when the query returns no rows.
    """
    cursor = connection.cursor()
    cursor.execute('SELECT codigo_materia, titulo_materia, sum(n_leyes) as total from materias_legislaturas group by codigo_materia, titulo_materia order by total desc limit 50;')
    materias = [
        {
            'codigo_materia': row[0],
            'titulo_materia': row[1],
            'total': row[2],
        }
        for row in cursor.fetchall()
    ]
    if not materias:
        raise Http404
    return HttpResponse(json.dumps(materias), content_type="application/json")
135 | 
136 | 
def years(request, materia=None):
    """Return, as JSON, the distinct publication years of the documents.

    With ``materia`` the years are restricted to documents linked to that
    subject id; without it every document with a publication date counts.
    The years come back as a sorted list of integers.

    Raises:
        Http404: when no year is found.
    """
    cursor = connection.cursor()
    if materia:
        cursor.execute('SELECT distinct(extract(year from fecha_publicacion)) as year_select '
                      ' from boe_analisis_documento doc, boe_analisis_documento_materias mat '
                        'where fecha_publicacion IS NOT NULL and '
                        'doc.id = mat.documento_id and mat.materia_id = %s '
                       'order by  year_select', [materia])
    else:
        cursor.execute('SELECT distinct(extract(year from fecha_publicacion)) as year_select '
                       'from boe_analisis_documento  where fecha_publicacion'
                       ' IS NOT NULL order by  year_select;')

    # Each row holds a single value, which is converted to int so that it
    # can be serialised as a JSON number.
    year_list = [int(row[0]) for row in cursor.fetchall()]
    if not year_list:
        raise Http404

    return HttpResponse(json.dumps(year_list), content_type="application/json")
162 | 
163 | 
164 | 
165 | 
166 | 


--------------------------------------------------------------------------------
/boe/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | 
 5 | if __name__ == "__main__":
 6 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "boe.settings")
 7 | 
 8 |     from django.core.management import execute_from_command_line
 9 | 
10 |     execute_from_command_line(sys.argv)
11 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Beaker==1.6.4
 2 | BeautifulSoup==3.2.1
 3 | Django==1.5.1
 4 | Mako==0.8.1
 5 | Markdown==2.3.1
 6 | MarkupSafe==0.18
 7 | Pattern==2.6
 8 | PyYAML==3.10
 9 | South==0.8.1
10 | anyjson==0.3.3
11 | argparse==1.2.1
12 | beautifulsoup4==4.2.1
13 | bottle==0.11.6
14 | defusedxml==0.4.1
15 | django-cache-machine==0.8
16 | django-fancy-cache==0.4.0
17 | django-filter==0.6
18 | django-tastypie==0.9.15
19 | greenlet==0.4.1
20 | httplib2==0.8
21 | iowait==0.1
22 | lxml==3.2.1
23 | mimeparse==0.1.3
24 | mock==1.0.1
25 | psutil==1.0.1
26 | psycopg2==2.5.1
27 | python-dateutil==2.1
28 | python-ptrace==0.6.5
29 | requests==1.2.3
30 | shortuuid==0.3
31 | six==1.3.0
32 | wsgiref==0.1.2
33 | 


--------------------------------------------------------------------------------