├── .gitignore ├── .pyup.yml ├── .travis.yml ├── MANIFEST.in ├── README.md ├── django_sec ├── __init__.py ├── admin.py ├── apps.py ├── constants.py ├── fixtures │ ├── company_2016_1.zip │ ├── django_sec_units.json │ └── normalized_fields.csv ├── forms.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── sec_import_attrs.py │ │ ├── sec_import_index.py │ │ ├── sec_mark_units.py │ │ └── sec_xbrl_to_csv.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── settings.py ├── south_migrations │ ├── 0001_initial.py │ ├── 0002_auto__add_field_index__ticker.py │ ├── 0003_auto__add_index_index_company_date_filename.py │ ├── 0004_auto__chg_field_attributevalue_value.py │ ├── 0005_auto__add_field_company_min_date__add_field_company_max_date.py │ └── __init__.py ├── tests │ ├── __init__.py │ ├── manage.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ └── __init__.py │ ├── models.py │ ├── settings.py │ ├── tests.py │ ├── urls.py │ └── views.py ├── utils.py ├── xbrl.py └── xbrl_fundamentals.py ├── pep8.sh ├── pip-requirements-min-django.txt ├── pip-requirements-test.txt ├── pip-requirements.txt ├── pylint.messages ├── pylint.rc ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | local_settings.py 2 | *cheatsheet* 3 | *.DS_Store 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # logs 11 | *.log 12 | *.log.* 13 | 14 | 15 | # Packages 16 | *.egg 17 | *.egg-info 18 | dist 19 | build 20 | eggs 21 | parts 22 | bin 23 | var 24 | sdist 25 | develop-eggs 26 | .installed.cfg 27 | lib 28 | lib64 29 | 30 | 31 | /.project 32 | /.pydevproject 33 | /.settings 34 | /.tox/ 35 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | requirements: 2 | - pip-requirements.txt 3 | - pip-requirements-min-django.txt 4 | - 
pip-requirements-test.txt 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | install: 5 | - pip install tox pylint 6 | script: 7 | - ./pep8.sh 8 | - tox 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include django_sec/tests/fixtures * 2 | include pip-requirements-min-django.txt 3 | include pip-requirements.txt 4 | include pip-requirements-test.txt 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Django-SEC 2 | ========== 3 | 4 | [![](https://img.shields.io/pypi/v/django-sec.svg)](https://pypi.python.org/pypi/django-sec) [![Build Status](https://img.shields.io/travis/chrisspen/django-sec.svg?branch=master)](https://travis-ci.org/chrisspen/django-sec) [![](https://pyup.io/repos/github/chrisspen/django-sec/shield.svg)](https://pyup.io/repos/github/chrisspen/django-sec) 5 | 6 | This is a Django app that downloads all SEC filings from the [EDGAR database](https://www.sec.gov/edgar/searchedgar/accessing-edgar-data.htm) 7 | into your local database. It provides an admin interface to allow you to 8 | control which indexes and attributes are loaded as well as inspect downloaded 9 | data. 10 | 11 | This is a fork of Luke Rosiak's [PySEC](https://github.com/lukerosiak/pysec), 12 | modified to act as a pluggable Django app with fleshed out admin interface and 13 | more efficient data import commands. 
14 | 15 | Installation 16 | ------------ 17 | 18 | Install the package using pip via: 19 | 20 | pip install django-sec 21 | 22 | then add `django_sec` to your `INSTALLED_APPS` and run: 23 | 24 | python manage.py migrate django_sec 25 | 26 | Usage 27 | ----- 28 | 29 | The data import process is divided into two basic commands. 30 | 31 | First, import filing indexes for a target range of years by running: 32 | 33 | python manage.py sec_import_index --start-year=<start-year> --end-year=<end-year> 34 | 35 | This will essentially load the "card catalog" of all companies that filed 36 | documents between those years. 37 | 38 | If you're running this on the devserver, you can monitor import progress at: 39 | 40 | http://localhost:8000/admin/django_sec/indexfile/ 41 | 42 | and see the loaded indexes and companies at: 43 | 44 | http://localhost:8000/admin/django_sec/index/ 45 | http://localhost:8000/admin/django_sec/company/ 46 | 47 | Because the list of companies and filings is enormous, by default, all 48 | companies are configured to not download any actual filings 49 | unless explicitly marked to do so. 50 | 51 | To mark companies for download, go to the 52 | company change list page, select one or more companies and run the action 53 | "Enable attribute loading..." Then run: 54 | 55 | python manage.py sec_import_attrs --start-year=<start-year> --end-year=<end-year> --form=10-Q,10-K 56 | 57 | This will download all 10-K and 10-Q filings, extract the attributes and populate 58 | them into the AttributeValue table accessible at: 59 | 60 | http://localhost:8000/admin/django_sec/attributevalue/ 61 | 62 | Currently, this has only been tested to download and extract attributes from 63 | 10-K and 10-Q filings. 64 | 65 | The commands support additional parameters and filters, such as to load data 66 | for specific companies or quarters. Run `python manage.py help sec_import_index` 67 | to see all options. 
68 | 69 | Development 70 | ----------- 71 | 72 | Tests require the Python development headers to be installed, which you can install on Ubuntu with: 73 | 74 | sudo apt-get install python-dev python3-dev python3.4-dev 75 | 76 | To run unittests across multiple Python versions, install: 77 | 78 | sudo apt-get install python3.4-minimal python3.4-dev python3.5-minimal python3.5-dev 79 | 80 | To run all [tests](http://tox.readthedocs.org/en/latest/): 81 | 82 | export TESTNAME=; tox 83 | 84 | To run tests for a specific environment (e.g. Python 2.7 with Django 1.4): 85 | 86 | export TESTNAME=; tox -e py27-django15 87 | 88 | To run a specific test: 89 | 90 | export TESTNAME=.testname; tox -e py27-django15 91 | -------------------------------------------------------------------------------- /django_sec/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 5, 3) 2 | __version__ = '.'.join(map(str, VERSION)) 3 | 4 | default_app_config = 'django_sec.apps.DjangoSECConfig' 5 | -------------------------------------------------------------------------------- /django_sec/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.contrib.contenttypes.models import ContentType 3 | from django.core.urlresolvers import reverse 4 | 5 | try: 6 | from admin_steroids.queryset import ApproxCountQuerySet 7 | except ImportError as e: 8 | ApproxCountQuerySet = None 9 | 10 | from . import forms 11 | from . 
import models 12 | 13 | class NamespaceAdmin(admin.ModelAdmin): 14 | 15 | list_display = ( 16 | 'name', 17 | ) 18 | 19 | search_fields = ( 20 | 'name', 21 | ) 22 | 23 | admin.site.register( 24 | models.Namespace, 25 | NamespaceAdmin) 26 | 27 | class UnitAliasInlineAdmin(admin.TabularInline): 28 | 29 | model = models.Unit 30 | 31 | extra = 0 32 | max_num = 0 33 | can_delete = False 34 | 35 | list_display = ( 36 | 'name', 37 | ) 38 | 39 | readonly_fields = ( 40 | 'name', 41 | ) 42 | 43 | class UnitAdmin(admin.ModelAdmin): 44 | 45 | form = forms.UnitChangeForm 46 | 47 | list_display = ( 48 | 'name', 49 | 'true_unit', 50 | 'master', 51 | ) 52 | 53 | list_filter = ( 54 | 'master', 55 | ) 56 | 57 | readonly_fields = ( 58 | 'master', 59 | ) 60 | 61 | search_fields = ( 62 | 'name', 63 | ) 64 | 65 | inlines = ( 66 | UnitAliasInlineAdmin, 67 | ) 68 | 69 | admin.site.register( 70 | models.Unit, 71 | UnitAdmin) 72 | 73 | class AttributeAdmin(admin.ModelAdmin): 74 | 75 | list_display = ( 76 | 'name', 77 | 'namespace', 78 | 'load', 79 | 'total_values_fresh', 80 | 'total_values', 81 | ) 82 | 83 | list_filter = ( 84 | 'load', 85 | 'total_values_fresh', 86 | ) 87 | 88 | search_fields = ( 89 | 'name', 90 | ) 91 | 92 | readonly_fields = ( 93 | 'total_values', 94 | ) 95 | 96 | actions = ( 97 | 'enable_load', 98 | 'disable_load', 99 | 'refresh_total_values', 100 | ) 101 | 102 | def queryset(self, *args, **kwargs): 103 | # Deprecated in Django 1.7. 
104 | return self.get_queryset(*args, **kwargs) 105 | 106 | def get_queryset(self, *args, **kwargs): 107 | try: 108 | qs = super(AttributeAdmin, self).get_queryset(*args, **kwargs) 109 | except AttributeError: 110 | qs = super(AttributeAdmin, self).queryset(*args, **kwargs) 111 | if ApproxCountQuerySet: 112 | qs = qs._clone(klass=ApproxCountQuerySet) 113 | return qs 114 | 115 | def enable_load(self, request, queryset): 116 | models.Attribute.objects.filter(id__in=queryset).update(load=True) 117 | models.Index.objects.filter(attributes_loaded=True).update(attributes_loaded=False) 118 | enable_load.short_description = 'Enable value loading of selected %(verbose_name_plural)s' 119 | 120 | def disable_load(self, request, queryset): 121 | models.Attribute.objects.filter(id__in=queryset).update(load=False) 122 | disable_load.short_description = 'Disable value loading of selected %(verbose_name_plural)s' 123 | 124 | def refresh_total_values(self, request, queryset): 125 | queryset.update(total_values_fresh=False) 126 | models.Attribute.do_update() 127 | refresh_total_values.short_description = \ 128 | 'Refresh to total values count of selected %(verbose_name_plural)s' 129 | 130 | admin.site.register( 131 | models.Attribute, 132 | AttributeAdmin) 133 | 134 | class AttributeValueAdmin(admin.ModelAdmin): 135 | 136 | list_display = ( 137 | 'company_name', 138 | 'attribute_name', 139 | 'value', 140 | 'true_unit', 141 | 'start_date', 142 | 'end_date', 143 | 'filing_date', 144 | 'attribute_total_values', 145 | ) 146 | 147 | raw_id_fields = ( 148 | 'company', 149 | 'attribute', 150 | ) 151 | 152 | search_fields = ( 153 | 'company__name', 154 | 'attribute__name', 155 | ) 156 | 157 | readonly_fields = ( 158 | 'company_name', 159 | 'attribute_name', 160 | 'attribute_total_values', 161 | 'true_unit', 162 | ) 163 | 164 | exclude = ( 165 | 'unit', 166 | ) 167 | 168 | def queryset(self, *args, **kwargs): 169 | # Deprecated in Django 1.7. 
170 | return self.get_queryset(*args, **kwargs) 171 | 172 | def get_queryset(self, *args, **kwargs): 173 | try: 174 | qs = super(AttributeValueAdmin, self).get_queryset(*args, **kwargs) 175 | except AttributeError: 176 | qs = super(AttributeValueAdmin, self).queryset(*args, **kwargs) 177 | if ApproxCountQuerySet: 178 | qs = qs._clone(klass=ApproxCountQuerySet) 179 | return qs 180 | 181 | def true_unit(self, obj=None): 182 | if not obj: 183 | return '' 184 | return obj.unit.true_unit 185 | true_unit.short_description = 'unit' 186 | 187 | def company_name(self, obj=None): 188 | if not obj: 189 | return '' 190 | return obj.company.name 191 | 192 | def attribute_name(self, obj=None): 193 | if not obj: 194 | return '' 195 | return obj.attribute.name 196 | 197 | def attribute_total_values(self, obj=None): 198 | if not obj: 199 | return '' 200 | return obj.attribute.total_values 201 | attribute_total_values.admin_order_field = 'attribute__total_values' 202 | 203 | admin.site.register( 204 | models.AttributeValue, 205 | AttributeValueAdmin) 206 | 207 | class CompanyAdmin(admin.ModelAdmin): 208 | 209 | list_display = ( 210 | 'cik', 211 | 'name', 212 | 'min_date', 213 | 'max_date', 214 | 'load', 215 | ) 216 | 217 | list_filter = ( 218 | 'load', 219 | ) 220 | 221 | search_fields = ( 222 | 'cik', 223 | 'name', 224 | #'filings___ticker', 225 | ) 226 | 227 | readonly_fields = ( 228 | 'cik', 229 | 'name', 230 | 'filings_link', 231 | 'values_link', 232 | 'min_date', 233 | 'max_date', 234 | ) 235 | 236 | actions = ( 237 | 'enable_load', 238 | 'disable_load', 239 | ) 240 | 241 | def lookup_allowed(self, key, value): 242 | return True 243 | 244 | def queryset(self, *args, **kwargs): 245 | # Deprecated in Django 1.7. 
246 | return self.get_queryset(*args, **kwargs) 247 | 248 | def get_queryset(self, *args, **kwargs): 249 | try: 250 | qs = super(CompanyAdmin, self).get_queryset(*args, **kwargs) 251 | except AttributeError: 252 | qs = super(CompanyAdmin, self).queryset(*args, **kwargs) 253 | if ApproxCountQuerySet: 254 | qs = qs._clone(klass=ApproxCountQuerySet) 255 | return qs 256 | 257 | def enable_load(self, request, queryset): 258 | models.Company.objects.filter(cik__in=queryset).update(load=True) 259 | models.Index.objects\ 260 | .filter(company__cik__in=queryset, attributes_loaded=True)\ 261 | .update(attributes_loaded=False) 262 | enable_load.short_description = 'Enable attribute loading of selected %(verbose_name_plural)s' 263 | 264 | def disable_load(self, request, queryset): 265 | models.Company.objects.filter(cik__in=queryset).update(load=False) 266 | disable_load.short_description = 'Disable attribute loading of selected %(verbose_name_plural)s' 267 | 268 | def filings_link(self, obj=None): 269 | if not obj: 270 | return '' 271 | ct = ContentType.objects.get_for_model(models.Index) 272 | list_url_name = 'admin:%s_%s_changelist' % (ct.app_label, ct.model) 273 | url = reverse(list_url_name) + ('?company=%s' % obj.cik) 274 | count = obj.filings.all().count() 275 | return 'View %i' % (url, count) 276 | filings_link.short_description = 'filings' 277 | filings_link.allow_tags = True 278 | 279 | def values_link(self, obj=None): 280 | if not obj: 281 | return '' 282 | ct = ContentType.objects.get_for_model(models.AttributeValue) 283 | list_url_name = 'admin:%s_%s_changelist' % (ct.app_label, ct.model) 284 | url = reverse(list_url_name) + ('?company=%s' % obj.cik) 285 | count = obj.attributes.all().count() 286 | return 'View %i' % (url, count) 287 | values_link.short_description = 'attributes' 288 | values_link.allow_tags = True 289 | 290 | admin.site.register( 291 | models.Company, 292 | CompanyAdmin) 293 | 294 | class IndexFileAdmin(admin.ModelAdmin): 295 | 296 | list_display 
= ( 297 | 'year', 298 | 'quarter', 299 | 'total_rows', 300 | 'processed_rows', 301 | 'percent_processed', 302 | 'downloaded', 303 | 'processed', 304 | ) 305 | 306 | readonly_fields = ( 307 | 'percent_processed', 308 | ) 309 | 310 | actions = ( 311 | 'mark_unprocessed', 312 | ) 313 | 314 | def queryset(self, *args, **kwargs): 315 | # Deprecated in Django 1.7. 316 | return self.get_queryset(*args, **kwargs) 317 | 318 | def get_queryset(self, *args, **kwargs): 319 | try: 320 | qs = super(IndexFileAdmin, self).get_queryset(*args, **kwargs) 321 | except AttributeError: 322 | qs = super(IndexFileAdmin, self).queryset(*args, **kwargs) 323 | if ApproxCountQuerySet: 324 | qs = qs._clone(klass=ApproxCountQuerySet) 325 | return qs 326 | 327 | def mark_unprocessed(self, request, queryset): 328 | models.IndexFile.objects\ 329 | .filter(id__in=queryset.values_list('id', flat=True))\ 330 | .update(processed=None, processed_rows=0) 331 | mark_unprocessed.short_description = 'Mark selected %(verbose_name_plural)s as unprocessed' 332 | 333 | def percent_processed(self, obj=None): 334 | if not obj or not obj.total_rows or not obj.processed_rows: 335 | return '' 336 | return '%.02f%%' % (obj.processed_rows/float(obj.total_rows)*100,) 337 | 338 | admin.site.register( 339 | models.IndexFile, 340 | IndexFileAdmin) 341 | 342 | class IndexAdmin(admin.ModelAdmin): 343 | list_display = ( 344 | 'filename', 345 | 'company', 346 | 'cik', 347 | '_ticker', 348 | 'form', 349 | 'date', 350 | 'quarter', 351 | 'attributes_loaded', 352 | 'valid', 353 | ) 354 | 355 | search_fields = ( 356 | 'filename', 357 | 'company__name', 358 | '_ticker', 359 | ) 360 | 361 | list_filter = ( 362 | 'attributes_loaded', 363 | 'valid', 364 | 'year', 365 | 'quarter', 366 | 'form', 367 | ) 368 | 369 | readonly_fields = ( 370 | 'cik', 371 | 'xbrl_link', 372 | ) 373 | 374 | actions = ( 375 | # 'enable', 376 | # 'disable', 377 | ) 378 | 379 | def queryset(self, *args, **kwargs): 380 | # Deprecated in Django 1.7. 
381 | return self.get_queryset(*args, **kwargs) 382 | 383 | def get_queryset(self, *args, **kwargs): 384 | try: 385 | qs = super(IndexAdmin, self).get_queryset(*args, **kwargs) 386 | except AttributeError: 387 | qs = super(IndexAdmin, self).queryset(*args, **kwargs) 388 | if ApproxCountQuerySet: 389 | qs = qs._clone(klass=ApproxCountQuerySet) 390 | return qs 391 | 392 | def cik(self, obj=None): 393 | if not obj: 394 | return '' 395 | return obj.company.cik 396 | cik.admin_order_field = 'company__cik' 397 | 398 | def get_readonly_fields(self, request, obj=None): 399 | exclude = [] 400 | return [ 401 | _.name for _ in self.model._meta.fields 402 | if _.name not in exclude 403 | ] + list(self.readonly_fields) 404 | 405 | def get_fieldsets(self, request, obj=None): 406 | readonly_fields = list(self.readonly_fields) 407 | exclude = readonly_fields + ['id'] 408 | first = ['filename'] 409 | fields = first + readonly_fields + [ 410 | _.name for _ in self.model._meta.fields 411 | if _.name not in exclude and _.name not in first 412 | ] 413 | fieldsets = ( 414 | (None, { 415 | 'fields': fields, 416 | }), 417 | ) 418 | return fieldsets 419 | 420 | def enable(self, request, queryset): 421 | for r in queryset.iterator(): 422 | r.enabled = True 423 | r.save() 424 | enable.short_description = 'Enable selected %(verbose_name_plural)s' 425 | 426 | def disable(self, request, queryset): 427 | for r in queryset.iterator(): 428 | r.enabled = False 429 | r.save() 430 | disable.short_description = 'Disable selected %(verbose_name_plural)s' 431 | 432 | admin.site.register( 433 | models.Index, 434 | IndexAdmin) 435 | -------------------------------------------------------------------------------- /django_sec/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | class DjangoSECConfig(AppConfig): 4 | name = 'django_sec' 5 | verbose_name = 'Django SEC' 6 | 
-------------------------------------------------------------------------------- /django_sec/constants.py: -------------------------------------------------------------------------------- 1 | INSTANT = 'Instant' 2 | DURATION = 'Duration' 3 | TIME_PERIODS = ( 4 | INSTANT, 5 | DURATION, 6 | ) 7 | 8 | MAX_DIGITS = 40 9 | MAX_DECIMALS = 6 10 | MAX_QUANTIZE = MAX_DIGITS - MAX_DECIMALS 11 | -------------------------------------------------------------------------------- /django_sec/fixtures/company_2016_1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/fixtures/company_2016_1.zip -------------------------------------------------------------------------------- /django_sec/fixtures/normalized_fields.csv: -------------------------------------------------------------------------------- 1 | Name,Default,Condition,Preference,Description 2 | Assets,0,,us-gaap:Assets|Instant, 3 | CurrentAssets,0,,us-gaap:AssetsCurrent|Instant, 4 | NoncurrentAssets,0,,"us-gaap:AssetsNoncurrent|Instant,eval(Assets-CurrentAssets)", 5 | LiabilitiesAndEquity,0,,"us-gaap:LiabilitiesAndStockholdersEquity|Instant,us-gaap:LiabilitiesAndPartnersCapital|Instant", 6 | Liabilities,0,,us-gaap:Liabilities|Instant, 7 | CurrentLiabilities,0,,us-gaap:LiabilitiesCurrent|Instant, 8 | NoncurrentLiabilities,0,,"us-gaap:LiabilitiesNoncurrent|Instant,eval(Liabilities-CurrentLiabilities)", 9 | CommitmentsAndContingencies,0,,us-gaap:CommitmentsAndContingencies|Instant, 10 | TemporaryEquity,0,,"us-gaap:TemporaryEquityRedemptionValue|Instant,us-gaap:RedeemablePreferredStockCarryingAmount|Instant,us-gaap:TemporaryEquityCarryingAmount|Instant,us-gaap:TemporaryEquityValueExcludingAdditionalPaidInCapital|Instant,us-gaap:TemporaryEquityCarryingAmountAttributableToParent|Instant,us-gaap:RedeemableNoncontrollingInterestEquityFairValue|Instant", 11 | 
RedeemableNoncontrollingInterest,0,,"us-gaap:RedeemableNoncontrollingInterestEquityCarryingAmount|Instant,us-gaap:RedeemableNoncontrollingInterestEquityCommonCarryingAmount|Instant", 12 | TemporaryEquity,0,,eval(TemporaryEquity+RedeemableNoncontrollingInterest), 13 | Equity,0,,"us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest|Instant,us-gaap:StockholdersEquity|Instant,us-gaap:PartnersCapitalIncludingPortionAttributableToNoncontrollingInterest|Instant,us-gaap:PartnersCapital|Instant,us-gaap:CommonStockholdersEquity|Instant,us-gaap:MemberEquity|Instant,us-gaap:AssetsNet|Instant", 14 | EquityAttributableToNoncontrollingInterest,0,,"us-gaap:MinorityInterest|Instant,us-gaap:PartnersCapitalAttributableToNoncontrollingInterest|Instant", 15 | EquityAttributableToParent,0,,"us-gaap:StockholdersEquity|Instant,us-gaap:LiabilitiesAndPartnersCapital|Instant", 16 | Assets,0,not Assets and LiabilitiesAndEquity and CurrentAssets == LiabilitiesAndEquity,CurrentAssets, 17 | Assets,0,not Assets and not NoncurrentAssets and LiabilitiesAndEquity and LiabilitiesAndEquity == (Liabilities + Equity),CurrentAssets, 18 | NoncurrentAssets,0,Assets and CurrentAssets,eval(Assets - CurrentAssets), 19 | LiabilitiesAndEquity,0,not LiabilitiesAndEquity and Assets,Assets, 20 | Equity,0,EquityAttributableToNoncontrollingInterest and EquityAttributableToParent,eval(EquityAttributableToParent + EquityAttributableToNoncontrollingInterest), 21 | Equity,0,not Equity and not EquityAttributableToNoncontrollingInterest and EquityAttributableToParent,EquityAttributableToParent, 22 | Equity,0,not Equity,eval(EquityAttributableToParent + EquityAttributableToNoncontrollingInterest), 23 | EquityAttributableToParent,0,Equity and EquityAttributableToNoncontrollingInterest and not EquityAttributableToParent,eval(Equity - EquityAttributableToNoncontrollingInterest),Impute Equity attributable to parent based on existence of equity and noncontrolling interest. 
24 | EquityAttributableToParent,0,Equity and not EquityAttributableToNoncontrollingInterest and not EquityAttributableToParent,Equity,Impute Equity attributable to parent based on existence of equity and noncontrolling interest. 25 | Liabilities,0,not Liabilities and Equity,eval(LiabilitiesAndEquity - (CommitmentsAndContingencies + TemporaryEquity + Equity)), 26 | NoncurrentLiabilities,0,Liabilities and CurrentLiabilities,eval(Liabilities - CurrentLiabilities),This seems incorrect because liabilities might not be reported 27 | Liabilities,0,not Liabilities and CurrentLiabilities and not NoncurrentLiabilities,CurrentLiabilities, 28 | lngBSCheck1,0,,eval(Equity - (EquityAttributableToParent + EquityAttributableToNoncontrollingInterest)), 29 | lngBSCheck2,0,,eval(Assets - LiabilitiesAndEquity), 30 | lngBSCheck3,0,CurrentAssets or NoncurrentAssets or CurrentLiabilities or NoncurrentLiabilities,eval(Assets - (CurrentAssets + NoncurrentAssets)),if current assets/liabilities are zero and noncurrent assets/liabilities;: don't do this test because the balance sheet is not classified 31 | lngBSCheck4,0,CurrentAssets or NoncurrentAssets or CurrentLiabilities or NoncurrentLiabilities,eval(Liabilities - (CurrentLiabilities + NoncurrentLiabilities)),if current assets/liabilities are zero and noncurrent assets/liabilities;: don't do this test because the balance sheet is not classified 32 | lngBSCheck5,0,,eval(LiabilitiesAndEquity - (Liabilities + CommitmentsAndContingencies + TemporaryEquity + Equity)), 33 | 
Revenues,0,,"us-gaap:Revenues|Duration,us-gaap:SalesRevenueNet|Duration,us-gaap:SalesRevenueServicesNet|Duration,us-gaap:RevenuesNetOfInterestExpense|Duration,us-gaap:RegulatedAndUnregulatedOperatingRevenue|Duration,us-gaap:HealthCareOrganizationRevenue|Duration,us-gaap:InterestAndDividendIncomeOperating|Duration,us-gaap:RealEstateRevenueNet|Duration,us-gaap:RevenueMineralSales|Duration,us-gaap:OilAndGasRevenue|Duration,us-gaap:FinancialServicesRevenue|Duration,us-gaap:RegulatedAndUnregulatedOperatingRevenue|Duration", 34 | CostOfRevenue,0,,"us-gaap:CostOfRevenue|Duration,us-gaap:CostOfServices|Duration,us-gaap:CostOfGoodsSold|Duration,us-gaap:CostOfGoodsAndServicesSold|Duration", 35 | GrossProfit,0,,"us-gaap:GrossProfit|Duration,us-gaap:GrossProfit|Duration", 36 | OperatingExpenses,0,,"us-gaap:OperatingExpenses|Duration,us-gaap:OperatingCostsAndExpenses|Duration", 37 | CostsAndExpenses,0,,"us-gaap:CostsAndExpenses|Duration,us-gaap:CostsAndExpenses|Duration", 38 | OtherOperatingIncome,0,,us-gaap:OtherOperatingIncome|Duration, 39 | OperatingIncomeLoss,0,,us-gaap:OperatingIncomeLoss|Duration, 40 | NonoperatingIncomeLoss,0,,"us-gaap:NonoperatingIncomeExpense|Duration,us-gaap:NonoperatingIncomeExpense|Duration", 41 | InterestAndDebtExpense,0,,us-gaap:InterestAndDebtExpense|Duration, 42 | IncomeBeforeEquityMethodInvestments,0,,us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments|Duration, 43 | IncomeFromEquityMethodInvestments,0,,us-gaap:IncomeLossFromEquityMethodInvestments|Duration, 44 | IncomeFromContinuingOperationsBeforeTax,0,,"us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments|Duration,us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest|Duration", 45 | IncomeTaxExpenseBenefit,0,,"us-gaap:IncomeTaxExpenseBenefit|Duration,us-gaap:IncomeTaxExpenseBenefitContinuingOperations|Duration", 46 | 
IncomeFromContinuingOperationsAfterTax,0,,us-gaap:IncomeLossBeforeExtraordinaryItemsAndCumulativeEffectOfChangeInAccountingPrinciple|Duration, 47 | IncomeFromDiscontinuedOperations,0,,"us-gaap:IncomeLossFromDiscontinuedOperationsNetOfTax|Duration,us-gaap:DiscontinuedOperationGainLossOnDisposalOfDiscontinuedOperationNetOfTax|Duration,us-gaap:IncomeLossFromDiscontinuedOperationsNetOfTaxAttributableToReportingEntity|Duration", 48 | ExtraordaryItemsGainLoss,0,,us-gaap:ExtraordinaryItemNetOfTax|Duration, 49 | NetIncomeLoss,0,,"us-gaap:ProfitLoss|Duration,us-gaap:NetIncomeLoss|Duration,us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic|Duration,us-gaap:IncomeLossFromContinuingOperations|Duration,us-gaap:IncomeLossAttributableToParent|Duration,us-gaap:IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest|Duration", 50 | NetIncomeAvailableToCommonStockholdersBasic,0,,us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic|Duration, 51 | PreferredStockDividendsAndOtherAdjustments,0,,us-gaap:PreferredStockDividendsAndOtherAdjustments|Duration, 52 | NetIncomeAttributableToNoncontrollingInterest,0,,us-gaap:NetIncomeLossAttributableToNoncontrollingInterest|Duration, 53 | NetIncomeAttributableToParent,0,,us-gaap:NetIncomeLoss|Duration, 54 | OtherComprehensiveIncome,0,,us-gaap:OtherComprehensiveIncomeLossNetOfTax|Duration, 55 | ComprehensiveIncome,0,,"us-gaap:ComprehensiveIncomeNetOfTaxIncludingPortionAttributableToNoncontrollingInterest|Duration,us-gaap:ComprehensiveIncomeNetOfTax|Duration", 56 | ComprehensiveIncomeAttributableToParent,0,,us-gaap:ComprehensiveIncomeNetOfTax|Duration, 57 | ComprehensiveIncomeAttributableToNoncontrollingInterest,0,,us-gaap:ComprehensiveIncomeNetOfTaxAttributableToNoncontrollingInterest|Duration, 58 | NonoperatingIncomeLossPlusInterestAndDebtExpense,0,,eval(NonoperatingIncomeLoss + InterestAndDebtExpense), 59 | NetIncomeAvailableToCommonStockholdersBasic,0,not NetIncomeAvailableToCommonStockholdersBasic and not 
PreferredStockDividendsAndOtherAdjustments and NetIncomeAttributableToParent,NetIncomeAttributableToParent, 60 | IncomeFromContinuingOperationsAfterTax,0,NetIncomeLoss and not IncomeFromContinuingOperationsAfterTax,eval(NetIncomeLoss - IncomeFromDiscontinuedOperations - ExtraordaryItemsGainLoss), 61 | NetIncomeAttributableToParent,0,not NetIncomeAttributableToParent and not NetIncomeAttributableToNoncontrollingInterest and NetIncomeLoss,NetIncomeLoss, 62 | PreferredStockDividendsAndOtherAdjustments,0,not PreferredStockDividendsAndOtherAdjustments and NetIncomeAttributableToParent and NetIncomeAvailableToCommonStockholdersBasic,eval(NetIncomeAttributableToParent - NetIncomeAvailableToCommonStockholdersBasic), 63 | ComprehensiveIncome,0,not ComprehensiveIncomeAttributableToParent and not ComprehensiveIncomeAttributableToNoncontrollingInterest and not ComprehensiveIncome and not OtherComprehensiveIncome,NetIncomeLoss, 64 | OtherComprehensiveIncome,0,ComprehensiveIncome and not OtherComprehensiveIncome,eval(ComprehensiveIncome - NetIncomeLoss), 65 | ComprehensiveIncomeAttributableToParent,0,not ComprehensiveIncomeAttributableToParent and not ComprehensiveIncomeAttributableToNoncontrollingInterest and ComprehensiveIncome,ComprehensiveIncome, 66 | IncomeFromContinuingOperationsBeforeTax,0,IncomeBeforeEquityMethodInvestments and IncomeFromEquityMethodInvestments and not IncomeFromContinuingOperationsBeforeTax,eval(IncomeBeforeEquityMethodInvestments + IncomeFromEquityMethodInvestments), 67 | IncomeFromContinuingOperationsBeforeTax,0,not IncomeFromContinuingOperationsBeforeTax and IncomeFromContinuingOperationsAfterTax,eval(IncomeFromContinuingOperationsAfterTax + IncomeTaxExpenseBenefit), 68 | IncomeFromContinuingOperationsAfterTax,0,not IncomeFromContinuingOperationsAfterTax and (IncomeTaxExpenseBenefit or not IncomeTaxExpenseBenefit) and IncomeFromContinuingOperationsBeforeTax,eval(IncomeFromContinuingOperationsBeforeTax - IncomeTaxExpenseBenefit), 69 | 
GrossProfit,0,not GrossProfit and Revenues and CostOfRevenue,eval(GrossProfit - CostOfRevenue), 70 | GrossProfit,0,not GrossProfit and Revenues and CostOfRevenue,eval(Revenues - CostOfRevenue), 71 | Revenues,0,not GrossProfit and not Revenues and CostOfRevenue,eval(GrossProfit + CostOfRevenue), 72 | CostOfRevenue,0,GrossProfit and Revenues and not CostOfRevenue,eval(GrossProfit + Revenues), 73 | CostsAndExpenses,0,not GrossProfit and not CostsAndExpenses and CostOfRevenue and OperatingExpenses,eval(CostOfRevenue + OperatingExpenses), 74 | CostsAndExpenses,0,not CostsAndExpenses and OperatingExpenses and CostOfRevenue,eval(CostOfRevenue + OperatingExpenses), 75 | CostsAndExpenses,0,not GrossProfit and not CostsAndExpenses and Revenues and OperatingIncomeLoss and OtherOperatingIncome,eval(Revenues - OperatingIncomeLoss - OtherOperatingIncome), 76 | OperatingExpenses,0,CostOfRevenue and CostsAndExpenses and not OperatingExpenses,eval(CostsAndExpenses - CostOfRevenue), 77 | CostOfRevenue,0,Revenues and not GrossProfit and (Revenues - CostsAndExpenses == OperatingIncomeLoss) and not OperatingExpenses and not OtherOperatingIncome,eval(CostsAndExpenses - OperatingExpenses), 78 | IncomeBeforeEquityMethodInvestments,0,not IncomeBeforeEquityMethodInvestments and IncomeFromContinuingOperationsBeforeTax,eval(IncomeFromContinuingOperationsBeforeTax - IncomeFromEquityMethodInvestments), 79 | InterestAndDebtExpense,0,OperatingIncomeLoss and NonoperatingIncomeLoss and not InterestAndDebtExpense and IncomeBeforeEquityMethodInvestments,eval(IncomeBeforeEquityMethodInvestments - (OperatingIncomeLoss + NonoperatingIncomeLoss)), 80 | OtherOperatingIncome,0,GrossProfit and OperatingExpenses and OperatingIncomeLoss,eval(OperatingIncomeLoss - (GrossProfit - OperatingExpenses)), 81 | IncomeBeforeEquityMethodInvestments,0,IncomeFromEquityMethodInvestments and IncomeBeforeEquityMethodInvestments and IncomeBeforeEquityMethodInvestments != 
IncomeFromContinuingOperationsBeforeTax,eval(IncomeFromContinuingOperationsBeforeTax - IncomeFromEquityMethodInvestments), 82 | OperatingIncomeLoss,0,IncomeFromEquityMethodInvestments and IncomeBeforeEquityMethodInvestments and IncomeBeforeEquityMethodInvestments != IncomeFromContinuingOperationsBeforeTax,eval(OperatingIncomeLoss - IncomeFromEquityMethodInvestments), 83 | OperatingIncomeLoss,0,not OperatingIncomeLoss and IncomeBeforeEquityMethodInvestments,eval(IncomeBeforeEquityMethodInvestments + NonoperatingIncomeLoss - InterestAndDebtExpense),"DANGEROUS!! May need to turn off. IS3 had 2085 PASSES WITHOUT this imputing. if it is higher,: keep the test" 84 | NonoperatingIncomePlusInterestAndDebtExpensePlusIncomeFromEquityMethodInvestments,0,,eval(IncomeFromContinuingOperationsBeforeTax - OperatingIncomeLoss), 85 | NonoperatingIncomeLossPlusInterestAndDebtExpense,0,not NonoperatingIncomeLossPlusInterestAndDebtExpense and NonoperatingIncomePlusInterestAndDebtExpensePlusIncomeFromEquityMethodInvestments,eval(NonoperatingIncomePlusInterestAndDebtExpensePlusIncomeFromEquityMethodInvestments - IncomeFromEquityMethodInvestments), 86 | lngIS1,0,,eval((Revenues - CostOfRevenue) - GrossProfit), 87 | lngIS2,0,,eval((GrossProfit - OperatingExpenses + OtherOperatingIncome) - OperatingIncomeLoss), 88 | lngIS3,0,,eval((OperatingIncomeLoss + NonoperatingIncomeLossPlusInterestAndDebtExpense) - IncomeBeforeEquityMethodInvestments), 89 | lngIS4,0,,eval((IncomeBeforeEquityMethodInvestments + IncomeFromEquityMethodInvestments) - IncomeFromContinuingOperationsBeforeTax), 90 | lngIS5,0,,eval((IncomeFromContinuingOperationsBeforeTax - IncomeTaxExpenseBenefit) - IncomeFromContinuingOperationsAfterTax), 91 | lngIS6,0,,eval((IncomeFromContinuingOperationsAfterTax + IncomeFromDiscontinuedOperations + ExtraordaryItemsGainLoss) - NetIncomeLoss), 92 | lngIS7,0,,eval((NetIncomeAttributableToParent + NetIncomeAttributableToNoncontrollingInterest) - NetIncomeLoss), 93 | 
lngIS8,0,,eval((NetIncomeAttributableToParent - PreferredStockDividendsAndOtherAdjustments) - NetIncomeAvailableToCommonStockholdersBasic), 94 | lngIS9,0,,eval((ComprehensiveIncomeAttributableToParent + ComprehensiveIncomeAttributableToNoncontrollingInterest) - ComprehensiveIncome), 95 | lngIS10,0,,eval((NetIncomeLoss + OtherComprehensiveIncome) - ComprehensiveIncome), 96 | lngIS11,0,,eval(OperatingIncomeLoss - (Revenues - CostsAndExpenses + OtherOperatingIncome)), 97 | NetCashFlow,,,"us-gaap:CashAndCashEquivalentsPeriodIncreaseDecrease|Duration,us-gaap:CashPeriodIncreaseDecrease|Duration,us-gaap:NetCashProvidedByUsedInContinuingOperations|Duration", 98 | NetCashFlowsOperating,,,us-gaap:NetCashProvidedByUsedInOperatingActivities|Duration, 99 | NetCashFlowsInvesting,,,us-gaap:NetCashProvidedByUsedInInvestingActivities|Duration, 100 | NetCashFlowsFinancing,,,us-gaap:NetCashProvidedByUsedInFinancingActivities|Duration, 101 | NetCashFlowsOperatingContinuing,,,us-gaap:NetCashProvidedByUsedInOperatingActivitiesContinuingOperations|Duration, 102 | NetCashFlowsInvestingContinuing,,,us-gaap:NetCashProvidedByUsedInInvestingActivitiesContinuingOperations|Duration, 103 | NetCashFlowsFinancingContinuing,,,us-gaap:NetCashProvidedByUsedInFinancingActivitiesContinuingOperations|Duration, 104 | NetCashFlowsOperatingDiscontinued,,,us-gaap:CashProvidedByUsedInOperatingActivitiesDiscontinuedOperations|Duration, 105 | NetCashFlowsInvestingDiscontinued,,,us-gaap:CashProvidedByUsedInInvestingActivitiesDiscontinuedOperations|Duration, 106 | NetCashFlowsFinancingDiscontinued,,,us-gaap:CashProvidedByUsedInFinancingActivitiesDiscontinuedOperations|Duration, 107 | NetCashFlowsDiscontinued,,,us-gaap:NetCashProvidedByUsedInDiscontinuedOperations|Duration, 108 | 
ExchangeGainsLosses,,,"us-gaap:EffectOfExchangeRateOnCashAndCashEquivalents|Duration,us-gaap:EffectOfExchangeRateOnCashAndCashEquivalentsContinuingOperations|Duration,us-gaap:CashProvidedByUsedInFinancingActivitiesDiscontinuedOperations|Duration", 109 | NetCashFlowsDiscontinued,,not NetCashFlowsDiscontinued,eval(NetCashFlowsOperatingDiscontinued + NetCashFlowsInvestingDiscontinued + NetCashFlowsFinancingDiscontinued), 110 | NetCashFlowsOperatingContinuing,,NetCashFlowsOperating and not NetCashFlowsOperatingContinuing,eval(NetCashFlowsOperating - NetCashFlowsOperatingDiscontinued), 111 | NetCashFlowsInvestingContinuing,,NetCashFlowsInvesting and not NetCashFlowsInvestingContinuing,eval(NetCashFlowsInvesting - NetCashFlowsInvestingDiscontinued), 112 | NetCashFlowsFinancingContinuing,,NetCashFlowsFinancing and not NetCashFlowsFinancingContinuing,eval(NetCashFlowsFinancing - NetCashFlowsFinancingDiscontinued), 113 | NetCashFlowsOperating,,NetCashFlowsOperating==0 and NetCashFlowsOperatingContinuing and NetCashFlowsOperatingDiscontinued==0,NetCashFlowsOperatingContinuing, 114 | NetCashFlowsInvesting,,NetCashFlowsInvesting==0 and NetCashFlowsInvestingContinuing and NetCashFlowsInvestingDiscontinued==0,NetCashFlowsInvestingContinuing, 115 | NetCashFlowsFinancing,,NetCashFlowsFinancing==0 and NetCashFlowsFinancingContinuing and NetCashFlowsFinancingDiscontinued==0,NetCashFlowsFinancingContinuing, 116 | NetCashFlowsContinuing,,,eval(NetCashFlowsOperatingContinuing + NetCashFlowsInvestingContinuing + NetCashFlowsFinancingContinuing), 117 | NetCashFlow,,NetCashFlow==0 and (NetCashFlowsOperating or NetCashFlowsInvesting or NetCashFlowsFinancing),eval(NetCashFlowsOperating + NetCashFlowsInvesting + NetCashFlowsFinancing), 118 | SGR,,,eval(((NetIncomeLoss / Revenues) * (1 + ((Assets - Equity) / Equity))) / ((1 / (Revenues / Assets)) - (((NetIncomeLoss / Revenues) * (1 + (((Assets - Equity) / Equity))))))), 119 | ROA,,,eval(NetIncomeLoss / Assets), 120 | ROE,,,eval(NetIncomeLoss 
class UnitChangeForm(ModelForm):
    """Model form for ``Unit`` that limits the ``true_unit`` choices to master units."""

    class Meta:
        """Bind the form to ``models.Unit`` without excluding any field."""
        model = models.Unit
        exclude = ()

    def __init__(self, *args, **kwargs):
        super(UnitChangeForm, self).__init__(*args, **kwargs)
        # Only units flagged master=True are offered as the canonical unit.
        true_unit_field = self.fields["true_unit"]
        true_unit_field.queryset = models.Unit.objects.filter(master=True)
def is_power_of_two(x):
    """
    Return True if the integer ``x`` is a positive power of two (1, 2, 4, 8, ...).

    The bit trick ``x & (x - 1) == 0`` alone also holds for 0, which is not a
    power of two, so non-positive values are rejected explicitly.
    """
    return x > 0 and (x & (x - 1)) == 0
85 | args = '' 86 | option_list = getattr(BaseCommand, 'option_list', ()) + tuple(get_options()) 87 | 88 | def create_parser(self, prog_name, subcommand): 89 | """ 90 | For ``Django>=1.10`` 91 | Create and return the ``ArgumentParser`` which extends ``BaseCommand`` parser with 92 | chroniker extra args and will be used to parse the arguments to this command. 93 | """ 94 | from distutils.version import StrictVersion # pylint: disable=E0611 95 | parser = super(Command, self).create_parser(prog_name, subcommand) 96 | version_threshold = StrictVersion('1.10') 97 | current_version = StrictVersion(django.get_version(django.VERSION)) 98 | if current_version >= version_threshold: 99 | get_options(parser) 100 | self.add_arguments(parser) 101 | return parser 102 | 103 | def handle(self, **options): 104 | 105 | # self.dryrun = options['dryrun'] 106 | self.force = options['force'] 107 | self.verbose = options['verbose'] 108 | 109 | self.stripe_counts = {} # {stripe:{current,total} 110 | self.last_progress_refresh = None 111 | self.start_times = {} # {key:start_time} 112 | 113 | self.cik = (options['cik'] or '').strip() 114 | if self.cik: 115 | self.cik = int(self.cik) 116 | else: 117 | self.cik = None 118 | 119 | self.forms = (options['forms'] or '').strip().split(',') 120 | 121 | start_year = options['start_year'] 122 | if start_year: 123 | start_year = int(start_year) 124 | else: 125 | #start_year = date.today().year - 1 126 | start_year = 1900 127 | self.start_year = start_year 128 | 129 | end_year = options['end_year'] 130 | if end_year: 131 | end_year = int(end_year) 132 | else: 133 | end_year = date.today().year 134 | self.end_year = end_year 135 | 136 | self.status = None 137 | self.progress = collections.OrderedDict() 138 | multi = int(options['multi']) 139 | kwargs = options.copy() 140 | if multi: 141 | assert multi > 1 and is_power_of_two(multi), \ 142 | "Process count must be greater than 1 and a multiple of 2." 
143 | processes = [] 144 | self.status = Queue() 145 | for i, _ in enumerate(six.moves.range(multi)): 146 | print('Starting process %i' % i) 147 | stripe = kwargs['stripe'] = '%i%i' % (i, multi) 148 | kwargs['status'] = self.status 149 | 150 | connection.close() 151 | p = Process(target=self.run_process, kwargs=kwargs) 152 | p.daemon = True 153 | processes.append(p) 154 | p.start() 155 | self.progress[stripe] = (0, 0, 0, 0, None, '') 156 | #return 157 | while any(i.is_alive() for i in processes): 158 | time.sleep(0.1) 159 | while not self.status.empty(): 160 | stripe, current, total, sub_current, sub_total, eta, message = \ 161 | self.status.get() 162 | self.progress[stripe] = (current, total, sub_current, sub_total, eta, message) 163 | if stripe not in self.start_times: 164 | self.start_times[stripe] = time.time() 165 | self.print_progress() 166 | print('All processes complete.') 167 | else: 168 | self.start_times[None] = time.time() 169 | self.run_process(**kwargs) 170 | 171 | def print_progress(self, clear=True, newline=True): 172 | if self.last_progress_refresh \ 173 | and (datetime.now() - self.last_progress_refresh).seconds < 0.5: 174 | return 175 | bar_length = 10 176 | if clear: 177 | sys.stdout.write('\033[2J\033[H') #clear screen 178 | sys.stdout.write('Importing attributes\n') 179 | for stripe, msg_parts in sorted(self.progress.items()): 180 | (current, total, sub_current, sub_total, eta, message) = msg_parts 181 | sub_status = '' 182 | if total: 183 | if not eta: 184 | start_time = self.start_times[stripe] 185 | current_seconds = time.time() - start_time 186 | total_seconds = float(total)/current*current_seconds 187 | remaining_seconds = int(total_seconds - current_seconds) 188 | eta = timezone.now() + timedelta(seconds=remaining_seconds) 189 | 190 | self.stripe_counts[stripe] = (current, total) 191 | percent = current/float(total) 192 | bar = ('=' * int(percent * bar_length)).ljust(bar_length) 193 | percent = int(percent * 100) 194 | else: 195 | eta = 
eta or '?' 196 | percent = 0 197 | bar = ('=' * int(percent * bar_length)).ljust(bar_length) 198 | percent = '?' 199 | total = '?' 200 | if sub_current and sub_total: 201 | sub_status = '(subtask %s of %s) ' % (sub_current, sub_total) 202 | sys.stdout.write( 203 | (('' if newline else '\r') + \ 204 | "%s [%s] %s of %s %s%s%% eta=%s: %s"+('\n' if newline else '')) \ 205 | % (stripe, bar, current, total, sub_status, percent, eta, message)) 206 | sys.stdout.flush() 207 | self.last_progress_refresh = datetime.now() 208 | 209 | # Update job. 210 | overall_current_count = 0 211 | overall_total_count = 0 212 | for stripe, (current, total) in six.iteritems(self.stripe_counts): 213 | overall_current_count += current 214 | overall_total_count += total 215 | #print('overall_current_count:',overall_current_count 216 | #print('overall_total_count:',overall_total_count 217 | if overall_total_count and Job: 218 | Job.update_progress( 219 | total_parts_complete=overall_current_count, 220 | total_parts=overall_total_count, 221 | ) 222 | # if not self.dryrun: 223 | # transaction.commit() 224 | 225 | def run_process(self, status=None, **kwargs): 226 | tmp_debug = settings.DEBUG 227 | settings.DEBUG = False 228 | #transaction.enter_transaction_management() 229 | #transaction.managed(True) 230 | try: 231 | print('Running process:', kwargs) 232 | self.import_attributes(status=status, **kwargs) 233 | print('Done process:', kwargs) 234 | finally: 235 | settings.DEBUG = tmp_debug 236 | #if self.dryrun: 237 | # print('This is a dryrun, so no changes were committed.' 
238 | # transaction.rollback() 239 | #else: 240 | # transaction.commit() 241 | #transaction.leave_transaction_management() 242 | connection.close() 243 | 244 | def import_attributes(self, status=None, **kwargs): 245 | stripe = kwargs.get('stripe') 246 | # reraise = kwargs.get('reraise') 247 | 248 | current_count = 0 # pylint: disable=unused-variable 249 | total_count = 0 250 | # fatal_errors = False 251 | # fatal_error = None 252 | estimated_completion_datetime = None 253 | sub_current = 0 254 | sub_total = 0 255 | 256 | def print_status(message, count=None, total=None): 257 | #print('message:',message 258 | current_count = count or 0 259 | total_count = total or 0 260 | if status: 261 | status.put([ 262 | stripe, 263 | current_count+1, 264 | total_count, 265 | sub_current, 266 | sub_total, 267 | estimated_completion_datetime, 268 | message, 269 | ]) 270 | else: 271 | #print('total_count:',total_count 272 | self.progress[stripe] = ( 273 | current_count, 274 | total_count, 275 | sub_current, 276 | sub_total, 277 | estimated_completion_datetime, 278 | message, 279 | ) 280 | self.print_progress(clear=False, newline=True) 281 | 282 | stripe_num, stripe_mod = parse_stripe(stripe) 283 | if stripe: 284 | print_status('Striping with number %i and modulus %i.' % (stripe_num, stripe_mod)) 285 | 286 | try: 287 | # Get a file from the index. 288 | # It may or may not be present on our hard disk. 289 | # If it's not, it will be downloaded 290 | # the first time we try to access it, or you can call 291 | # .download() explicitly. 
292 | q = models.Index.objects.filter( 293 | year__gte=self.start_year, 294 | year__lte=self.end_year, 295 | company__load__exact=True) 296 | if not self.force: 297 | q = q.filter( 298 | attributes_loaded__exact=False, 299 | valid__exact=True, 300 | ) 301 | if self.forms: 302 | q = q.filter(form__in=self.forms) 303 | 304 | q2 = q 305 | if self.cik: 306 | q = q.filter(company__cik=self.cik, company__load=True) 307 | q2 = q2.filter(company__cik=self.cik) 308 | if not q.count() and q2.count(): 309 | print(('Warning: the company you specified with cik %s is not ' 310 | 'marked for loading.') % (self.cik,), file=sys.stderr) 311 | 312 | if stripe is not None: 313 | q = q.extra( 314 | where=['((django_sec_index.id %%%% %i) = %i)' % (stripe_mod, stripe_num)]) 315 | 316 | #print_status('Finding total record count...') 317 | #print('query:', q.query 318 | total_count = total = q.count() 319 | print('total_count:', total_count) 320 | 321 | if kwargs['show_pending']: 322 | print('='*80) 323 | print('%i total pending records' % total_count) 324 | return 325 | 326 | print_status('%i total rows.' % (total,)) 327 | i = 0 328 | commit_freq = 100 329 | print_status('%i indexes found for forms %s.' \ 330 | % (total, ', '.join(self.forms)), count=0, total=total) 331 | for ifile in q.iterator(): 332 | i += 1 333 | current_count = i 334 | 335 | msg = 'Processing index %s.' % (ifile.filename,) 336 | print_status(msg, count=i, total=total) 337 | 338 | if not i % commit_freq: 339 | sys.stdout.flush() 340 | 341 | ifile.download(verbose=self.verbose) 342 | 343 | # Initialize XBRL parser and populate an attribute called fields with 344 | # a dict of 50 common terms. 345 | x = None 346 | error = None 347 | try: 348 | x = ifile.xbrl() 349 | except Exception as e: 350 | ferr = StringIO() 351 | traceback.print_exc(file=ferr) 352 | error = ferr.getvalue() 353 | 354 | if x is None: 355 | if error is None: 356 | error = 'No XBRL found.' 
357 | models.Index.objects.filter(id=ifile.id)\ 358 | .update(valid=False, error=error) 359 | continue 360 | 361 | maxretries = 10 362 | retry = 0 363 | #for retry in xrange(maxretries): 364 | while 1: 365 | try: 366 | 367 | #x.loadYear(2) 368 | # print'Year:', x.fields['FiscalYear'] 369 | company = ifile.company 370 | max_text_len = 0 371 | # unique_attrs = set() 372 | bulk_objects = [] 373 | prior_keys = set() 374 | j = sub_total = 0 375 | #print 376 | for node, sub_total in x.iter_namespace(): 377 | j += 1 378 | sub_current = j 379 | if not j % commit_freq: 380 | print_status(msg, count=i, total=total) 381 | #sys.stdout.flush() 382 | # if not self.dryrun: 383 | # transaction.commit() 384 | 385 | matches = re.findall(r'^\{([^\}]+)\}(.*)$', node.tag) 386 | if matches: 387 | ns, attr_name = matches[0] 388 | else: 389 | ns = None 390 | attr_name = node 391 | decimals = node.attrib.get('decimals', None) 392 | if decimals is None: 393 | continue 394 | if decimals.upper() == 'INF': 395 | decimals = 6 396 | decimals = int(decimals) 397 | max_text_len = max(max_text_len, len((node.text or '').strip())) 398 | context_id = node.attrib['contextRef'] 399 | # if context_id != 'D2009Q4YTD': 400 | # continue 401 | start_date = x.get_context_start_date(context_id) 402 | if not start_date: 403 | continue 404 | end_date = x.get_context_end_date(context_id) 405 | if not end_date: 406 | continue 407 | namespace, _ = models.Namespace.objects.get_or_create(name=ns.strip()) 408 | attribute, _ = models.Attribute.objects.get_or_create( 409 | namespace=namespace, 410 | name=attr_name, 411 | defaults=dict(load=True), 412 | ) 413 | if not attribute.load: 414 | continue 415 | unit, _ = models.Unit.objects.get_or_create( 416 | name=node.attrib['unitRef'].strip()) 417 | unit.save() 418 | value = (node.text or '').strip() 419 | if not value: 420 | continue 421 | assert len(value.split('.')[0]) <= c.MAX_QUANTIZE, \ 422 | 'Value too large, must be less than %i digits: %i %s' \ 423 | % 
(c.MAX_QUANTIZE, len(value), repr(value)) 424 | 425 | models.Attribute.objects.filter(id=attribute.id).update(total_values_fresh=False) 426 | 427 | if models.AttributeValue.objects.filter(company=company, attribute=attribute, start_date=start_date).exists(): 428 | continue 429 | 430 | # Some attributes are listed multiple times in differently 431 | # named contexts even though the value and date ranges are 432 | # identical. 433 | key = (company, attribute, start_date) 434 | if key in prior_keys: 435 | continue 436 | prior_keys.add(key) 437 | 438 | bulk_objects.append(models.AttributeValue( 439 | company=company, 440 | attribute=attribute, 441 | start_date=start_date, 442 | end_date=end_date, 443 | value=value, 444 | unit=unit, 445 | filing_date=ifile.date, 446 | )) 447 | 448 | if not len(bulk_objects) % commit_freq: 449 | models.AttributeValue.objects.bulk_create(bulk_objects) 450 | bulk_objects = [] 451 | prior_keys.clear() 452 | 453 | # if not self.dryrun: 454 | # transaction.commit() 455 | # print('\rImporting attribute %i of %i.' 
% (sub_total, sub_total), 456 | # print 457 | print_status('Importing attributes.', count=i, total=total) 458 | 459 | if bulk_objects: 460 | models.AttributeValue.objects.bulk_create(bulk_objects) 461 | bulk_objects = [] 462 | 463 | ticker = ifile.ticker() 464 | models.Index.objects\ 465 | .filter(id=ifile.id)\ 466 | .update(attributes_loaded=True, _ticker=ticker) 467 | 468 | models.Attribute.do_update() 469 | 470 | models.Unit.do_update() 471 | 472 | # if not self.dryrun: 473 | # transaction.commit() 474 | 475 | break 476 | 477 | except DatabaseError as e: 478 | if retry+1 == maxretries: 479 | raise 480 | print(e, 'retry', retry) 481 | connection.close() 482 | time.sleep(random.random()*5) 483 | 484 | except TransactionRollbackError as e: 485 | if TransactionRollbackError.__name__ != 'TransactionRollbackError': 486 | raise 487 | if retry+1 == maxretries: 488 | raise 489 | print(e, 'retry', retry) 490 | connection.close() 491 | time.sleep(random.random()*5) 492 | 493 | except Exception as e: 494 | print('Error extracting attributes: %s' % e) 495 | ferr = StringIO() 496 | traceback.print_exc(file=ferr) 497 | error = ferr.getvalue() 498 | print('Fatal error: %s' % error) 499 | print_status('Fatal error: %s' % (error,)) 500 | finally: 501 | connection.close() 502 | -------------------------------------------------------------------------------- /django_sec/management/commands/sec_import_index.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | from zipfile import ZipFile 6 | from datetime import date, datetime, timedelta 7 | from optparse import make_option 8 | 9 | import django 10 | from django.core.management.base import BaseCommand 11 | from django.db import transaction, connection 12 | from django.conf import settings 13 | from django.utils import timezone 14 | from django.utils.encoding import force_text 15 | from django.db.transaction import 
def removeNonAscii(s):
    """Return ``s`` without the characters whose code point is 128 or above."""
    ascii_chars = [ch for ch in s if ord(ch) < 128]
    return "".join(ascii_chars)
63 | """ 64 | from distutils.version import StrictVersion # pylint: disable=E0611 65 | parser = super(Command, self).create_parser(prog_name, subcommand) 66 | version_threshold = StrictVersion('1.10') 67 | current_version = StrictVersion(django.get_version(django.VERSION)) 68 | if current_version >= version_threshold: 69 | get_options(parser) 70 | self.add_arguments(parser) 71 | return parser 72 | 73 | def handle(self, **options): 74 | 75 | self.dryrun = options['dryrun'] 76 | 77 | max_lines = int(options['max_lines']) 78 | 79 | start_year = options['start_year'] 80 | if start_year: 81 | start_year = int(start_year) 82 | else: 83 | start_year = date.today().year - 1 84 | 85 | end_year = options['end_year'] 86 | if end_year: 87 | end_year = int(end_year) 88 | else: 89 | end_year = date.today().year+1 90 | 91 | reprocess = options['reprocess'] 92 | 93 | target_quarter = options['quarter'] 94 | if target_quarter: 95 | target_quarter = int(target_quarter) 96 | 97 | auto_reprocess_last_n_days = int(options['auto_reprocess_last_n_days']) 98 | 99 | tmp_debug = settings.DEBUG 100 | settings.DEBUG = False 101 | try: 102 | for year in range(start_year, end_year): 103 | for quarter in range(4): 104 | if target_quarter and quarter+1 != target_quarter: 105 | continue 106 | quarter_start = date(year, quarter*3+1, 1) 107 | cutoff_date = date.today() - timedelta(days=auto_reprocess_last_n_days) 108 | _reprocess = reprocess or (quarter_start > cutoff_date) 109 | self.get_filing_list( 110 | year, 111 | quarter+1, 112 | reprocess=_reprocess, 113 | max_lines=max_lines) 114 | finally: 115 | settings.DEBUG = tmp_debug 116 | connection.close() 117 | 118 | def get_filing_list(self, year, quarter, reprocess=False, max_lines=None): 119 | """ 120 | Gets the list of filings and download locations for the given year and quarter. 
121 | """ 122 | 123 | def commit(): 124 | try: 125 | transaction.commit() 126 | except TransactionManagementError: 127 | pass 128 | 129 | url = 'https://www.sec.gov/Archives/edgar/full-index/%d/QTR%d/company.zip' % (year, quarter) 130 | 131 | # Download the data and save to a file 132 | if not os.path.isdir(DATA_DIR): 133 | os.makedirs(DATA_DIR) 134 | fn = os.path.join(DATA_DIR, 'company_%d_%d.zip' % (year, quarter)) 135 | 136 | ifile, _ = IndexFile.objects.get_or_create( 137 | year=year, quarter=quarter, defaults=dict(filename=fn)) 138 | if ifile.processed and not reprocess: 139 | return 140 | ifile.filename = fn 141 | 142 | if os.path.exists(fn) and reprocess: 143 | print('Deleting old file %s.' % fn) 144 | os.remove(fn) 145 | 146 | if self.dryrun: 147 | return 148 | 149 | if not os.path.exists(fn): 150 | print('Downloading %s.' % (url,)) 151 | try: 152 | compressed_data = urlopen(url).read() 153 | except IOError as e: 154 | print('Unable to download url: %s' % e) 155 | return 156 | fileout = open(fn, 'wb') 157 | fileout.write(compressed_data) 158 | fileout.close() 159 | ifile.downloaded = timezone.now() 160 | 161 | if not ifile.downloaded: 162 | ifile.downloaded = timezone.now() 163 | ifile.save() 164 | commit() 165 | 166 | # Extract the compressed file 167 | print('Opening index file %s.' % (fn,)) 168 | zipf = ZipFile(fn) 169 | zdata = zipf.read('company.idx') 170 | #zdata = removeNonAscii(zdata) 171 | 172 | # Parse the fixed-length fields 173 | bulk_companies = [] 174 | bulk_indexes = [] 175 | bulk_commit_freq = 1000 176 | status_secs = 3 177 | 178 | # In Python3, default type is now bytes, so we have to convert back to string. 179 | if not isinstance(zdata, six.string_types): 180 | zdata = zdata.decode() 181 | 182 | lines = zdata.split('\n') 183 | i = 0 184 | total = len(lines) 185 | IndexFile.objects.filter(id=ifile.id).update(total_rows=total) 186 | last_status = None 187 | prior_keys = set() 188 | #print('Found %i prior index keys.' 
% len(prior_keys) 189 | prior_ciks = set(Company.objects.all().values_list('cik', flat=True)) 190 | print('Found %i prior ciks.' % len(prior_ciks)) 191 | index_add_count = 0 192 | company_add_count = 0 193 | for r in lines[10:]: # Note, first 10 lines are useless headers. 194 | i += 1 195 | if not reprocess and ifile.processed_rows and i < ifile.processed_rows: 196 | continue 197 | if not last_status or ((datetime.now() - last_status).seconds >= status_secs): 198 | sys.stdout.write( 199 | '\rProcessing record %i of %i (%.02f%%).' % (i, total, float(i)/total*100)) 200 | sys.stdout.flush() 201 | last_status = datetime.now() 202 | IndexFile.objects.filter(id=ifile.id).update(processed_rows=i) 203 | dt = r[86:98].strip() 204 | if not dt: 205 | continue 206 | dt = date(*map(int, dt.split('-'))) 207 | if r.strip() == '': 208 | continue 209 | name = r[0:62].strip() 210 | 211 | cik = int(r[74:86].strip()) 212 | if cik not in prior_ciks: 213 | company_add_count += 1 214 | prior_ciks.add(cik) 215 | bulk_companies.append(Company(cik=cik, name=force_text(name, errors='replace'))) 216 | 217 | filename = r[98:].strip() 218 | key = (cik, dt, filename)#, year, quarter) 219 | if key in prior_keys: 220 | continue 221 | prior_keys.add(key) 222 | if Index.objects.filter(company__cik=cik, date=dt, filename=filename).exists(): 223 | continue 224 | index_add_count += 1 225 | bulk_indexes.append(Index( 226 | company_id=cik, 227 | form=r[62:74].strip(), # form type 228 | date=dt, # date filed 229 | year=year, 230 | quarter=quarter, 231 | filename=filename, 232 | )) 233 | if not len(bulk_indexes) % bulk_commit_freq: 234 | if len(bulk_companies): 235 | Company.objects.bulk_create(bulk_companies) 236 | bulk_companies = [] 237 | Index.objects.bulk_create(bulk_indexes) 238 | bulk_indexes = [] 239 | commit() 240 | 241 | # Mainly used during unittesting to limit processing. 
242 | if max_lines and i >= max_lines: 243 | break 244 | 245 | if bulk_indexes: 246 | if len(bulk_companies): 247 | Company.objects.bulk_create(bulk_companies) 248 | bulk_companies = [] 249 | Index.objects.bulk_create(bulk_indexes) 250 | IndexFile.objects.filter(id=ifile.id).update(processed=timezone.now()) 251 | commit() 252 | 253 | print('\rProcessing record %i of %i (%.02f%%).' % (total, total, 100)) 254 | print() 255 | print('%i new companies found.' % company_add_count) 256 | print('%i new indexes found.' % index_add_count) 257 | sys.stdout.flush() 258 | IndexFile.objects.filter(id=ifile.id).update(processed_rows=total) 259 | -------------------------------------------------------------------------------- /django_sec/management/commands/sec_mark_units.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | from optparse import make_option 5 | 6 | import django 7 | from django.core.management.base import BaseCommand 8 | from django.db.models import Q 9 | from django.conf import settings 10 | 11 | from django_sec import models 12 | 13 | def get_options(parser=None): 14 | make_opt = make_option 15 | if parser: 16 | make_opt = parser.add_argument 17 | return [ 18 | make_opt('--name', default=None), 19 | ] 20 | 21 | class Command(BaseCommand): 22 | help = "Links duplicate units to the true canonical unit." 23 | args = '' 24 | option_list = getattr(BaseCommand, 'option_list', ()) + tuple(get_options()) 25 | 26 | def create_parser(self, prog_name, subcommand): 27 | """ 28 | For ``Django>=1.10`` 29 | Create and return the ``ArgumentParser`` which extends ``BaseCommand`` parser with 30 | chroniker extra args and will be used to parse the arguments to this command. 
31 | """ 32 | from distutils.version import StrictVersion # pylint: disable=E0611 33 | parser = super(Command, self).create_parser(prog_name, subcommand) 34 | version_threshold = StrictVersion('1.10') 35 | current_version = StrictVersion(django.get_version(django.VERSION)) 36 | if current_version >= version_threshold: 37 | get_options(parser) 38 | self.add_arguments(parser) 39 | return parser 40 | 41 | def handle(self, **options): 42 | 43 | settings.DEBUG = False 44 | 45 | only_name = options['name'] 46 | 47 | qs = models.Unit.objects.all() 48 | if only_name: 49 | qs = qs.filter(name__icontains=only_name) 50 | total = qs.count() 51 | i = 0 52 | dups = set() 53 | 54 | # Link all singular to plurals. 55 | qs = models.Unit.objects.filter(master=True) 56 | total = qs.count() 57 | i = 0 58 | for r in qs.iterator(): 59 | i += 1 60 | sys.stdout.write('\r%i of %i' % (i, total)) 61 | sys.stdout.flush() 62 | 63 | plural_qs = models.Unit.objects\ 64 | .filter(master=True)\ 65 | .filter(Q(name=r.name+'s')|Q(name=r.name+'es'))\ 66 | .exclude(id=r.id) 67 | if plural_qs.exists(): 68 | models.Unit.objects.filter(true_unit=r)\ 69 | .update(true_unit=plural_qs[0], master=False) 70 | r.true_unit = plural_qs[0] 71 | r.master = False 72 | r.save() 73 | 74 | print() 75 | print('%i duplicates linked' % len(dups)) 76 | -------------------------------------------------------------------------------- /django_sec/management/commands/sec_xbrl_to_csv.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import csv 4 | 5 | from django.core.management.base import BaseCommand 6 | import psycopg2 7 | 8 | from django_sec.models import * 9 | 10 | #TODO:remove? deprecated? 
class Command(BaseCommand):
    help = "Put the 50+ common accounting terms from an arbitrary list of 10ks into a spreadsheet"

    def handle(self, csv_fn, **options):
        """
        Writes one CSV row per company and year for the latest 10-K filing.

        ``csv_fn`` is the path of the CSV file to create. The list of CIKs
        comes from an ad-hoc ``index`` table in a local PostgreSQL database
        named ``recovery``. For each CIK and each year from 2011 to 2013 the
        most recent 10-K ``Index`` row is downloaded and parsed, and the
        fields named in ``headers`` are written out (blank when absent).
        """

        headers = [
            'EntityRegistrantName', 'EntityCentralIndexKey', 'EntityFilerCategory',
            'TradingSymbol', 'FiscalYear', 'FiscalPeriod', 'DocumentType', 'PeriodStartDate',
            'DocumentPeriodEndDate', 'Assets', 'CurrentAssets', 'NoncurrentAssets',
            'LiabilitiesAndEquity', 'Liabilities', 'CurrentLiabilities', 'NoncurrentLiabilities',
            'CommitmentsAndContingencies', 'TemporaryEquity', 'Equity',
            'EquityAttributableToParent', 'EquityAttributableToNoncontrollingInterest', 'Revenues',
            'CostOfRevenue', 'GrossProfit', 'OperatingExpenses', 'CostsAndExpenses',
            'OtherOperatingIncome', 'OperatingIncomeLoss', 'NonoperatingIncomeLoss',
            'InterestAndDebtExpense', 'NonoperatingIncomeLossPlusInterestAndDebtExpense',
            'IncomeBeforeEquityMethodInvestments', 'IncomeFromEquityMethodInvestments',
            'IncomeFromContinuingOperationsBeforeTax', 'IncomeTaxExpenseBenefit',
            'IncomeFromContinuingOperationsAfterTax', 'IncomeFromDiscontinuedOperations',
            'ExtraordaryItemsGainLoss', 'NetIncomeLoss', 'NetIncomeAttributableToParent',
            'NetIncomeAttributableToNoncontrollingInterest',
            'PreferredStockDividendsAndOtherAdjustments',
            'NetIncomeAvailableToCommonStockholdersBasic', 'ComprehensiveIncome',
            'OtherComprehensiveIncome', 'NetCashFlowsOperating',
            'NetCashFlowsOperatingContinuing', 'NetCashFlowsOperatingDiscontinued',
            'NetCashFlowsInvesting', 'NetCashFlowsInvestingContinuing',
            'NetCashFlowsInvestingDiscontinued', 'NetCashFlowsFinancing',
            'NetCashFlowsFinancingContinuing', 'NetCashFlowsFinancingDiscontinued',
            'NetCashFlowsContinuing', 'NetCashFlowsDiscontinued', 'ExchangeGainsLosses',
            'NetCashFlow', 'ComprehensiveIncomeAttributableToParent',
            'ComprehensiveIncomeAttributableToNoncontrollingInterest', 'SGR', 'ROA', 'ROE',
            'ROS', 'SECFilingPage', 'LinkToXBRLInstance',
        ]

        # The context manager guarantees the CSV is flushed and closed even
        # if a download or parse step raises part-way through.
        with open(csv_fn, 'w') as csv_file:
            fout = csv.DictWriter(csv_file, headers)
            fout.writeheader()

            #this SQL is just a way of getting a list of particular CIKs I want
            conn = psycopg2.connect("dbname=recovery")
            try:
                cur = conn.cursor()
                cur.execute("SELECT cik, ticker FROM index WHERE cik is not null and use='1';")
                rows = cur.fetchall()
            finally:
                # All rows are already in memory; release the connection.
                conn.close()

            for row in rows:
                cik = row[0]
                print('cik:', cik)
                for year in range(2011, 2014):
                    # Index has no ``cik`` column (the key lives on the
                    # related Company) and stores the year in its own
                    # integer field, so filter on those directly.
                    latest = Index.objects.filter(
                        form='10-K',
                        company__cik=cik,
                        year=year
                    ).order_by('-date').first()
                    if latest is None:
                        continue

                    latest.download()
                    x = latest.xbrl()
                    if x is None:
                        print('no xbrl for ', cik, year)
                        continue

                    # Blank cell for every header the filing does not report.
                    d = dict(
                        (f, x.fields[f] if f in x.fields.keys() else '')
                        for f in headers
                    )
                    d['FiscalPeriod'] = x.fields['DocumentFiscalPeriodFocus']
                    d['FiscalYear'] = x.fields['DocumentFiscalYearFocus']
                    d['DocumentPeriodEndDate'] = x.fields['BalanceSheetDate']
                    d['PeriodStartDate'] = x.fields['IncomeStatementPeriodYTD']
                    d['SECFilingPage'] = latest.index_link()
                    d['LinkToXBRLInstance'] = latest.xbrl_link()

                    fout.writerow(d)
verbose_name='ID')), 21 | ('name', models.CharField(db_index=True, max_length=200)), 22 | ('load', models.BooleanField(db_index=True, default=False, help_text='If checked, all values will be loaded for this attribute.')), 23 | ('total_values', models.PositiveIntegerField(blank=True, null=True)), 24 | ('total_values_fresh', models.BooleanField(default=False, verbose_name=b'fresh')), 25 | ], 26 | ), 27 | migrations.CreateModel( 28 | name='AttributeValue', 29 | fields=[ 30 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 31 | ('value', models.DecimalField(decimal_places=6, max_digits=40)), 32 | ('start_date', models.DateField(db_index=True, help_text='If attribute implies a duration, this is the date\n the duration begins. If the attribute implies an instance, this\n is the exact date it applies to.')), 33 | ('end_date', models.DateField(blank=True, help_text='If this attribute implies a duration, this is the date\n the duration ends.', null=True)), 34 | ('filing_date', models.DateField(help_text='The date this information became publically available.')), 35 | ('attribute', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='values', to='django_sec.Attribute')), 36 | ], 37 | options={ 38 | 'ordering': ('-attribute__total_values', '-start_date', 'attribute__name'), 39 | }, 40 | ), 41 | migrations.CreateModel( 42 | name='Company', 43 | fields=[ 44 | ('cik', models.IntegerField(db_index=True, help_text='Central index key that uniquely identifies a filing entity.', primary_key=True, serialize=False)), 45 | ('name', models.CharField(db_index=True, help_text='The name of the company.', max_length=100)), 46 | ('load', models.BooleanField(db_index=True, default=False, help_text='If checked, all values for load-enabled attributes will be loaded for this company.')), 47 | ('min_date', models.DateField(blank=True, db_index=True, editable=False, help_text='The oldest date of associated SEC Edgar 
filings\n for this company.', null=True)), 48 | ('max_date', models.DateField(blank=True, db_index=True, editable=False, help_text='The most recent date of associated SEC Edgar filings\n for this company.', null=True)), 49 | ], 50 | options={ 51 | 'verbose_name_plural': 'companies', 52 | }, 53 | ), 54 | migrations.CreateModel( 55 | name='Index', 56 | fields=[ 57 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 58 | ('form', models.CharField(blank=True, db_index=True, help_text='The type of form the document is classified as.', max_length=10, verbose_name='form type')), 59 | ('date', models.DateField(db_index=True, help_text='The date the item was filed with the SEC.', verbose_name='date filed')), 60 | ('filename', models.CharField(db_index=True, help_text='The name of the associated financial filing.', max_length=100)), 61 | ('year', models.IntegerField(db_index=True)), 62 | ('quarter', models.IntegerField(db_index=True)), 63 | ('_ticker', models.CharField(blank=True, db_column='ticker', db_index=True, help_text='Caches the trading symbol if one is detected in the\n filing during attribute load.', max_length=50, null=True, verbose_name='ticker')), 64 | ('attributes_loaded', models.BooleanField(db_index=True, default=False)), 65 | ('valid', models.BooleanField(db_index=True, default=True, help_text='If false, errors were encountered trying to parse the associated files.')), 66 | ('error', models.TextField(blank=True, null=True)), 67 | ('company', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filings', to='django_sec.Company')), 68 | ], 69 | options={ 70 | 'ordering': ('-date', 'filename'), 71 | 'verbose_name_plural': 'indexes', 72 | }, 73 | ), 74 | migrations.CreateModel( 75 | name='IndexFile', 76 | fields=[ 77 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 78 | ('year', models.IntegerField(db_index=True)), 79 | ('quarter', 
models.IntegerField(db_index=True)), 80 | ('filename', models.CharField(max_length=100)), 81 | ('total_rows', models.PositiveIntegerField(blank=True, null=True)), 82 | ('processed_rows', models.PositiveIntegerField(blank=True, null=True)), 83 | ('downloaded', models.DateTimeField(blank=True, null=True)), 84 | ('processed', models.DateTimeField(blank=True, null=True)), 85 | ], 86 | options={ 87 | 'ordering': ('year', 'quarter'), 88 | }, 89 | ), 90 | migrations.CreateModel( 91 | name='Namespace', 92 | fields=[ 93 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 94 | ('name', models.CharField(max_length=100, unique=True)), 95 | ], 96 | ), 97 | migrations.CreateModel( 98 | name='Unit', 99 | fields=[ 100 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 101 | ('name', models.CharField(db_index=True, max_length=50, unique=True)), 102 | ('master', models.BooleanField(default=True, editable=False, help_text='If true, indicates this unit is the master referred to by duplicates.')), 103 | ('true_unit', models.ForeignKey(blank=True, help_text='Points the the unit record this record duplicates.\n Points to itself if this is the master unit.', null=True, on_delete=django.db.models.deletion.SET_NULL, to='django_sec.Unit')), 104 | ], 105 | options={ 106 | 'ordering': ('name',), 107 | }, 108 | ), 109 | migrations.AlterUniqueTogether( 110 | name='indexfile', 111 | unique_together=set([('year', 'quarter')]), 112 | ), 113 | migrations.AddField( 114 | model_name='attributevalue', 115 | name='company', 116 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='attributes', to='django_sec.Company'), 117 | ), 118 | migrations.AddField( 119 | model_name='attributevalue', 120 | name='unit', 121 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='django_sec.Unit'), 122 | ), 123 | migrations.AddField( 124 | model_name='attribute', 125 | 
name='namespace', 126 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='django_sec.Namespace'), 127 | ), 128 | migrations.AlterUniqueTogether( 129 | name='index', 130 | unique_together=set([('company', 'form', 'date', 'filename', 'year', 'quarter')]), 131 | ), 132 | migrations.AlterIndexTogether( 133 | name='index', 134 | index_together=set([('company', 'date', 'filename'), ('year', 'quarter')]), 135 | ), 136 | migrations.AlterUniqueTogether( 137 | name='attributevalue', 138 | unique_together=set([('company', 'attribute', 'start_date', 'end_date')]), 139 | ), 140 | migrations.AlterIndexTogether( 141 | name='attributevalue', 142 | index_together=set([('company', 'attribute', 'start_date')]), 143 | ), 144 | migrations.AlterUniqueTogether( 145 | name='attribute', 146 | unique_together=set([('namespace', 'name')]), 147 | ), 148 | migrations.AlterIndexTogether( 149 | name='attribute', 150 | index_together=set([('namespace', 'name')]), 151 | ), 152 | ] 153 | -------------------------------------------------------------------------------- /django_sec/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django migrations for django_sec app 3 | 4 | This package does not contain South migrations. South migrations can be found 5 | in the ``south_migrations`` package. 
def clean_unit_name(s):
    """
    Normalizes a unit name for comparison.

    The argument is converted with ``str()``, stripped of surrounding
    whitespace and lower-cased; every run of characters other than ``a-z``
    and ``0-9`` is then removed.
    """
    normalized = str(s).strip().lower()
    cleaned = re.sub(r'[^a-z0-9]+', '', normalized)
    return cleaned
class Namespace(models.Model):
    """
    Represents an XBRL namespace used to segment attribute names.
    """

    name = models.CharField(
        # Causes MySQL error:
        # Specified key was too long; max key length is 767 bytes
        # Note, PostgreSQL has much higher limits.
        #max_length=255,
        max_length=100,
        blank=False,
        null=False,
        #db_index=True,
        unique=True)

    class Meta:
        app_label = APP_LABEL

    def __unicode__(self):
        return self.name

    # Python 3 never calls __unicode__; expose the same text through __str__.
    if sys.version_info[0] >= 3:
        __str__ = __unicode__

class UnitManager(models.Manager):
    """
    Manager that resolves units from their natural key.
    """

    def get_by_natural_key(self, name, *true_unit_nk):
        """
        Returns the unit called ``name``, creating it if necessary.

        Any further positional parts are the natural key of the unit's true
        unit, resolved recursively through this same method. The unit's
        ``true_unit`` is overwritten with that result (or ``None`` when no
        further parts are given) and the unit is saved before being returned.
        """

        true_unit = None
        if true_unit_nk:
            true_unit = Unit.objects.get_by_natural_key(*true_unit_nk)

        unit, _created = Unit.objects.get_or_create(name=name)
        unit.true_unit = true_unit
        unit.save()

        return unit

class Unit(models.Model):
    """
    Represents a numeric unit.
    """

    objects = UnitManager()

    name = models.CharField(
        max_length=50,
        blank=False,
        null=False,
        db_index=True,
        unique=True)

    true_unit = models.ForeignKey(
        'self',
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        help_text=_('''Points the the unit record this record duplicates.
        Points to itself if this is the master unit.'''))

    master = models.BooleanField(
        default=True,
        editable=False,
        help_text=_('If true, indicates this unit is the master referred to by duplicates.'))

    class Meta:
        app_label = APP_LABEL
        ordering = (
            'name',
        )

    def __unicode__(self):
        return self.name

    # Python 3 never calls __unicode__; expose the same text through __str__.
    if sys.version_info[0] >= 3:
        __str__ = __unicode__

    def natural_key(self):
        """
        Returns a tuple starting with this unit's name, followed by the
        natural key of its true unit when that is a different record.
        """
        parts = (self.name,)
        # true_unit is nullable and is only filled in by save() once the row
        # has an id, so it may still be None here.
        if self.true_unit is not None and self.true_unit != self:
            parts += self.true_unit.natural_key()
        return parts

    def save(self, *args, **kwargs):
        """
        Saves the unit, keeping ``true_unit`` and ``master`` consistent.

        For a record that already has an id, a missing ``true_unit`` defaults
        to the record itself and ``master`` is recomputed as "points to
        itself". Asserts that the name is non-blank and that the true unit is
        itself a master.
        """
        assert self.name.strip()
        if self.id:
            self.true_unit = self.true_unit or self
            self.master = self == self.true_unit
            assert self.true_unit.master
        super(Unit, self).save(*args, **kwargs)

    @classmethod
    def do_update(cls, *args, **kwargs):
        """
        Re-saves every unit without a true unit so save() can assign one.
        """
        q = cls.objects.filter(true_unit__isnull=True)
        for r in q.iterator():
            r.save()
class Attribute(models.Model):
    """
    Represents a financial attribute tag.
    """

    # on_delete is stated explicitly; CASCADE matches the shipped migration.
    namespace = models.ForeignKey('Namespace', on_delete=models.CASCADE)

    name = models.CharField(
        max_length=200,
        blank=False,
        null=False,
        db_index=True)

    load = models.BooleanField(
        default=False,
        db_index=True,
        help_text=_('If checked, all values will be loaded for this attribute.'))

    total_values = models.PositiveIntegerField(
        blank=True,
        null=True,
        editable=True)

    total_values_fresh = models.BooleanField(
        default=False,
        verbose_name='fresh')

    class Meta:
        app_label = APP_LABEL
        unique_together = (
            ('namespace', 'name'),
        )
        index_together = (
            ('namespace', 'name'),
        )

    def __unicode__(self):
        return '{%s}%s' % (self.namespace, self.name)

    # Python 3 never calls __unicode__; expose the same text through __str__.
    if sys.version_info[0] >= 3:
        __str__ = __unicode__

    @classmethod
    def do_update(cls, *args, **kwargs):
        """
        Recounts ``total_values`` for every attribute not marked fresh.

        Accepts ``verbose=True`` to print progress to stdout. The count is
        taken over all AttributeValue rows whose attribute has the same
        name, and the row is then flagged ``total_values_fresh``.
        """
        verbose = kwargs.pop('verbose', False)
        q = cls.objects.filter(total_values_fresh=False).only('id', 'name')
        total = q.count()
        for i, r in enumerate(q.iterator(), 1):
            # Report on the first row, every 100th row and the last row.
            if verbose and (i == 1 or not i % 100 or i == total):
                sys.stdout.write('\rRefreshing attribute %i of %i.' % (i, total))
                sys.stdout.flush()
            total_values = AttributeValue.objects.filter(attribute__name=r.name).count()
            cls.objects.filter(id=r.id).update(
                total_values=total_values,
                total_values_fresh=True)
        if verbose:
            print('\n')

class AttributeValue(models.Model):
    """
    A single numeric value of an attribute reported by a company.
    """

    company = models.ForeignKey(
        'Company',
        on_delete=models.CASCADE,
        related_name='attributes')

    attribute = models.ForeignKey(
        'Attribute',
        on_delete=models.CASCADE,
        related_name='values')

    # Inspecting several XBRL samples, no digits above 12 characters
    # or decimals above 5 were found, so I've started there and added
    # a little more to handle future increases.
    value = models.DecimalField(
        max_digits=c.MAX_DIGITS,
        decimal_places=c.MAX_DECIMALS,
        blank=False,
        null=False)

    unit = models.ForeignKey('Unit', on_delete=models.CASCADE)

    start_date = models.DateField(
        blank=False,
        null=False,
        db_index=True,
        help_text=_('''If attribute implies a duration, this is the date
        the duration begins. If the attribute implies an instance, this
        is the exact date it applies to.'''))

    end_date = models.DateField(
        blank=True,
        null=True,
        help_text=_('''If this attribute implies a duration, this is the date
        the duration ends.'''))

    filing_date = models.DateField(
        blank=False,
        null=False,
        help_text=_('The date this information became publically available.'))

    class Meta:
        app_label = APP_LABEL
        ordering = ('-attribute__total_values', '-start_date', 'attribute__name')
        unique_together = (
            ('company', 'attribute', 'start_date', 'end_date'),
        )
        index_together = (
            ('company', 'attribute', 'start_date'),
        )

    def __unicode__(self):
        return '%s %s=%s %s on %s' % (
            self.company,
            self.attribute.name,
            self.value,
            self.unit,
            self.start_date,
        )

    # Python 3 never calls __unicode__; expose the same text through __str__.
    if sys.version_info[0] >= 3:
        __str__ = __unicode__

class IndexFile(models.Model):
    """
    Bookkeeping record for one index file, unique per year and quarter,
    with row counts and download/processing timestamps.
    """

    year = models.IntegerField(
        blank=False,
        null=False,
        db_index=True)

    quarter = models.IntegerField(
        blank=False,
        null=False,
        db_index=True)

    filename = models.CharField(max_length=100, blank=False, null=False)

    total_rows = models.PositiveIntegerField(blank=True, null=True)

    processed_rows = models.PositiveIntegerField(blank=True, null=True)

    downloaded = models.DateTimeField(blank=True, null=True)

    processed = models.DateTimeField(blank=True, null=True)

    class Meta:
        app_label = APP_LABEL
        ordering = ('year', 'quarter')
        unique_together = (
            ('year', 'quarter'),
        )
class Company(models.Model):
    """
    A filing entity, keyed by its central index key (CIK).
    """

    cik = models.IntegerField(
        db_index=True,
        primary_key=True,
        help_text=_('Central index key that uniquely identifies a filing entity.'))

    name = models.CharField(
        max_length=100,
        db_index=True,
        blank=False,
        null=False,
        help_text=_('The name of the company.'))

    load = models.BooleanField(
        default=False,
        db_index=True,
        help_text=_('If checked, all values for load-enabled attributes '
            'will be loaded for this company.'))

    min_date = models.DateField(
        blank=True,
        null=True,
        editable=False,
        db_index=True,
        help_text=_('''The oldest date of associated SEC Edgar filings
        for this company.'''))

    max_date = models.DateField(
        blank=True,
        null=True,
        editable=False,
        db_index=True,
        help_text=_('''The most recent date of associated SEC Edgar filings
        for this company.'''))

    class Meta:
        app_label = APP_LABEL
        verbose_name_plural = _('companies')

    def __unicode__(self):
        return self.name

    # Python 3 never calls __unicode__; expose the same text through __str__.
    if sys.version_info[0] >= 3:
        __str__ = __unicode__

    def save(self, *args, **kwargs):
        """
        Saves the company, refreshing derived state for existing rows.

        When a row with this CIK already exists, ``min_date`` and
        ``max_date`` are recomputed from the ``start_date`` of the company's
        attribute values. If ``load`` has just been switched on, the
        company's indexes are flagged so their attributes get loaded again.
        """
        old = None
        if self.cik:
            # Only the lookup can raise DoesNotExist; keep the try that small.
            try:
                old = type(self).objects.get(cik=self.cik)
            except type(self).DoesNotExist:
                old = None

        if old is not None:
            aggs = self.attributes.all()\
                .aggregate(Min('start_date'), Max('start_date'))
            self.min_date = aggs['start_date__min']
            self.max_date = aggs['start_date__max']

            if not old.load and self.load:
                # If we just flag this company for loading then
                # flag this company's indexes for loading.
                Index.objects.filter(
                    company=self, attributes_loaded=True
                ).update(attributes_loaded=False)

        super(Company, self).save(*args, **kwargs)
class Index(models.Model):
    """
    One filing listed in an SEC index, with helpers to locate, download and
    parse its files under ``DATA_DIR``.
    """

    # on_delete is stated explicitly; CASCADE matches the shipped migration.
    company = models.ForeignKey(
        'Company',
        on_delete=models.CASCADE,
        related_name='filings')

    form = models.CharField(
        max_length=10,
        blank=True,
        db_index=True,
        verbose_name=_('form type'),
        help_text=_('The type of form the document is classified as.'))

    date = models.DateField(
        blank=False,
        null=False,
        db_index=True,
        verbose_name=_('date filed'),
        help_text=_('The date the item was filed with the SEC.'))

    filename = models.CharField(
        max_length=100,
        blank=False,
        null=False,
        db_index=True,
        help_text=_('The name of the associated financial filing.'))

    year = models.IntegerField(
        blank=False,
        null=False,
        db_index=True)

    quarter = models.IntegerField(
        blank=False,
        null=False,
        db_index=True)

    _ticker = models.CharField(
        max_length=50,
        db_index=True,
        db_column='ticker',
        verbose_name=_('ticker'),
        blank=True,
        null=True,
        help_text=_('''Caches the trading symbol if one is detected in the
        filing during attribute load.'''))

    attributes_loaded = models.BooleanField(default=False, db_index=True)

    valid = models.BooleanField(
        default=True,
        db_index=True,
        help_text=_('If false, errors were encountered trying to parse the associated files.'))

    error = models.TextField(blank=True, null=True)

    class Meta:
        app_label = APP_LABEL
        verbose_name_plural = _('indexes')
        unique_together = (
            # Note, filenames are not necessarily unique.
            # Filenames may be listed more than once under a different
            # form type.
            ('company', 'form', 'date', 'filename', 'year', 'quarter'),
        )
        index_together = (
            ('year', 'quarter'),
            ('company', 'date', 'filename'),
        )
        ordering = ('-date', 'filename')

    def xbrl_link(self):
        """
        Returns the URL of the filing's XBRL zip for 10-K/10-Q forms,
        otherwise None.
        """
        if self.form.startswith('10-K') or self.form.startswith('10-Q'):
            # Basename of the filing with its 4-character extension removed.
            _id = self.filename.split('/')[-1][:-4]
            return 'http://www.sec.gov/Archives/edgar/data/%s/%s/%s-xbrl.zip' \
                % (self.company.cik, _id.replace('-', ''), _id)
        return None

    def html_link(self):
        """
        Returns the URL of the raw filing under the SEC archives.
        """
        return 'http://www.sec.gov/Archives/%s' % self.filename

    def index_link(self):
        """
        Returns the URL of the filing's index page.
        """
        _id = self.filename.split('/')[-1][:-4]
        return 'http://www.sec.gov/Archives/edgar/data/%s/%s/%s-index.htm' \
            % (self.company.cik, _id.replace('-', ''), _id)

    def txt(self):
        """
        Returns the basename of the filing's filename.
        """
        return self.filename.split('/')[-1]

    def localfile(self):
        """
        Returns the local path of the filing if that file exists, else None.
        """
        filename = '%s/%s/%s/%s' % (DATA_DIR, self.company.cik, self.txt()[:-4], self.txt())
        if os.path.exists(filename):
            return filename
        return None

    def localpath(self):
        """
        Returns the local directory holding this filing's files.
        """
        return '%s/%s/%s/' % (DATA_DIR, self.company.cik, self.txt()[:-4])

    def localcik(self):
        """
        Returns the local directory holding all files for this company.
        """
        return '%s/%s/' % (DATA_DIR, self.company.cik)

    def html(self):
        """
        Returns the HTML portion of the local filing.

        Returns None when there is no local file. When no complete
        ``<html>``...``</html>`` pair is found, a notice is printed and the
        whole file content is returned instead.
        """
        filename = self.localfile()
        if not filename:
            return None
        with open(filename, 'r') as fin:
            text = fin.read()
        lowered = text.lower()
        closing_tag = '</html>'
        start = lowered.find('<html>')
        end = lowered.find(closing_tag)
        # str.find signals "missing" with -1 rather than an exception.
        if start < 0 or end < 0:
            print('html tag not found')
            return text
        return text[start:end + len(closing_tag)]

    def download(self, verbose=False):
        """
        Downloads the filing's XBRL zip into its local directory.

        Creates the company and filing directories when missing and changes
        the process working directory to the filing directory. Nothing is
        downloaded when the filing has no XBRL link or the zip is already
        present.
        """

        for directory in (self.localcik(), self.localpath()):
            if not os.path.isdir(directory):
                os.makedirs(directory)

        os.chdir(self.localpath())

        xbrl_link = self.xbrl_link()
        if verbose:
            print('xbrl_link:', xbrl_link)

        if xbrl_link and not os.path.exists(xbrl_link.split('/')[-1]):
            # The archive is read directly later on; it is never unpacked,
            # which would only waste disk space.
            wget.download(xbrl_link)

    def xbrl_localpath(self):
        """
        Locates the XBRL instance inside the downloaded zip.

        Returns ``(member_name, opener)`` where ``member_name`` is the
        shortest-named ``.xml`` member of the first zip found in the filing
        directory and ``opener`` is that zip's ``open`` method, or
        ``(None, None)`` when there is no zip or no ``.xml`` member. Changes
        the working directory to the filing directory, downloading first if
        that directory cannot be entered.
        """
        try:
            os.chdir(self.localpath())
        except OSError:
            self.download()
        archives = [elem for elem in os.listdir('.') if elem.endswith('.zip')]
        if not archives:
            return None, None
        zf = zipfile.ZipFile(archives[0])
        xml = sorted([elem for elem in zf.namelist() if elem.endswith('.xml')], key=len)
        if not xml:
            return None, None
        return xml[0], zf.open

    def xbrl(self):
        """
        Parses the filing's XBRL instance.

        Returns the parsed object with a few convenience fields filled in,
        or None (after printing a notice) when no XBRL instance is found.
        """
        filepath, open_method = self.xbrl_localpath()
        if not filepath:
            print('no xbrl found. this option is for 10-ks.')
            return None
        x = xbrl.XBRL(filepath, opener=open_method)
        x.fields['FiscalPeriod'] = x.fields['DocumentFiscalPeriodFocus']
        x.fields['FiscalYear'] = x.fields['DocumentFiscalYearFocus']
        x.fields['DocumentPeriodEndDate'] = x.fields['BalanceSheetDate']
        x.fields['PeriodStartDate'] = x.fields['IncomeStatementPeriodYTD']
        x.fields['SECFilingPage'] = self.index_link()
        x.fields['LinkToXBRLInstance'] = self.xbrl_link()
        return x

    def ticker(self):
        """
        Retrieves the company's stock ticker from an XML filing.
        Note, this is not guaranteed to exist.
        """
        if self._ticker:
            return self._ticker
        filepath, _opener = self.xbrl_localpath()
        if filepath:
            # The ticker is taken from the text before the first '-' in the
            # XML member's basename.
            ticker = filepath.split('/')[-1].split('-')[0].strip().upper()
            if ticker:
                self._ticker = ticker
        else:
            self._ticker = None
        return self._ticker
['Unit']) 26 | 27 | # Adding model 'Attribute' 28 | db.create_table(u'django_sec_attribute', ( 29 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 30 | ('namespace', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['django_sec.Namespace'])), 31 | ('name', self.gf('django.db.models.fields.CharField')(max_length=500, db_index=True)), 32 | ('load', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)), 33 | ('total_values', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 34 | ('total_values_fresh', self.gf('django.db.models.fields.BooleanField')(default=False)), 35 | )) 36 | db.send_create_signal(u'django_sec', ['Attribute']) 37 | 38 | # Adding unique constraint on 'Attribute', fields ['namespace', 'name'] 39 | db.create_unique(u'django_sec_attribute', ['namespace_id', 'name']) 40 | 41 | # Adding index on 'Attribute', fields ['namespace', 'name'] 42 | db.create_index(u'django_sec_attribute', ['namespace_id', 'name']) 43 | 44 | # Adding model 'AttributeValue' 45 | db.create_table(u'django_sec_attributevalue', ( 46 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 47 | ('company', self.gf('django.db.models.fields.related.ForeignKey')(related_name='attributes', to=orm['django_sec.Company'])), 48 | ('attribute', self.gf('django.db.models.fields.related.ForeignKey')(related_name='values', to=orm['django_sec.Attribute'])), 49 | ('value', self.gf('django.db.models.fields.DecimalField')(max_digits=20, decimal_places=6)), 50 | ('unit', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['django_sec.Unit'])), 51 | ('start_date', self.gf('django.db.models.fields.DateField')(db_index=True)), 52 | ('end_date', self.gf('django.db.models.fields.DateField')(null=True, blank=True)), 53 | ('filing_date', self.gf('django.db.models.fields.DateField')()), 54 | )) 55 | db.send_create_signal(u'django_sec', ['AttributeValue']) 56 | 57 | # Adding unique constraint 
on 'AttributeValue', fields ['company', 'attribute', 'start_date', 'end_date'] 58 | db.create_unique(u'django_sec_attributevalue', ['company_id', 'attribute_id', 'start_date', 'end_date']) 59 | 60 | # Adding index on 'AttributeValue', fields ['company', 'attribute', 'start_date'] 61 | db.create_index(u'django_sec_attributevalue', ['company_id', 'attribute_id', 'start_date']) 62 | 63 | # Adding model 'IndexFile' 64 | db.create_table(u'django_sec_indexfile', ( 65 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 66 | ('year', self.gf('django.db.models.fields.IntegerField')(db_index=True)), 67 | ('quarter', self.gf('django.db.models.fields.IntegerField')(db_index=True)), 68 | ('filename', self.gf('django.db.models.fields.CharField')(max_length=200)), 69 | ('total_rows', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 70 | ('processed_rows', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 71 | ('downloaded', self.gf('django.db.models.fields.DateTimeField')(null=True, blank=True)), 72 | ('processed', self.gf('django.db.models.fields.DateTimeField')(null=True, blank=True)), 73 | )) 74 | db.send_create_signal(u'django_sec', ['IndexFile']) 75 | 76 | # Adding unique constraint on 'IndexFile', fields ['year', 'quarter'] 77 | db.create_unique(u'django_sec_indexfile', ['year', 'quarter']) 78 | 79 | # Adding model 'Company' 80 | db.create_table(u'django_sec_company', ( 81 | ('cik', self.gf('django.db.models.fields.IntegerField')(primary_key=True, db_index=True)), 82 | ('name', self.gf('django.db.models.fields.CharField')(max_length=100, db_index=True)), 83 | ('load', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)), 84 | )) 85 | db.send_create_signal(u'django_sec', ['Company']) 86 | 87 | # Adding model 'Index' 88 | db.create_table(u'django_sec_index', ( 89 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 90 | ('company', 
self.gf('django.db.models.fields.related.ForeignKey')(related_name='filings', to=orm['django_sec.Company'])), 91 | ('form', self.gf('django.db.models.fields.CharField')(db_index=True, max_length=10, blank=True)), 92 | ('date', self.gf('django.db.models.fields.DateField')(db_index=True)), 93 | ('filename', self.gf('django.db.models.fields.CharField')(max_length=100, db_index=True)), 94 | ('year', self.gf('django.db.models.fields.IntegerField')(db_index=True)), 95 | ('quarter', self.gf('django.db.models.fields.IntegerField')(db_index=True)), 96 | ('attributes_loaded', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)), 97 | ('valid', self.gf('django.db.models.fields.BooleanField')(default=True, db_index=True)), 98 | ('error', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), 99 | )) 100 | db.send_create_signal(u'django_sec', ['Index']) 101 | 102 | # Adding unique constraint on 'Index', fields ['company', 'form', 'date', 'filename', 'year', 'quarter'] 103 | db.create_unique(u'django_sec_index', ['company_id', 'form', 'date', 'filename', 'year', 'quarter']) 104 | 105 | # Adding index on 'Index', fields ['year', 'quarter'] 106 | db.create_index(u'django_sec_index', ['year', 'quarter']) 107 | 108 | 109 | def backwards(self, orm): 110 | # Removing index on 'Index', fields ['year', 'quarter'] 111 | db.delete_index(u'django_sec_index', ['year', 'quarter']) 112 | 113 | # Removing unique constraint on 'Index', fields ['company', 'form', 'date', 'filename', 'year', 'quarter'] 114 | db.delete_unique(u'django_sec_index', ['company_id', 'form', 'date', 'filename', 'year', 'quarter']) 115 | 116 | # Removing unique constraint on 'IndexFile', fields ['year', 'quarter'] 117 | db.delete_unique(u'django_sec_indexfile', ['year', 'quarter']) 118 | 119 | # Removing index on 'AttributeValue', fields ['company', 'attribute', 'start_date'] 120 | db.delete_index(u'django_sec_attributevalue', ['company_id', 'attribute_id', 'start_date']) 121 | 
122 | # Removing unique constraint on 'AttributeValue', fields ['company', 'attribute', 'start_date', 'end_date'] 123 | db.delete_unique(u'django_sec_attributevalue', ['company_id', 'attribute_id', 'start_date', 'end_date']) 124 | 125 | # Removing index on 'Attribute', fields ['namespace', 'name'] 126 | db.delete_index(u'django_sec_attribute', ['namespace_id', 'name']) 127 | 128 | # Removing unique constraint on 'Attribute', fields ['namespace', 'name'] 129 | db.delete_unique(u'django_sec_attribute', ['namespace_id', 'name']) 130 | 131 | # Deleting model 'Namespace' 132 | db.delete_table(u'django_sec_namespace') 133 | 134 | # Deleting model 'Unit' 135 | db.delete_table(u'django_sec_unit') 136 | 137 | # Deleting model 'Attribute' 138 | db.delete_table(u'django_sec_attribute') 139 | 140 | # Deleting model 'AttributeValue' 141 | db.delete_table(u'django_sec_attributevalue') 142 | 143 | # Deleting model 'IndexFile' 144 | db.delete_table(u'django_sec_indexfile') 145 | 146 | # Deleting model 'Company' 147 | db.delete_table(u'django_sec_company') 148 | 149 | # Deleting model 'Index' 150 | db.delete_table(u'django_sec_index') 151 | 152 | 153 | models = { 154 | u'django_sec.attribute': { 155 | 'Meta': {'unique_together': "(('namespace', 'name'),)", 'object_name': 'Attribute', 'index_together': "(('namespace', 'name'),)"}, 156 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 157 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 158 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'db_index': 'True'}), 159 | 'namespace': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Namespace']"}), 160 | 'total_values': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 161 | 'total_values_fresh': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) 162 | }, 163 | u'django_sec.attributevalue': { 164 | 'Meta': 
{'ordering': "('-attribute__total_values', '-start_date', 'attribute__name')", 'unique_together': "(('company', 'attribute', 'start_date', 'end_date'),)", 'object_name': 'AttributeValue', 'index_together': "(('company', 'attribute', 'start_date'),)"}, 165 | 'attribute': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'values'", 'to': u"orm['django_sec.Attribute']"}), 166 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'attributes'", 'to': u"orm['django_sec.Company']"}), 167 | 'end_date': ('django.db.models.fields.DateField', [], {'null': 'True', 'blank': 'True'}), 168 | 'filing_date': ('django.db.models.fields.DateField', [], {}), 169 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 170 | 'start_date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 171 | 'unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']"}), 172 | 'value': ('django.db.models.fields.DecimalField', [], {'max_digits': '20', 'decimal_places': '6'}) 173 | }, 174 | u'django_sec.company': { 175 | 'Meta': {'object_name': 'Company'}, 176 | 'cik': ('django.db.models.fields.IntegerField', [], {'primary_key': 'True', 'db_index': 'True'}), 177 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 178 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}) 179 | }, 180 | u'django_sec.index': { 181 | 'Meta': {'unique_together': "(('company', 'form', 'date', 'filename', 'year', 'quarter'),)", 'object_name': 'Index', 'index_together': "(('year', 'quarter'),)"}, 182 | 'attributes_loaded': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 183 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filings'", 'to': u"orm['django_sec.Company']"}), 184 | 'date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 185 | 
'error': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), 186 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}), 187 | 'form': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '10', 'blank': 'True'}), 188 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 189 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 190 | 'valid': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), 191 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 192 | }, 193 | u'django_sec.indexfile': { 194 | 'Meta': {'ordering': "('year', 'quarter')", 'unique_together': "(('year', 'quarter'),)", 'object_name': 'IndexFile'}, 195 | 'downloaded': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 196 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '200'}), 197 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 198 | 'processed': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 199 | 'processed_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 200 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 201 | 'total_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 202 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 203 | }, 204 | u'django_sec.namespace': { 205 | 'Meta': {'object_name': 'Namespace'}, 206 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 207 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '500', 'db_index': 'True'}) 208 | }, 209 | u'django_sec.unit': { 210 | 'Meta': {'object_name': 'Unit'}, 211 | u'id': ('django.db.models.fields.AutoField', [], 
{'primary_key': 'True'}), 212 | 'master': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 213 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '200', 'db_index': 'True'}), 214 | 'true_unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']", 'null': 'True', 'on_delete': 'models.SET_NULL', 'blank': 'True'}) 215 | } 216 | } 217 | 218 | complete_apps = ['django_sec'] -------------------------------------------------------------------------------- /django_sec/south_migrations/0002_auto__add_field_index__ticker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from south.utils import datetime_utils as datetime 3 | from south.db import db 4 | from south.v2 import SchemaMigration 5 | from django.db import models 6 | 7 | 8 | class Migration(SchemaMigration): 9 | 10 | def forwards(self, orm): 11 | # Adding field 'Index._ticker' 12 | db.add_column(u'django_sec_index', '_ticker', 13 | self.gf('django.db.models.fields.CharField')(db_index=True, max_length=50, null=True, db_column='ticker', blank=True), 14 | keep_default=False) 15 | 16 | 17 | def backwards(self, orm): 18 | # Deleting field 'Index._ticker' 19 | db.delete_column(u'django_sec_index', 'ticker') 20 | 21 | 22 | models = { 23 | u'django_sec.attribute': { 24 | 'Meta': {'unique_together': "(('namespace', 'name'),)", 'object_name': 'Attribute', 'index_together': "(('namespace', 'name'),)"}, 25 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 26 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 27 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'db_index': 'True'}), 28 | 'namespace': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Namespace']"}), 29 | 'total_values': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 
'blank': 'True'}), 30 | 'total_values_fresh': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) 31 | }, 32 | u'django_sec.attributevalue': { 33 | 'Meta': {'ordering': "('-attribute__total_values', '-start_date', 'attribute__name')", 'unique_together': "(('company', 'attribute', 'start_date', 'end_date'),)", 'object_name': 'AttributeValue', 'index_together': "(('company', 'attribute', 'start_date'),)"}, 34 | 'attribute': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'values'", 'to': u"orm['django_sec.Attribute']"}), 35 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'attributes'", 'to': u"orm['django_sec.Company']"}), 36 | 'end_date': ('django.db.models.fields.DateField', [], {'null': 'True', 'blank': 'True'}), 37 | 'filing_date': ('django.db.models.fields.DateField', [], {}), 38 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 39 | 'start_date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 40 | 'unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']"}), 41 | 'value': ('django.db.models.fields.DecimalField', [], {'max_digits': '20', 'decimal_places': '6'}) 42 | }, 43 | u'django_sec.company': { 44 | 'Meta': {'object_name': 'Company'}, 45 | 'cik': ('django.db.models.fields.IntegerField', [], {'primary_key': 'True', 'db_index': 'True'}), 46 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 47 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}) 48 | }, 49 | u'django_sec.index': { 50 | 'Meta': {'unique_together': "(('company', 'form', 'date', 'filename', 'year', 'quarter'),)", 'object_name': 'Index', 'index_together': "(('year', 'quarter'),)"}, 51 | '_ticker': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '50', 'null': 'True', 'db_column': "'ticker'", 'blank': 'True'}), 52 | 'attributes_loaded': 
('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 53 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filings'", 'to': u"orm['django_sec.Company']"}), 54 | 'date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 55 | 'error': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), 56 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}), 57 | 'form': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '10', 'blank': 'True'}), 58 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 59 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 60 | 'valid': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), 61 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 62 | }, 63 | u'django_sec.indexfile': { 64 | 'Meta': {'ordering': "('year', 'quarter')", 'unique_together': "(('year', 'quarter'),)", 'object_name': 'IndexFile'}, 65 | 'downloaded': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 66 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '200'}), 67 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 68 | 'processed': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 69 | 'processed_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 70 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 71 | 'total_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 72 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 73 | }, 74 | u'django_sec.namespace': { 75 | 'Meta': {'object_name': 'Namespace'}, 76 | u'id': ('django.db.models.fields.AutoField', 
[], {'primary_key': 'True'}), 77 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '500', 'db_index': 'True'}) 78 | }, 79 | u'django_sec.unit': { 80 | 'Meta': {'object_name': 'Unit'}, 81 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 82 | 'master': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 83 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '200', 'db_index': 'True'}), 84 | 'true_unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']", 'null': 'True', 'on_delete': 'models.SET_NULL', 'blank': 'True'}) 85 | } 86 | } 87 | 88 | complete_apps = ['django_sec'] -------------------------------------------------------------------------------- /django_sec/south_migrations/0003_auto__add_index_index_company_date_filename.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from south.utils import datetime_utils as datetime 3 | from south.db import db 4 | from south.v2 import SchemaMigration 5 | from django.db import models 6 | 7 | 8 | class Migration(SchemaMigration): 9 | 10 | def forwards(self, orm): 11 | # Adding index on 'Index', fields ['company', 'date', 'filename'] 12 | db.create_index(u'django_sec_index', ['company_id', 'date', 'filename']) 13 | 14 | 15 | def backwards(self, orm): 16 | # Removing index on 'Index', fields ['company', 'date', 'filename'] 17 | db.delete_index(u'django_sec_index', ['company_id', 'date', 'filename']) 18 | 19 | 20 | models = { 21 | u'django_sec.attribute': { 22 | 'Meta': {'unique_together': "(('namespace', 'name'),)", 'object_name': 'Attribute', 'index_together': "(('namespace', 'name'),)"}, 23 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 24 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 25 | 'name': ('django.db.models.fields.CharField', 
[], {'max_length': '500', 'db_index': 'True'}), 26 | 'namespace': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Namespace']"}), 27 | 'total_values': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 28 | 'total_values_fresh': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) 29 | }, 30 | u'django_sec.attributevalue': { 31 | 'Meta': {'ordering': "('-attribute__total_values', '-start_date', 'attribute__name')", 'unique_together': "(('company', 'attribute', 'start_date', 'end_date'),)", 'object_name': 'AttributeValue', 'index_together': "(('company', 'attribute', 'start_date'),)"}, 32 | 'attribute': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'values'", 'to': u"orm['django_sec.Attribute']"}), 33 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'attributes'", 'to': u"orm['django_sec.Company']"}), 34 | 'end_date': ('django.db.models.fields.DateField', [], {'null': 'True', 'blank': 'True'}), 35 | 'filing_date': ('django.db.models.fields.DateField', [], {}), 36 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 37 | 'start_date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 38 | 'unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']"}), 39 | 'value': ('django.db.models.fields.DecimalField', [], {'max_digits': '20', 'decimal_places': '6'}) 40 | }, 41 | u'django_sec.company': { 42 | 'Meta': {'object_name': 'Company'}, 43 | 'cik': ('django.db.models.fields.IntegerField', [], {'primary_key': 'True', 'db_index': 'True'}), 44 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 45 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}) 46 | }, 47 | u'django_sec.index': { 48 | 'Meta': {'unique_together': "(('company', 'form', 'date', 'filename', 'year', 'quarter'),)", 
'object_name': 'Index', 'index_together': "(('year', 'quarter'), ('company', 'date', 'filename'))"}, 49 | '_ticker': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '50', 'null': 'True', 'db_column': "'ticker'", 'blank': 'True'}), 50 | 'attributes_loaded': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 51 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filings'", 'to': u"orm['django_sec.Company']"}), 52 | 'date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 53 | 'error': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), 54 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}), 55 | 'form': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '10', 'blank': 'True'}), 56 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 57 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 58 | 'valid': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), 59 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 60 | }, 61 | u'django_sec.indexfile': { 62 | 'Meta': {'ordering': "('year', 'quarter')", 'unique_together': "(('year', 'quarter'),)", 'object_name': 'IndexFile'}, 63 | 'downloaded': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 64 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '200'}), 65 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 66 | 'processed': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 67 | 'processed_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 68 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 69 | 'total_rows': 
('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 70 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 71 | }, 72 | u'django_sec.namespace': { 73 | 'Meta': {'object_name': 'Namespace'}, 74 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 75 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '500', 'db_index': 'True'}) 76 | }, 77 | u'django_sec.unit': { 78 | 'Meta': {'object_name': 'Unit'}, 79 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 80 | 'master': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 81 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '200', 'db_index': 'True'}), 82 | 'true_unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']", 'null': 'True', 'on_delete': 'models.SET_NULL', 'blank': 'True'}) 83 | } 84 | } 85 | 86 | complete_apps = ['django_sec'] -------------------------------------------------------------------------------- /django_sec/south_migrations/0004_auto__chg_field_attributevalue_value.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from south.utils import datetime_utils as datetime 3 | from south.db import db 4 | from south.v2 import SchemaMigration 5 | from django.db import models 6 | 7 | 8 | class Migration(SchemaMigration): 9 | 10 | def forwards(self, orm): 11 | 12 | # Changing field 'AttributeValue.value' 13 | db.alter_column(u'django_sec_attributevalue', 'value', self.gf('django.db.models.fields.DecimalField')(max_digits=40, decimal_places=6)) 14 | 15 | def backwards(self, orm): 16 | 17 | # Changing field 'AttributeValue.value' 18 | db.alter_column(u'django_sec_attributevalue', 'value', self.gf('django.db.models.fields.DecimalField')(max_digits=20, decimal_places=6)) 19 | 20 | models = { 21 | u'django_sec.attribute': { 
22 | 'Meta': {'unique_together': "(('namespace', 'name'),)", 'object_name': 'Attribute', 'index_together': "(('namespace', 'name'),)"}, 23 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 24 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 25 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'db_index': 'True'}), 26 | 'namespace': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Namespace']"}), 27 | 'total_values': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 28 | 'total_values_fresh': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) 29 | }, 30 | u'django_sec.attributevalue': { 31 | 'Meta': {'ordering': "('-attribute__total_values', '-start_date', 'attribute__name')", 'unique_together': "(('company', 'attribute', 'start_date', 'end_date'),)", 'object_name': 'AttributeValue', 'index_together': "(('company', 'attribute', 'start_date'),)"}, 32 | 'attribute': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'values'", 'to': u"orm['django_sec.Attribute']"}), 33 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'attributes'", 'to': u"orm['django_sec.Company']"}), 34 | 'end_date': ('django.db.models.fields.DateField', [], {'null': 'True', 'blank': 'True'}), 35 | 'filing_date': ('django.db.models.fields.DateField', [], {}), 36 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 37 | 'start_date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 38 | 'unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']"}), 39 | 'value': ('django.db.models.fields.DecimalField', [], {'max_digits': '40', 'decimal_places': '6'}) 40 | }, 41 | u'django_sec.company': { 42 | 'Meta': {'object_name': 'Company'}, 43 | 'cik': ('django.db.models.fields.IntegerField', [], {'primary_key': 
'True', 'db_index': 'True'}), 44 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 45 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}) 46 | }, 47 | u'django_sec.index': { 48 | 'Meta': {'unique_together': "(('company', 'form', 'date', 'filename', 'year', 'quarter'),)", 'object_name': 'Index', 'index_together': "(('year', 'quarter'), ('company', 'date', 'filename'))"}, 49 | '_ticker': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '50', 'null': 'True', 'db_column': "'ticker'", 'blank': 'True'}), 50 | 'attributes_loaded': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 51 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filings'", 'to': u"orm['django_sec.Company']"}), 52 | 'date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 53 | 'error': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), 54 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}), 55 | 'form': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '10', 'blank': 'True'}), 56 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 57 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 58 | 'valid': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), 59 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 60 | }, 61 | u'django_sec.indexfile': { 62 | 'Meta': {'ordering': "('year', 'quarter')", 'unique_together': "(('year', 'quarter'),)", 'object_name': 'IndexFile'}, 63 | 'downloaded': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 64 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '200'}), 65 | u'id': 
('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 66 | 'processed': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 67 | 'processed_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 68 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 69 | 'total_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 70 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 71 | }, 72 | u'django_sec.namespace': { 73 | 'Meta': {'object_name': 'Namespace'}, 74 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 75 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '500', 'db_index': 'True'}) 76 | }, 77 | u'django_sec.unit': { 78 | 'Meta': {'object_name': 'Unit'}, 79 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 80 | 'master': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 81 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '200', 'db_index': 'True'}), 82 | 'true_unit': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['django_sec.Unit']", 'null': 'True', 'on_delete': 'models.SET_NULL', 'blank': 'True'}) 83 | } 84 | } 85 | 86 | complete_apps = ['django_sec'] -------------------------------------------------------------------------------- /django_sec/south_migrations/0005_auto__add_field_company_min_date__add_field_company_max_date.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from south.utils import datetime_utils as datetime 3 | from south.db import db 4 | from south.v2 import SchemaMigration 5 | from django.db import models 6 | 7 | 8 | class Migration(SchemaMigration): 9 | 10 | def forwards(self, orm): 11 | # Adding field 'Company.min_date' 12 | 
db.add_column(u'django_sec_company', 'min_date', 13 | self.gf('django.db.models.fields.DateField')(db_index=True, null=True, blank=True), 14 | keep_default=False) 15 | 16 | # Adding field 'Company.max_date' 17 | db.add_column(u'django_sec_company', 'max_date', 18 | self.gf('django.db.models.fields.DateField')(db_index=True, null=True, blank=True), 19 | keep_default=False) 20 | 21 | 22 | def backwards(self, orm): 23 | # Deleting field 'Company.min_date' 24 | db.delete_column(u'django_sec_company', 'min_date') 25 | 26 | # Deleting field 'Company.max_date' 27 | db.delete_column(u'django_sec_company', 'max_date') 28 | 29 | 30 | models = { 31 | 'django_sec.attribute': { 32 | 'Meta': {'unique_together': "(('namespace', 'name'),)", 'object_name': 'Attribute', 'index_together': "(('namespace', 'name'),)"}, 33 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 34 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 35 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'db_index': 'True'}), 36 | 'namespace': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['django_sec.Namespace']"}), 37 | 'total_values': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 38 | 'total_values_fresh': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) 39 | }, 40 | 'django_sec.attributevalue': { 41 | 'Meta': {'ordering': "('-attribute__total_values', '-start_date', 'attribute__name')", 'unique_together': "(('company', 'attribute', 'start_date', 'end_date'),)", 'object_name': 'AttributeValue', 'index_together': "(('company', 'attribute', 'start_date'),)"}, 42 | 'attribute': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'values'", 'to': "orm['django_sec.Attribute']"}), 43 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'attributes'", 'to': "orm['django_sec.Company']"}), 44 | 
'end_date': ('django.db.models.fields.DateField', [], {'null': 'True', 'blank': 'True'}), 45 | 'filing_date': ('django.db.models.fields.DateField', [], {}), 46 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 47 | 'start_date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 48 | 'unit': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['django_sec.Unit']"}), 49 | 'value': ('django.db.models.fields.DecimalField', [], {'max_digits': '40', 'decimal_places': '6'}) 50 | }, 51 | 'django_sec.company': { 52 | 'Meta': {'object_name': 'Company'}, 53 | 'cik': ('django.db.models.fields.IntegerField', [], {'primary_key': 'True', 'db_index': 'True'}), 54 | 'load': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 55 | 'max_date': ('django.db.models.fields.DateField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}), 56 | 'min_date': ('django.db.models.fields.DateField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}), 57 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}) 58 | }, 59 | 'django_sec.index': { 60 | 'Meta': {'ordering': "('-date', 'filename')", 'unique_together': "(('company', 'form', 'date', 'filename', 'year', 'quarter'),)", 'object_name': 'Index', 'index_together': "(('year', 'quarter'), ('company', 'date', 'filename'))"}, 61 | '_ticker': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '50', 'null': 'True', 'db_column': "'ticker'", 'blank': 'True'}), 62 | 'attributes_loaded': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), 63 | 'company': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filings'", 'to': "orm['django_sec.Company']"}), 64 | 'date': ('django.db.models.fields.DateField', [], {'db_index': 'True'}), 65 | 'error': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), 66 | 'filename': 
('django.db.models.fields.CharField', [], {'max_length': '100', 'db_index': 'True'}), 67 | 'form': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '10', 'blank': 'True'}), 68 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 69 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 70 | 'valid': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), 71 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 72 | }, 73 | 'django_sec.indexfile': { 74 | 'Meta': {'ordering': "('year', 'quarter')", 'unique_together': "(('year', 'quarter'),)", 'object_name': 'IndexFile'}, 75 | 'downloaded': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 76 | 'filename': ('django.db.models.fields.CharField', [], {'max_length': '200'}), 77 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 78 | 'processed': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), 79 | 'processed_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 80 | 'quarter': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}), 81 | 'total_rows': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 82 | 'year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}) 83 | }, 84 | 'django_sec.namespace': { 85 | 'Meta': {'object_name': 'Namespace'}, 86 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 87 | 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '500', 'db_index': 'True'}) 88 | }, 89 | 'django_sec.unit': { 90 | 'Meta': {'ordering': "('name',)", 'object_name': 'Unit'}, 91 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 92 | 'master': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 93 | 
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '200', 'db_index': 'True'}), 94 | 'true_unit': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['django_sec.Unit']", 'null': 'True', 'on_delete': 'models.SET_NULL', 'blank': 'True'}) 95 | } 96 | } 97 | 98 | complete_apps = ['django_sec'] -------------------------------------------------------------------------------- /django_sec/south_migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/south_migrations/__init__.py -------------------------------------------------------------------------------- /django_sec/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/tests/__init__.py -------------------------------------------------------------------------------- /django_sec/tests/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, '.') 6 | 7 | if __name__ == "__main__": 8 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") 9 | 10 | from django.core.management import execute_from_command_line 11 | 12 | execute_from_command_line(sys.argv) 13 | -------------------------------------------------------------------------------- /django_sec/tests/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/tests/management/__init__.py -------------------------------------------------------------------------------- /django_sec/tests/management/commands/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/tests/management/commands/__init__.py -------------------------------------------------------------------------------- /django_sec/tests/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisspen/django-sec/184c3308b943971f75109c7ff2aeddc12ab1f58e/django_sec/tests/models.py -------------------------------------------------------------------------------- /django_sec/tests/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import django 4 | 5 | PROJECT_DIR = os.path.dirname(__file__) 6 | 7 | DATABASES = { 8 | 'default':{ 9 | 'ENGINE': 'django.db.backends.sqlite3', 10 | # Don't do this. It dramatically slows down the test. 11 | # 'NAME': '/tmp/django_sec.db', 12 | # 'TEST_NAME': '/tmp/django_sec.db', 13 | } 14 | } 15 | 16 | ROOT_URLCONF = 'django_sec.tests.urls' 17 | 18 | INSTALLED_APPS = [ 19 | 'django.contrib.auth', 20 | 'django.contrib.admin', 21 | 'django.contrib.contenttypes', 22 | 'django.contrib.sessions', 23 | 'django.contrib.sites', 24 | 'django_sec', 25 | 'django_sec.tests', 26 | ] 27 | 28 | MEDIA_ROOT = os.path.join(PROJECT_DIR, 'media') 29 | 30 | # Disable migrations. 
# Disable migrations.
# http://stackoverflow.com/a/28560805/247542
class DisableMigrations(object):
    """
    Mapping look-alike that answers every lookup the same way.

    Membership tests always succeed and every key resolves to the string
    "notmigrations".  Presumably meant to be assigned to Django's
    MIGRATION_MODULES setting so that no app's real migrations package is
    found during tests -- confirm against the settings that use it.
    """

    def __contains__(self, item):
        """Report every ``item`` as present, whatever it is."""
        return True

    def __getitem__(self, item):
        """Return the placeholder module name for any ``item``."""
        return "notmigrations"
class Tests(TestCase):
    """
    Integration tests for the django_sec management commands and admin.

    ``test_sec_import_index_attrs`` runs the real import commands, so it is
    expected to need whatever data source those commands read from.
    """

    #fixtures = ['test_jobs.json']

    def setUp(self):
        pass

    def _run_command(self, command, **options):
        """
        Run a management command with its stdout captured.

        The captured output is echoed (so it shows up in the test log) and
        the test fails if the word "error" appears anywhere in it,
        case-insensitively.  Returns the captured output.
        """
        buf = six.StringIO()
        call_command(command, stdout=buf, **options)
        output = buf.getvalue()
        print(output)
        # assertNotIn shows the offending output on failure, unlike
        # assertTrue(... not in ...).
        self.assertNotIn('error', output.lower())
        return output

    def _test_example(self):
        # Disabled (leading underscore keeps the test runner from collecting
        # it); kept as a usage example of the XBRL API.

        # get a file from the index. it may or may not be present on our hard disk.
        # if it's not, it will be downloaded
        # the first time we try to access it, or you can call .download() explicitly
        filing = models.Index.objects\
            .filter(form='10-K', cik=1090872)\
            .order_by('-date')[0]

        print(filing.name)

        # initialize XBRL parser and populate an attribute called fields with a dict
        # of 50 common terms
        x = filing.xbrl()

        print(x.fields['FiscalYear'])

        print(x.fields)

        # fetch arbitrary XBRL tags representing either an Instant or a Duration in time
        print('Tax rate',
            x.GetFactValue('us-gaap:EffectiveIncomeTaxRateContinuingOperations', 'Duration'))

        if x.loadYear(1):
            # Most 10-Ks have two or three previous years contained in them for the major values.
            # This call switches the contexts to the prior year (set it to 2 or 3 instead of 1 to
            # go back further) and reloads the fundamental concepts.
            # Any calls to GetFactValue will use that year's value from that point on.

            print(x.fields['FiscalYear'])

            print(x.fields)

            print('Tax rate',
                x.GetFactValue('us-gaap:EffectiveIncomeTaxRateContinuingOperations', 'Duration'))

    def test_sec_import_index_attrs(self):

        self.assertEqual(models.Index.objects.all().count(), 0)
        self.assertEqual(models.Company.objects.all().count(), 0)
        self.assertEqual(models.Attribute.objects.all().count(), 0)

        # Download index file.
        self._run_command(
            'sec_import_index',
            start_year='2016',#str(date.today().year-1),
            max_lines='20',
            quarter='1',
            traceback=True,
            dryrun=False)

        # The index import creates company records and index records for each company.
        self.assertGreater(models.Index.objects.all().count(), 0)
        self.assertGreater(models.Company.objects.all().count(), 0)
        self.assertEqual(models.Attribute.objects.all().count(), 0)
        self.assertEqual(models.AttributeValue.objects.all().count(), 0)

        # However, by default, companies aren't marked to download any attributes.
        self.assertEqual(models.Company.objects.filter(load=True).count(), 0)
        self.assertEqual(models.Attribute.objects.filter(load=True).count(), 0)

#         _fn = '/tmp/django_sec/company_2016_1.zip'
#         try:
#             os.remove(_fn)
#         except OSError:
#             pass
#         self.assertTrue(not os.path.isfile(_fn))
#         shutil.copy('django_sec/fixtures/company_2016_1.zip', _fn)
#         self.assertTrue(os.path.isfile(_fn))

        attr_options = dict(
            start_year=str(date.today().year-1),
            verbose=True,
            traceback=True)

        # Extract attributes from all downloaded indexes.
        self._run_command('sec_import_attrs', **attr_options)

        # No company or attribute was marked to load, so nothing should have been created.
        self.assertEqual(models.Attribute.objects.all().count(), 0)
        self.assertEqual(models.AttributeValue.objects.all().count(), 0)

        # Mark companies for loading attributes.
        models.Company.objects.all().update(load=True)
        # Extract attributes from all downloaded indexes.
        self._run_command('sec_import_attrs', **attr_options)

        # Many attribute and attribute value records should have been loaded.
        self.assertGreater(models.Attribute.objects.all().count(), 0)
        self.assertGreater(models.AttributeValue.objects.all().count(), 0)

    def _test_sec_xbrl_to_csv(self):
        # Disabled; only checks that the command can be invoked.
        call_command('sec_xbrl_to_csv')

    def test_sec_mark_units(self):
        unit, _ = models.Unit.objects.get_or_create(name='U_iso4217USD')
        # In Django >= 1.9, you can't set a self-referential field during creation.
        unit.save()
        self.assertTrue(unit.true_unit)
        self.assertEqual(unit.true_unit, unit)

        call_command('sec_mark_units')

    def test_search(self):
        """Every admin changelist renders, with and without a search term."""
        #client = Client()
        user = User.objects.create(username='testuser', is_active=True, is_staff=True, is_superuser=True)
        user.set_password('12345')
        user.save()
        #self.client.force_login(user)
        # Fail here, with a clear cause, rather than on the first admin page.
        self.assertTrue(self.client.login(username=user.username, password='12345'))

        model_names = (
            'company',
            'index',
            'attribute',
            'attributevalue',
            'namespace',
            'unit',
        )
        for model_name in model_names:
            changelist_url = '/admin/django_sec/%s/' % model_name
            for url in (changelist_url, changelist_url + '?q=abc'):
                response = self.client.get(url)
                # The URL is passed as the message so a failure names the page.
                self.assertEqual(response.status_code, 200, url)
def str_to_date(s):
    """
    Convert a date-like value to a ``datetime.date``.

    ``s`` is stringified, stripped and parsed with ``dateutil``; any time
    component of the parsed value is discarded.

    Returns ``None`` when ``s`` is ``None`` or blank.  Text that dateutil
    cannot parse raises whatever ``dateutil.parser.parse`` raises.
    """
    # str(None) would become the literal text 'None' and be handed to the
    # parser, so treat a missing value the same as a blank one.
    if s is None:
        return None
    text = str(s).strip()
    if not text:
        return None
    # parse() either returns a datetime or raises, so no emptiness check is
    # needed on the result.
    dt = dateutil.parser.parse(text)
    return date(dt.year, dt.month, dt.day)
class XBRL:
    """
    An XBRL instance document parsed with lxml, plus fact-lookup helpers.

    After construction, ``fields`` holds the document's base ``dei``
    information and the context ids/dates chosen for the current period.
    """

    # Name under which lxml reports the namespaced ``xsi:nil`` attribute.
    _XSI_NIL = '{http://www.w3.org/2001/XMLSchema-instance}nil'

    # (key in ``fields``, dei element name, value stored when it is absent),
    # in the order the fields are populated.
    _BASE_FIELDS = (
        ('EntityRegistrantName', 'EntityRegistrantName', "Registered name not found"),
        ('FiscalYear', 'CurrentFiscalYearEndDate', "Fiscal year not found"),
        ('EntityCentralIndexKey', 'EntityCentralIndexKey', "CIK not found"),
        ('EntityFilerCategory', 'EntityFilerCategory', "Filer category not found"),
        ('TradingSymbol', 'TradingSymbol', None),
        ('DocumentFiscalYearFocus', 'DocumentFiscalYearFocus', "Fiscal year focus not found"),
        ('DocumentFiscalPeriodFocus', 'DocumentFiscalPeriodFocus', "Fiscal period focus not found"),
        ('DocumentType', 'DocumentType', "Document type not found"),
    )

    def __init__(self, XBRLInstanceLocation, opener=None):
        """
        Read and parse the instance document, then load the current year.

        XBRLInstanceLocation -- path of the document, or the name passed to
            ``opener``.
        opener -- optional callable ``opener(name, 'r')`` returning an object
            with ``read()``; lets the document be read straight out of an
            archive.
        """
        self.XBRLInstanceLocation = XBRLInstanceLocation
        self.fields = {}

        # Per-context date caches; created first so they exist before any
        # other method runs.
        self._context_start_dates = {}
        self._context_end_dates = {}

        if opener:
            # Allow us to read directly from a ZIP archive without extracting
            # the whole thing.
            handle = opener(XBRLInstanceLocation, 'r')
        else:
            # Binary mode: lxml rejects text strings that carry an XML
            # encoding declaration, so hand it the raw bytes.
            handle = open(XBRLInstanceLocation, 'rb')
        try:
            self.EntireInstanceDocument = handle.read()
        finally:
            # The opener's return value is only required to have read().
            close = getattr(handle, 'close', None)
            if close is not None:
                close()

        self.oInstance = etree.fromstring(self.EntireInstanceDocument)

        # XPath cannot use the default (None) prefix, so leave it out.
        self.ns = dict(
            (prefix, uri)
            for prefix, uri in self.oInstance.nsmap.items()
            if prefix is not None)
        self.ns['xbrli'] = 'http://www.xbrl.org/2003/instance'
        # NOTE(review): 'xlmns' looks like a misspelling of 'xmlns'; kept
        # as-is because it is a runtime key.
        self.ns['xlmns'] = 'http://www.xbrl.org/2003/instance'

        self.GetBaseInformation()
        self.loadYear(0)

    def loadYear(self, yearminus=0):
        """
        Point the period/context fields at the document year minus
        ``yearminus``.

        Returns True when dei:DocumentPeriodEndDate holds a YYYY-MM-DD date,
        otherwise prints a message and returns False.

        NOTE(review): a document without dei:DocumentPeriodEndDate makes
        this raise AttributeError, because getNode() returns None.
        """
        currentEnd = self.getNode("//dei:DocumentPeriodEndDate").text
        asdate = re.match(r'\s*(\d{4})-(\d{2})-(\d{2})\s*', currentEnd)
        if not asdate:
            print(currentEnd, ' is not a date')
            return False

        year_text, month_text, day_text = asdate.groups()
        year = int(year_text) - yearminus
        thisend = '%s-%s-%s' % (year, month_text, day_text)
        self.GetCurrentPeriodAndContextInformation(thisend)
        # Instantiated for its effect on this object; the instance itself is
        # not kept.
        FundamentantalAccountingConcepts(self)
        return True

    def getNodeList(self, xpath, root=None):
        """
        Evaluate ``xpath`` with this document's namespace map.

        ``root`` defaults to the document root.  Returns whatever the
        element's ``xpath()`` returns.
        """
        if root is None:
            root = self.oInstance
        return root.xpath(xpath, namespaces=self.ns)

    def getNode(self, xpath, root=None):
        """Return the first result of ``xpath``, or None when there is none."""
        oNodelist = self.getNodeList(xpath, root)
        if len(oNodelist):
            return oNodelist[0]
        return None

    def iter_namespace(self, ns='us-gaap'):
        """
        Iterates over all elements in the namespace prefix ``ns``, yielding
        ``(node, total)`` where ``total`` is the number of matching elements.
        """
        node_list = self.getNodeList("//" + '%s:*' % (ns,))
        total = len(node_list)
        for node in node_list:
            yield node, total

    def _is_nil(self, node):
        """True when ``node`` is flagged nil (``xsi:nil`` or a bare ``nil``)."""
        # lxml exposes xsi:nil under its fully qualified name; the bare name
        # is still honoured for documents that use it.
        for attr_name in (self._XSI_NIL, 'nil'):
            if node.get(attr_name) in ('true', '1'):
                return True
        return False

    def GetFactValue(self, SeekConcept, ConceptPeriodType):
        """
        Look up the numeric value of a fact in the current period.

        SeekConcept -- prefixed element name, e.g. 'us-gaap:Assets'.
        ConceptPeriodType -- ``c.INSTANT`` or ``c.DURATION``; selects which
            context id from ``fields`` is used.

        Returns the value as a float (0.0 for a nil fact), None when there
        is no context, no matching fact or the text is not numeric, and the
        string "CONTEXT ERROR" for an unknown period type.
        """
        if ConceptPeriodType == c.INSTANT:
            ContextReference = self.fields['ContextForInstants']
        elif ConceptPeriodType == c.DURATION:
            ContextReference = self.fields['ContextForDurations']
        else:
            #An error occurred
            return "CONTEXT ERROR"

        if not ContextReference:
            return None

        oNode = self.getNode(
            "//" + SeekConcept + "[@contextRef='" + ContextReference + "']")
        if oNode is None:
            return None

        #set the value to ZERO if it is nil
        factValue = 0 if self._is_nil(oNode) else oNode.text
        try:
            return float(factValue)
        except (ValueError, TypeError):
            # Empty or non-numeric text.
            return None

    def GetBaseInformation(self):
        """
        Copy the document's basic ``dei`` facts into ``fields``.

        For each entry the first matching element that has a contextRef
        attribute supplies the text; when there is none, the entry's
        fallback value is stored instead.
        """
        for field_name, element_name, fallback in self._BASE_FIELDS:
            oNode = self.getNode("//dei:" + element_name + "[@contextRef]")
            if oNode is not None:
                self.fields[field_name] = oNode.text
            else:
                self.fields[field_name] = fallback

    def _context_period_date(self, context_id, tag_names):
        """
        Return the date of the first existing ``xbrli:period`` child among
        ``tag_names`` for the context, or None when it is missing or empty.
        """
        node = None
        for tag_name in tag_names:
            node = self.getNode(
                "//xbrli:context[@id='" + context_id + "']/xbrli:period/xbrli:" + tag_name)
            if node is not None:
                break
        if node is not None and node.text:
            return utils.str_to_date(node.text)
        return None

    def get_context_start_date(self, context_id):
        """
        Return the start date of a context, cached per context id.

        Uses the period's startDate, falling back to its instant when there
        is no startDate element.  None when neither yields a date.
        """
        if context_id not in self._context_start_dates:
            self._context_start_dates[context_id] = self._context_period_date(
                context_id, ('startDate', 'instant'))
        return self._context_start_dates[context_id]

    def get_context_end_date(self, context_id):
        """
        Return the end date of a context (its period's endDate), cached per
        context id.  None when the context has no endDate.
        """
        if context_id not in self._context_end_dates:
            self._context_end_dates[context_id] = self._context_period_date(
                context_id, ('endDate',))
        return self._context_end_dates[context_id]

    def GetCurrentPeriodAndContextInformation(self, EndDate):
        """
        Set the period and context entries of ``fields`` for ``EndDate``.

        Writes BalanceSheetDate, ContextForInstants, IncomeStatementPeriodYTD
        and ContextForDurations.

        NOTE(review): no duration context is searched for here, so
        ContextForDurations is always left as "ERROR" and
        IncomeStatementPeriodYTD as the "2099-01-01" placeholder.
        """
        # Placeholders first, so the keys exist even if the lookup below
        # raises.
        self.fields['IncomeStatementPeriodYTD'] = "ERROR"
        self.fields['ContextForInstants'] = "ERROR"
        self.fields['ContextForDurations'] = "ERROR"

        #Balance sheet date of current period
        self.fields['BalanceSheetDate'] = EndDate

        # No dimensionless instant context is searched for directly, so go
        # straight to the alternative lookup keyed on the balance-sheet date.
        self.fields['ContextForInstants'] = self.LookForAlternativeInstanceContext()

        #Income statement date for current fiscal year, year to date
        self.fields['IncomeStatementPeriodYTD'] = "2099-01-01"

        self.fields['ContextForDurations'] = "ERROR"

    def LookForAlternativeInstanceContext(self):
        """
        Find an instant context dated ``fields['BalanceSheetDate']`` that has
        a us-gaap:Assets fact.

        Returns the id of the first such context, or None when there is
        none.
        """
        #See if there are any nodes with the document period focus date
        oNodeList_Alt = self.getNodeList(
            "//xbrli:context[xbrli:period/xbrli:instant='" + \
            self.fields['BalanceSheetDate'] + "']")

        for oNode_Alt in oNodeList_Alt:
            #Found possible contexts
            context_id = oNode_Alt.get("id")
            assets_node = self.getNode(
                "//us-gaap:Assets[@contextRef='" + context_id + "']")
            if assets_node is not None:
                return context_id
        return None
#!/bin/bash 2 | pylint --rcfile=pylint.rc django_sec setup.py 3 | -------------------------------------------------------------------------------- /pip-requirements-min-django.txt: -------------------------------------------------------------------------------- 1 | Django>=1.5 2 | -------------------------------------------------------------------------------- /pip-requirements-test.txt: -------------------------------------------------------------------------------- 1 | tox>=2.0.0 2 | -------------------------------------------------------------------------------- /pip-requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil>=2.2 2 | lxml==3.7.2 3 | six>=1.7.2 4 | wget==3.2 5 | -------------------------------------------------------------------------------- /pylint.rc: -------------------------------------------------------------------------------- 1 | # lint Python modules using external checkers. 2 | # 3 | # This is the main checker controlling the other ones and the reports 4 | # generation. It is itself both a raw checker and an astng checker in order 5 | # to: 6 | # * handle message activation / deactivation at the module level 7 | # * handle some basic but necessary stats'data (number of classes, methods...) 8 | # 9 | [MASTER] 10 | 11 | # Specify a configuration file. 12 | #rcfile= 13 | 14 | # Python code to execute, usually for sys.path manipulation such as 15 | # pygtk.require(). 16 | #init-hook= 17 | 18 | # Profiled execution. 19 | profile=no 20 | 21 | # Add to the black list. It should be a base name, not a 22 | # path. You may set this option multiple times. 23 | # Ignore all auto-generated South migration directories. 24 | ignore=migrations,south_migrations 25 | 26 | # Pickle collected data for later comparisons. 27 | persistent=yes 28 | 29 | # Set the cache size for astng objects. 
30 | cache-size=500 31 | 32 | # List of plugins (as comma separated values of python modules names) to load, 33 | # usually to register additional checkers. 34 | load-plugins= 35 | 36 | [MESSAGES CONTROL] 37 | 38 | # Enable only checker(s) with the given id(s). This option conflicts with the 39 | # disable-checker option 40 | #enable-checker= 41 | 42 | # Enable all checker(s) except those with the given id(s). This option 43 | # conflicts with the enable-checker option 44 | #disable-checker= 45 | 46 | # Enable all messages in the listed categories (IRCWEF). 47 | #enable-msg-cat= 48 | 49 | # Disable all messages in the listed categories (IRCWEF). 50 | disable-msg-cat=I 51 | 52 | # Enable the message(s) with the given id(s). 53 | #enable-msg= 54 | 55 | #http://docs.pylint.org/features.html 56 | #http://pylint-messages.wikidot.com/all-codes 57 | #pylint --list-msgs > pylint.messages 58 | 59 | # All these are disabled below. 60 | # C1001: old-style class defined (Django uses these for Meta options) 61 | # C0103: variable regex check. 62 | # C0111: missing docstring check. It's too vague. Complains about no docstrings in __init__ and other places we don't care about. 63 | # C0303: Trailing whitespace. 64 | # C0330: bad-continuation 65 | # E1101: member check...this is usually wrong. 66 | # E1103: type inference...this is usually wrong. 67 | # F0401: unable to import 68 | # R0201: method should be function check. 69 | # R0401: cyclic import check...because sometimes it's wrong. 70 | # R0902: too many instance attributes check. 71 | # R0903: too few public methods check...makes no sense with Django. 72 | # R0904: too many public method check. 73 | # R0913: too many argument check. 74 | # R0921: abstract class not referenced check. 75 | # W0104: no effect check. 76 | # W0142: magic check. 77 | # W0212: protected data check. 78 | # W0232: __init__ check. 79 | # W0311: bad-indentation 80 | # W0401: wildcard import. 81 | # W0404: reimport check...this is sometimes wrong. 
82 | # W0511: TODO check. 83 | # W0613: unused argument check. Too vague. 84 | # W0614: wildcard import usage check. 85 | # W0704: empty except check. 86 | # E1002: Use of super on an old style class 87 | # E1120: No value for argument 88 | # R0901: Too many ancestors 89 | # E1123: Unexpected keyword argument %r in %s call 90 | # C0302: *Too many lines in module (%s)* 91 | # R0801: *Similar lines in %s files* 92 | # R0914: *Too many local variables (%s/%s)* 93 | # R0912: *Too many branches (%s/%s)* 94 | # R0915: *Too many statements (%s/%s)* 95 | # W0703: *Catching too general exception %s* 96 | # E1003: *Bad first argument %r given to super()* 97 | # E0202: *An attribute defined in %s line %s hides this method* 98 | # W0201: *Attribute %r defined outside __init__* 99 | # W0221: *Arguments number differs from %s method* 100 | # C0325: *Unnecessary parens after %r keyword* 101 | # R0916: too-many-boolean-expressions 102 | # R0204: *Redefinition of %s type from %s to %s* 103 | # R0101: *Too many nested blocks (%s/%s)* 104 | # I0011: *Locally disabling %s (%s)* 105 | # W1001: *Use of "property" on an old style class* 106 | disable=C1001,C0103,R0201,W0212,W0614,W0401,W0704,E1101,W0142,R0904,R0913,W0404,R0903,W0232,C0111,W0613,W0511,W0104,R0902,R0921,R0401,E1103,C0303,W0311,C0330,F0401,E1002,E1120,R0901,E1123,C0302,R0801,R0914,R0912,R0915,W0703,E1003,E0202,W0201,W0221,C0325,R0916,R0204,R0101,I0011,W1001,blacklisted-name 107 | 108 | [REPORTS] 109 | 110 | # Set the output format. Available formats are text, parseable, colorized, msvs 111 | # (visual studio) and html 112 | output-format=text 113 | 114 | # Include message's id in output 115 | include-ids=yes 116 | 117 | # Put messages in a separate file for each module / package specified on the 118 | # command line instead of printing them on stdout. Reports (if any) will be 119 | # written in a file name "pylint_global.[txt|html]". 
120 | files-output=no 121 | 122 | # Tells whether to display a full report or only the messages 123 | reports=yes 124 | 125 | # Python expression which should return a note less than 10 (10 is the highest 126 | # note). You have access to the variables errors warning, statement which 127 | # respectively contain the number of errors / warnings messages and the total 128 | # number of statements analyzed. This is used by the global evaluation report 129 | # (R0004). 130 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 131 | 132 | # Add a comment according to your evaluation note. This is used by the global 133 | # evaluation report (R0004). 134 | comment=no 135 | 136 | # Enable the report(s) with the given id(s). 137 | #enable-report= 138 | 139 | # Disable the report(s) with the given id(s). 140 | #disable-report= 141 | 142 | 143 | # checks for : 144 | # * doc strings 145 | # * modules / classes / functions / methods / arguments / variables name 146 | # * number of arguments, local variables, branches, returns and statements in 147 | # functions, methods 148 | # * required module attributes 149 | # * dangerous default values as arguments 150 | # * redefinition of function / method / class 151 | # * uses of the global statement 152 | # 153 | [BASIC] 154 | 155 | # Regular expression which should only match functions or classes name which do 156 | # not require a docstring 157 | no-docstring-rgx=__.*__ 158 | 159 | # Regular expression which should only match correct module names 160 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 161 | 162 | # Regular expression which should only match correct module level names 163 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 164 | 165 | # Regular expression which should only match correct class names 166 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 167 | 168 | # Regular expression which should only match correct function names 169 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 170 | 171 | # Regular 
expression which should only match correct method names 172 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 173 | 174 | # Regular expression which should only match correct instance attribute names 175 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 176 | 177 | # Regular expression which should only match correct argument names 178 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 179 | 180 | # Regular expression which should only match correct variable names 181 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 182 | 183 | # Regular expression which should only match correct list comprehension / 184 | # generator expression variable names 185 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 186 | 187 | # Good variable names which should always be accepted, separated by a comma 188 | good-names=i,j,k,ex,Run,_ 189 | 190 | # Bad variable names which should always be refused, separated by a comma 191 | bad-names=foo,bar,baz,toto,tutu,tata 192 | 193 | # List of builtins function names that should not be used, separated by a comma 194 | bad-functions=map,filter,apply,input 195 | 196 | 197 | # try to find bugs in the code using type inference 198 | # 199 | [TYPECHECK] 200 | 201 | # Tells whether missing members accessed in mixin class should be ignored. A 202 | # mixin class is detected if its name ends with "mixin" (case insensitive). 203 | ignore-mixin-members=yes 204 | 205 | # List of classes names for which member attributes should not be checked 206 | # (useful for classes with attributes dynamically set). 207 | ignored-classes=SQLObject 208 | 209 | # When zope mode is activated, add a predefined set of Zope acquired attributes 210 | # to generated-members. 211 | zope=no 212 | 213 | # List of members which are set dynamically and missed by pylint inference 214 | # system, and so shouldn't trigger E0201 when accessed. 
215 | generated-members=REQUEST,acl_users,aq_parent 216 | 217 | 218 | # checks for 219 | # * unused variables / imports 220 | # * undefined variables 221 | # * redefinition of variable from builtins or from an outer scope 222 | # * use of variable before assignment 223 | # 224 | [VARIABLES] 225 | 226 | # Tells whether we should check for unused import in __init__ files. 227 | init-import=no 228 | 229 | # A regular expression matching names used for dummy variables (i.e. not used). 230 | dummy-variables-rgx=_|dummy 231 | 232 | # List of additional names supposed to be defined in builtins. Remember that 233 | # you should avoid to define new builtins when possible. 234 | additional-builtins= 235 | 236 | 237 | # checks for 238 | # * external modules dependencies 239 | # * relative / wildcard imports 240 | # * cyclic imports 241 | # * uses of deprecated modules 242 | # 243 | [IMPORTS] 244 | 245 | # Deprecated modules which should not be used, separated by a comma 246 | deprecated-modules=regsub,string,TERMIOS,Bastion,rexec 247 | 248 | # Create a graph of every (i.e. internal and external) dependencies in the 249 | # given file (report R0402 must not be disabled) 250 | import-graph= 251 | 252 | # Create a graph of external dependencies in the given file (report R0402 must 253 | # not be disabled) 254 | ext-import-graph= 255 | 256 | # Create a graph of internal dependencies in the given file (report R0402 must 257 | # not be disabled) 258 | int-import-graph= 259 | 260 | 261 | # checks for sign of poor/misdesign: 262 | # * number of methods, attributes, local variables... 
263 | # * size, complexity of functions, methods 264 | # 265 | [DESIGN] 266 | 267 | # Maximum number of arguments for function / method 268 | max-args=5 269 | 270 | # Maximum number of locals for function / method body 271 | max-locals=15 272 | 273 | # Maximum number of return / yield for function / method body 274 | max-returns=6 275 | 276 | # Maximum number of branches for function / method body 277 | max-branchs=12 278 | 279 | # Maximum number of statements in function / method body 280 | max-statements=50 281 | 282 | # Maximum number of parents for a class (see R0901). 283 | max-parents=7 284 | 285 | # Maximum number of attributes for a class (see R0902). 286 | max-attributes=7 287 | 288 | # Minimum number of public methods for a class (see R0903). 289 | min-public-methods=2 290 | 291 | # Maximum number of public methods for a class (see R0904). 292 | max-public-methods=20 293 | 294 | 295 | # checks for : 296 | # * methods without self as first argument 297 | # * overridden methods signature 298 | # * access only to existent members via self 299 | # * attributes not defined in the __init__ method 300 | # * supported interfaces implementation 301 | # * unreachable code 302 | # 303 | [CLASSES] 304 | 305 | # List of interface methods to ignore, separated by a comma. This is used for 306 | # instance to not check methods defined in Zope's Interface base class. 307 | #ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by 308 | 309 | # List of method names used to declare (i.e. assign) instance attributes. 310 | defining-attr-methods=__init__,__new__,setUp 311 | 312 | 313 | # checks for similarities and duplicated code. This computation may be 314 | # memory / CPU intensive, so you should disable it if you experience 315 | # problems.
316 | # 317 | [SIMILARITIES] 318 | 319 | # Minimum lines number of a similarity. 320 | min-similarity-lines=4 321 | 322 | # Ignore comments when computing similarities. 323 | ignore-comments=yes 324 | 325 | # Ignore docstrings when computing similarities. 326 | ignore-docstrings=yes 327 | 328 | 329 | # checks for : 330 | # * unauthorized constructions 331 | # * strict indentation 332 | # * line length 333 | # * use of <> instead of != 334 | # 335 | [FORMAT] 336 | 337 | # Maximum number of characters on a single line. 338 | max-line-length=160 339 | 340 | # Maximum number of lines in a module 341 | max-module-lines=1000 342 | 343 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 344 | # tab). 345 | indent-string=' ' 346 | 347 | 348 | # checks for: 349 | # * warning notes in the code like FIXME, XXX 350 | # * PEP 263: source code with non ascii character but no encoding declaration 351 | # 352 | [MISCELLANEOUS] 353 | 354 | # List of note tags to take in consideration, separated by a comma. 
CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))


def get_reqs(*fns):
    """Collect requirement specifiers from one or more requirements files.

    Each name in ``fns`` is joined onto ``CURRENT_DIR`` (an absolute path is
    therefore used as-is) and read line by line. Surrounding whitespace is
    stripped; blank lines and ``#`` comment lines are skipped so that they
    never reach ``install_requires`` / ``tests_require``.

    Returns a list of requirement strings, in file order, with the files
    concatenated in the order they were given. Calling with no file names
    returns an empty list.

    Raises ``IOError``/``OSError`` if a named file cannot be opened.
    """
    reqs = []
    for fn in fns:
        # A context manager closes the handle deterministically; the previous
        # ``open(...).readlines()`` left that to the garbage collector.
        with open(os.path.join(CURRENT_DIR, fn)) as req_file:
            for line in req_file:
                package = line.strip()
                # Full-line comments are legal in pip requirements files but
                # are not valid requirement specifiers for setuptools.
                if not package or package.startswith('#'):
                    continue
                reqs.append(package)
    return reqs
| zip_safe=False, 59 | install_requires=get_reqs('pip-requirements-min-django.txt', 'pip-requirements.txt'), 60 | tests_require=get_reqs('pip-requirements-test.txt'), 61 | ) 62 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | # Note, several versions support Python 3.2, but Pip has dropped support, so we can't test them. 3 | # See https://github.com/travis-ci/travis-ci/issues/5485 4 | # Python3.3 and Python3.4 dropped in Ubuntu 16. 5 | envlist = py{27}-django{15,16},py{27}-django{17,18},py{27,35}-django{19},py{27,35}-django{110} 6 | recreate = True 7 | 8 | [testenv] 9 | basepython = 10 | py27: python2.7 11 | py32: python3.2 12 | py33: python3.3 13 | py34: python3.4 14 | py35: python3.5 15 | deps = 16 | -r{toxinidir}/pip-requirements.txt 17 | -r{toxinidir}/pip-requirements-test.txt 18 | django15: Django>=1.5,<1.6 19 | django16: Django>=1.6,<1.7 20 | django17: Django>=1.7,<1.8 21 | django18: Django>=1.8,<1.9 22 | django19: Django>=1.9,<1.10 23 | django110: Django>=1.10,<2 24 | commands = django-admin.py test --traceback --settings=django_sec.tests.settings django_sec.tests.tests.Tests{env:TESTNAME:} 25 | 26 | # Django 1.5 uses a different test module lookup mechanism, so it needs a different command. 27 | [testenv:py27-django15] 28 | commands = django-admin.py test --traceback --settings=django_sec.tests.settings tests.Tests{env:TESTNAME:} 29 | [testenv:py33-django15] 30 | commands = django-admin.py test --traceback --settings=django_sec.tests.settings tests.Tests{env:TESTNAME:} 31 | [testenv:py34-django15] 32 | commands = django-admin.py test --traceback --settings=django_sec.tests.settings tests.Tests{env:TESTNAME:} 33 | --------------------------------------------------------------------------------