├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── __init__.py ├── csvimport ├── __init__.py ├── admin.py ├── app.py ├── make_model.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── importcsv.py │ │ └── inspectcsv.py ├── messytables │ ├── README.md │ ├── __init__.py │ ├── dateparser.py │ └── types.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── parser.py ├── settings.py ├── signals.py ├── tests │ ├── README.txt │ ├── __init__.py │ ├── admin.py │ ├── admin_tests.py │ ├── constraint_tests.py │ ├── fixtures │ │ ├── bad_country.csv │ │ ├── countries.csv │ │ ├── issue98.csv │ │ ├── test_broken_rows.csv │ │ ├── test_char.csv │ │ ├── test_char2.csv │ │ ├── test_duplicate.csv │ │ ├── test_headless.csv │ │ ├── test_new_model.csv │ │ ├── test_number.csv │ │ ├── test_plain.csv │ │ ├── test_quoted.csv │ │ ├── test_single_row.csv │ │ └── test_tab.csv │ ├── issue_tests.py │ ├── log_tests.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_country_issue98_item_organisation_unitofmeasure.py │ │ └── __init__.py │ ├── models.py │ ├── optional_tests.py │ ├── parse_tests.py │ ├── performance_tests.py │ ├── testcase.py │ ├── testrunner.py │ ├── urls.py │ └── views.py └── wsgi.py ├── docs ├── HISTORY.txt └── test_script.sh ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | django_csvimport.egg-info 2 | build 3 | dist 4 | *.pyc 5 | .pydevproject 6 | .project 7 | reports 8 | *-old 9 | MANIFEST 10 | csvimport/csvimport_test.log 11 | db.sqlite3 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include csvimport/__init__.py 3 | include __init__.py 4 | recursive-include csvimport * 5 | recursive-include docs * 6 | recursive-include csvimport/tests/fixtures * 7 | recursive-include csvimport/migrations * 8 | global-include *rst 9 | global-include *txt 10 | global-include *csv 11 | global-exclude *pyc -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Django CSV Import 2 | ================= 3 | 4 | Ed Crewe - December 2023 5 | 6 | Overview 7 | -------- 8 | 9 | django-csvimport is a generic importer tool to allow the upload of CSV files for 10 | populating data. The egg installs an admin csvimport model that has a file upload field. 11 | Add a new csvimport and upload a comma separated values file or MS Excel file. 
12 | 13 | The upload triggers the import mechanism, which matches the field names in the file's header line 14 | to the fields in the selected model, importing any rows that include all required fields. 15 | Optionally required fields can be specified as part of the upload. 16 | By default duplicate value rows are not inserted. 17 | 18 | The import can also be run as a custom command, i.e. manage.py importcsv filename 19 | for possible use via cronjob etc. 20 | 21 | For importing CSV files whose schema is unknown, and where there is no existing model to import to, there 22 | is another command, inspectcsv, to generate the model code from the CSV file, guessing data types from the data 23 | using code from https://messytables.readthedocs.org 24 | 25 | The core import code was based on http://djangosnippets.org/snippets/633/ by Jonathan Holst. 26 | It adds character encoding handling, model field and column autodetection, admin interface, 27 | custom command etc. 28 | 29 | Version 3 - Dec 2023 30 | -------------------- 31 | 32 | #. Test with Django 5 and Python 3.12 33 | #. Incorporate messytables relevant code, the types.py file, since it is no longer supported for Python 3.12 34 | #. Use dateparser for auto-handling a much wider range of date formats 35 | 36 | Version 2 - Sept 2014 37 | --------------------- 38 | 39 | #. New management command inspectcsv to generate models from CSV files 40 | #. General code refactor 41 | #. Management command renamed from csvimport to importcsv 42 | #. 
More features to cope with bad encoding and date types 43 | 44 | Version Compatibility 45 | --------------------- 46 | 47 | - version 3.0 tested with Django 5.0 Python 3.12 48 | - version 2.16 tested with Django 3.2.16 on Python 3.9.6 49 | - version 2.14 tested with Django 3.0.5 on Python 3.7.6, 3.8.2 50 | - version 2.13 was tested with Django 2.2.5 on Python 3.7.3 51 | - version 2.6 was tested with Django 1.7, 1.8, 1.9, 1.10, 1.11 on Python 2.7.13, Python 3.5.6 52 | 53 | Please use version 2.1, eg. pip install django-csvimport==2.1 54 | for Django versions prior to 1.7 55 | 56 | This Django >= 1.7 requirement is because django-csvimport uses the newly added AppConfig for versions > 2.1 57 | (NB: To fix this issue you could install django-appconf to django 1.6 or earlier 58 | and tweak csvimport to use it in csvimport.app) 59 | 60 | For really old Django versions < 1.4 you may have to dial back the versions until it works! 61 | 62 | Note that only versions > 2.2 are compatible with Python 3.4 63 | 64 | 65 | Installation instructions 66 | ------------------------- 67 | 68 | Add the following to the INSTALLED_APPS in the settings.py of your project: 69 | 70 | >>> pip install django-csvimport 71 | ... 72 | ... INSTALLED_APPS = ( 73 | ... ... 74 | ... 'csvimport.app.CSVImportConf', # use AppConfig for django >=1.7 csvimport >=2.2 75 | ... ) 76 | ... 77 | ... python manage.py migrate (or syncdb if django < 1.9) 78 | 79 | Note that migrate has the core tables in 0001_initial migration and test tables in 0002 so 80 | rm migrations/0002_test_models.py if you do not want these cluttering your database 81 | 82 | Custom commands 83 | --------------- 84 | 85 | INSPECTCSV 86 | 87 | manage.py inspectcsv importfile.csv > models.py 88 | 89 | This returns the code for a new models file with a guesstimated model for the CSV file. 
90 | Add it to your app then run 91 | 92 | >>> makemigrations your_app 93 | >>> migrate 94 | 95 | You can then run the import to that model for importfile.csv 96 | 97 | NB: As it says, it's a guesstimate, so you may have to manually tweak the generated models.py to get 98 | the import to work better. 99 | 100 | If there are no headings in the CSV file, then it just uses automated ones col_1, col_2 ... etc. 101 | 102 | IMPORTCSV 103 | 104 | (Please note this command used to be csvimport but that caused name clash issues with the module) 105 | 106 | manage.py importcsv --mappings='' --model='app_label.model_name' --delimiter='\t' importfile.csv 107 | 108 | For mappings enter a list of fields in order only if you don't have a header row 109 | with matching field names - or you want to override it, eg. 110 | 111 | --mappings='1=shared_code,2=org(otherapp.Organisation|name),3=date' 112 | 113 | where (model|foreign key field) is used to specify relations if, again, you want to 114 | override what would be looked up from your models. 115 | 116 | If you have no real field names in your csv file, then you can use 117 | --mappings='none' and it will assume the fields are named col_1, col_2 ... etc. 118 | 119 | Note that if you have a header row and specify mappings then it will treat the header as a data row, so delete it first. 120 | 121 | Admin interface import 122 | ---------------------- 123 | 124 | Just add a csvimport item, fill in the form and submit. 125 | Failed import rows are added to the log field. 126 | 127 | Demonstration installation instructions 128 | --------------------------------------- 129 | 130 | To see how it works, you can install a demo easily enough eg. via virtual environment, 131 | then use the tests settings to have some sample models for importing data, and the fixtures are sample csv files. 132 | 133 | - Run the following in your shell: 134 | 135 | >>> virtualenv mysite 136 | ... cd mysite 137 | ... pip install django 138 | ... 
pip install django-csvimport 139 | ... 140 | ... cat > bin/django-admin.py << EOF 141 | ... #!/usr/bin/env python 142 | ... from django.core import management 143 | ... import os 144 | ... os.environ["DJANGO_SETTINGS_MODULE"] = "csvimport.settings" 145 | ... if __name__ == "__main__": 146 | ... management.execute_from_command_line() 147 | ... EOF 148 | ... 149 | ... django-admin.py migrate 150 | ... django-admin.py runserver 151 | 152 | - Go to http://127.0.0.1:8000/admin/ in your browser - pay attention to the trailing / ! 153 | - Click on add Csvimport 154 | - Pick the django-csvimport/csvimport/tests/fixtures/countries.csv [1] and upload it 155 | - Check to see if the Country model is now populated. 156 | 157 | [1] also available from https://raw.github.com/edcrewe/django-importcsv/master/importcsv/tests/fixtures/countries.csv 158 | 159 | Alternatively you can use the command line to upload 160 | 161 | django-admin.py importcsv --model='csvimport.Country' django-csvimport/csvimport/tests/fixtures/countries.csv --settings='csvimport.settings' 162 | 163 | Tests 164 | ----- 165 | 166 | To run the django-csvimport tests use: 167 | 168 | >>> bin/python3 -m django test --settings='csvimport.settings' csvimport.tests 169 | 170 | Foreign Keys 171 | ------------ 172 | 173 | It is not viable for csvimport to determine complex table relations. 174 | However if it finds something marked as an ForeignKey with a lookup field in its model mappings, then it checks if the data exists already for the related model and pulls back an id for the field or creates a new entry if possible in the fkey model and pulls back a new id. 175 | 176 | For this to be useful then you need a related table that has a unique and more meaningful field that is being used in your data than a numeric primary key. 177 | 178 | eg. 
for an organisation column, org, that holds the unique name of the organisation from a separate table, you can add 179 | 180 | column2=org(Organisation|name) 181 | 182 | to the mappings, so it knows that the org field relates to a separate Organisation table with a unique name field, which it uses to look up the organisation and replace it with the org_id foreign key 183 | 184 | More complex relations 185 | ---------------------- 186 | 187 | For any more sophisticated relations, eg. multiple keys, many to many fields etc., 188 | the recommended approach is to create a temporary or intermediate import table that holds the data from your CSV file 189 | with the lookup data included as columns. You can use 190 | 191 | inspectcsv importfile.csv > models.py 192 | 193 | to automatically generate the import model from your CSV file. 194 | 195 | Whenever you do an import to that table you would use a bulk insert database query to take the data in it and populate complex relations of the final model tables appropriately. 196 | If imports are happening repeatedly, eg. once a day, you retain your import CSV format table, and can add a database trigger for the table to automatically run your stored data conversion synchronisation query into the target tables. 197 | 198 | DateTime data 199 | -------------- 200 | 201 | Note that the importer uses dateparser to try to convert any datetime types you have in your CSV file. 202 | See https://dateparser.readthedocs.io/en/latest/settings.html for the options accepted by the DATEPARSER_SETTINGS environment variable, which is read as a JSON map. 203 | 204 | Acknowledgements 205 | ---------------- 206 | 207 | This package was created as part of a django dash at the House of Omni, Bristol UK, organised 208 | by Dan Fairs and my local django users group, #DBBUG. It was a core component for an application 209 | for aid agency supply chain sharing, prompted by Fraser Stephens of the HELIOS foundation 210 | and developed by Ed Crewe and Tom Dunham. 
from datetime import datetime

from django import forms
from django.contrib import admin
from django.contrib.admin import ModelAdmin
from django.db import models
from django.utils import timezone

from csvimport.models import CSVImport


class CSVImportAdmin(ModelAdmin):
    """Admin for CSVImport records: saving one runs the CSV import.

    Most fields are read-only; the user supplies the target model, an
    optional field mapping, the file and its encoding.
    """

    readonly_fields = ["file_name", "upload_method", "error_log_html", "import_user"]
    fields = [
        "model_name",
        "field_list",
        "upload_file",
        "file_name",
        "encoding",
        "upload_method",
        "error_log_html",
        "import_user",
    ]
    # Render CharFields as a single-row textarea.
    formfield_overrides = {
        models.CharField: {"widget": forms.Textarea(attrs={"rows": "1", "cols": "40"})},
    }

    def save_model(self, request, obj, form, change):
        """Save the record, run the importcsv command on it and log the result.

        The form is saved first (commit cannot be deferred) because the
        uploaded file has to exist on disk so the command can reopen it
        with the right charset.

        Any errors returned by the command are stored newline-joined in
        ``obj.error_log``; the importing user and time are recorded too.
        """
        form.save()
        # Imported here rather than at module level, as in the original.
        from csvimport.management.commands.importcsv import Command

        cmd = Command()
        if obj.upload_file:
            obj.file_name = obj.upload_file.name
            cmd.setup(
                mappings=obj.field_list,
                modelname=obj.model_name,
                charset=obj.encoding,
                uploaded=obj.upload_file,
                defaults=self.filename_defaults(obj.file_name),
            )
        errors = cmd.run(logid=obj.id)
        if errors:
            obj.error_log = "\n".join(errors)
        obj.import_user = str(request.user)
        # timezone.now() is aware when USE_TZ is on and naive otherwise, so
        # it never triggers Django's naive-datetime warning.
        obj.import_date = timezone.now()
        obj.save()

    def filename_defaults(self, filename):
        """Hook: return default field values derived from ``filename``.

        Override this method to supply filename based data. The base
        implementation supplies none and returns an empty list.
        """
        return []


admin.site.register(CSVImport, CSVImportAdmin)

# To see sample model in admin from test settings uncomment the following...
# from csvimport.tests.models import Item
# admin.site.register(Item)
from datetime import datetime
import keyword

# Maps str() of a guessed column type to a Django model field class name.
# Date types are handled separately in MakeModel.to_django because their
# string form starts with "Date(" followed by a format.
MESSYMAP = {
    "Decimal": "DecimalField",
    "Integer": "IntegerField",
    "Bool": "BooleanField",
    "String": "CharField",
}


class MakeModel(object):
    """Generates a simple model from a definition of fields"""

    def to_django(self, mtype):
        """Return the Django field class name for a guessed type.

        Returns an empty string when the type is not recognised.
        """
        type_name = str(mtype)
        djtype = MESSYMAP.get(type_name, "")
        if not djtype and type_name.startswith("Date("):
            djtype = "DateTimeField"
        return djtype

    def table2model(self, table_name):
        """CamelCase class name from a table name.

        Any schema prefix ("schema.table") is dropped first, then the name
        is split on every underscore, space and dash so none of them can
        leak into the class name.
        """
        bare_name = table_name.split(".")[-1]
        words = bare_name.replace("-", "_").replace(" ", "_").split("_")
        return "".join(word.title() for word in words)

    def model_from_table(self, table_name, fieldset):
        """Return the source code of an unmanaged model for ``table_name``.

        ``fieldset`` follows the same format as
        introspection.get_table_description(cursor, table_name):
        [(column_name, messy_type, bytes, max_length, max_digits, decimal_places), ...]
        Two optional trailing flags are honoured when present: index 6
        marks the column as nullable and index 7 requests a type-based
        default. Rows without them are treated as having both unset.

        The first column is always emitted as the primary key.
        """
        classname = self.table2model(table_name)
        chunks = [
            "\nclass %s(models.Model):\n" % classname,
            "    ''' Autogenerated model file %s %s '''\n\n"
            % (classname.lower(), datetime.now().ctime()),
        ]
        chunks.extend(self._field_line(i, row) for i, row in enumerate(fieldset))
        quoted_name = ".".join('"%s"' % part for part in table_name.split("."))
        chunks.append("\n    class Meta:\n")
        chunks.append("        managed = False\n")
        chunks.append("        db_table = u'%s'" % quoted_name)
        return "".join(chunks)

    def _python_name(self, column_name):
        """Return (attribute_name, notes) for a column name."""
        notes = []
        att_name = column_name.lower()
        if " " in att_name:
            att_name = att_name.replace(" ", "_")
            notes.append("Field renamed to remove spaces.")
        if "-" in att_name:
            att_name = att_name.replace("-", "_")
            notes.append("Field renamed to remove dashes.")
        # Dunder not allowed in field names; loop so "___" is fully collapsed.
        if "__" in att_name:
            while "__" in att_name:
                att_name = att_name.replace("__", "_")
            notes.append(
                "Field renamed because it contained more than one '_' in a row."
            )
        if keyword.iskeyword(att_name):
            att_name += "_field"
            notes.append("Field renamed because it was a Python reserved word.")
        if att_name[:1].isdigit():
            att_name = "number_" + att_name
            notes.append("Field renamed because it wasn't a valid Python identifier.")
        if column_name != column_name.lower():
            notes.append("Field name made lowercase.")
        return att_name, notes

    def _field_line(self, index, row):
        """Return one indented ``name = models.Field(...)`` line for a row."""
        column_name = row[0]
        # The nullable/default flags are optional trailing items.
        null_ok = row[6] if len(row) > 6 else False
        wants_default = row[7] if len(row) > 7 else False

        att_name, notes = self._python_name(column_name)
        params = {}  # Field parameters, emitted in insertion order.
        # Whenever the attribute differs from the real column, pin the column.
        if att_name != column_name:
            params["db_column"] = column_name

        field_type = self.to_django(row[1])
        if not field_type:
            # An empty type would emit the invalid "models.(".
            field_type = "TextField"
            notes.append("This field type is a guess.")

        if field_type == "CharField" and row[3]:
            params["max_length"] = row[3]

        if field_type == "DecimalField":
            # Unspecified precision is introspected as NUMBER(0, 0), which
            # fails, so fall back to max_digits=16.
            if int(row[4]) > 0:
                params["max_digits"] = row[4]
            else:
                params["decimal_places"] = 4
                params["max_digits"] = 16
            # Oracle can report a negative scale; clamp it to 0.
            params["decimal_places"] = row[5] if int(row[5]) > -1 else 0

        # FIXME:Just always assume first column is primary key
        if index == 0:
            params["primary_key"] = True

        if null_ok:
            params["blank"] = True
            if field_type not in ("TextField", "CharField"):
                params["null"] = True
        if "primary_key" in params or "unique" in params:
            for key in ("null", "blank"):
                params[key] = False

        if wants_default:
            if field_type in ("TextField", "CharField"):
                params["default"] = ""
            elif field_type in ("DecimalField", "IntegerField"):
                params["default"] = 0
            elif field_type == "BooleanField":
                params["default"] = False

        arguments = ", ".join("%s=%r" % item for item in params.items())
        field_desc = "%s = models.%s(%s)" % (att_name, field_type, arguments)
        if notes:
            field_desc += "  # " + " ".join(notes)
        return "    %s\n" % field_desc
-------------------------------------------------------------------------------- 1 | """ Developed for www.heliosfoundation.org by Ed Crewe and Tom Dunham 2 | Django command to import CSV files 3 | """ 4 | import os 5 | import re 6 | import json 7 | from datetime import datetime 8 | import dateparser 9 | import django 10 | from distutils.version import StrictVersion 11 | 12 | from django.db import DatabaseError 13 | from django.db import transaction 14 | from django.core.exceptions import ObjectDoesNotExist 15 | from django.core.management.base import LabelCommand, BaseCommand, CommandError 16 | from optparse import make_option 17 | from django.db import models 18 | from django.contrib.contenttypes.models import ContentType 19 | from django.utils import timezone 20 | 21 | CURRENT_TIMEZONE = timezone.get_current_timezone() 22 | try: 23 | from django.db.models.loading import get_model 24 | except ImportError: 25 | from django.apps import apps 26 | 27 | get_model = apps.get_model 28 | 29 | from django.conf import settings 30 | from csvimport.parser import CSVParser 31 | from csvimport.signals import imported_csv, importing_csv 32 | 33 | CSVIMPORT_LOG = getattr(settings, "CSVIMPORT_LOG", "screen") 34 | if CSVIMPORT_LOG == "logger": 35 | import logging 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | INTEGER = [ 40 | "BigIntegerField", 41 | "IntegerField", 42 | "SmallIntegerField", 43 | "AutoField", 44 | "PositiveIntegerField", 45 | "PositiveSmallIntegerField", 46 | ] 47 | FLOAT = ["DecimalField", "FloatField"] 48 | NUMERIC = INTEGER + FLOAT 49 | SMALLINT_DBS = ["sqlite", "sqlite3", "sqlite4"] 50 | DATE = ["DateField", "TimeField", "DateTimeField"] 51 | BOOLEAN = ["BooleanField", "NullBooleanField"] 52 | BOOLEAN_TRUE = [1, "1", "Y", "Yes", "yes", "True", "true", "T", "t"] 53 | 54 | # Check if there is a Json Map for dateparser settings, turn it into Python map 55 | DATEPARSER_SETTINGS = os.environ.get("DATEPARSER_SETTINGS") 56 | if DATEPARSER_SETTINGS: 57 | try: 
def save_csvimport(props=None, instance=None):
    """Save a CSVImport record, or print the import log for a command run.

    The model is imported inside the function to avoid a circular import
    between this command module and ``csvimport.models``.

    Args:
        props: Optional mapping of attribute name -> value that is set on the
            new ``CSVImport`` record before it is saved.
        instance: Optional object exposing ``charset``, ``model`` and
            ``loglist``. When given, nothing is saved; its log is printed.

    Returns:
        The id of the saved ``CSVImport`` when no instance is supplied,
        otherwise ``None``.
    """
    if instance:
        # Running as command line: report to the screen instead of saving.
        if instance.charset:
            print("Assumed charset = %s\n" % instance.charset)
        print("\n------------ %s ------------" % instance.model.__name__)
        for line in instance.loglist:
            if isinstance(line, str):
                print(line)
            else:
                # Log entries may themselves be sequences of lines.
                for subline in line:
                    print(subline)
        return None

    from csvimport.models import CSVImport

    csvimp = CSVImport()
    for key, value in (props or {}).items():
        setattr(csvimp, key, value)
    # Errors from save() propagate to the caller rather than being masked.
    csvimp.save()
    return csvimp.id
def add_arguments(self, parser):
    """Register the positional ``csvfile`` argument and one flag per option.

    Every key of ``self.options`` becomes a ``--<key>`` argument whose
    keyword settings are the corresponding value of that mapping.
    """
    register = parser.add_argument
    register(
        "csvfile",
        help="The file system path to the CSV file with the data to import",
    )
    for name, settings in self.options.items():
        register("--%s" % name, **settings)
def handle(self, *args, **options):
    """Pick the CSV path and pass it, with all options, to ``handle_label``.

    The first positional argument wins; without positional arguments the
    ``csvfile`` option is used (``None`` when it is absent).
    """
    label = args[0] if args else options.get("csvfile")
    self.handle_label(label, **options)
def make_row(self, row, csvimportid, index, loglist, clean=True):
    """Create an instance of the model and populate it with the row's data.

    Args:
        row: List of cell values for one CSV row. It is updated in place
            with resolved foreign key objects and cleaned values.
        csvimportid: Value stored on the new instance as ``csvimport_id``.
        index: Row number, used only in log messages.
        loglist: List that log messages are appended to.
        clean: When true, each value is passed through ``self.type_clean``.

    Returns:
        The populated, unsaved model instance, or ``None`` when the row is
        too short to supply a mapped foreign key column.
    """
    model_instance = self.model()
    model_instance.csvimport_id = csvimportid

    # NOTE(review): run() pops the header row into a local variable that is
    # not visible here, so looking columns up by name used to raise
    # NameError. A header stored as ``self.indexes`` is used when present;
    # otherwise columns are treated as 1-based positions - confirm intent.
    header = getattr(self, "indexes", None) if self.nameindexes else None

    for column, field, foreignkey in self.mappings:
        if header is not None:
            position = header.index(column)
        else:
            position = int(column) - 1

        if len(row) <= position:
            if foreignkey:
                msg = (
                    "row %s: FKey %s couldnt be set for row - because the row is not parsable - skipping it"
                    % (index, field)
                )
                loglist.append(msg)
                return None
            # Short row without a value for this column: leave the field
            # at its model default rather than failing the whole import.
            continue

        if foreignkey:
            row[position] = self.insert_fkey(foreignkey, row[position])

        if self.debug:
            loglist.append(
                '%s.%s = "%s"' % (self.model.__name__, field, row[position])
            )

        if clean:
            try:
                row[position] = self.type_clean(
                    field, row[position], loglist, index
                )
            except Exception:
                # Cleaning is best effort; fall back to the raw value.
                pass

        try:
            setattr(model_instance, field, row[position])
        except Exception:
            loglist.append(
                "row %s: Column %s = %s couldnt be set for row"
                % (index, field, row[position])
            )

    return model_instance
def row_insert(self, row, model_instance, loglist):
    """Insert a row - separate function for transaction wrapping.

    Applies ``self.defaults`` to fields the row left empty, skips the row
    when ``self.deduplicate`` is set and a matching record already exists,
    then saves the instance between the ``importing_csv`` and
    ``imported_csv`` signals.

    Args:
        row: The CSV row values, sent to signal receivers zipped with the
            first row of ``self.csvfile``.
        model_instance: Instance built by ``make_row``; a falsy value means
            there is nothing to insert.
        loglist: List that error messages are appended to.
    """
    if model_instance:
        if self.defaults:
            for field, value, foreignkey in self.defaults:
                value = self.type_clean(field, value, loglist)
                try:
                    # A default only applies when the row has not already
                    # provided a value for this field.
                    done = getattr(model_instance, field)
                except Exception:
                    # Reading the attribute can itself raise; treat the
                    # field as unset in that case.
                    done = False
                if not done:
                    if foreignkey:
                        value = self.insert_fkey(foreignkey, value)
                    if value:
                        setattr(model_instance, field, value)

        if self.deduplicate:
            matchdict = {
                field + "__exact": getattr(model_instance, field, None)
                for column, field, foreignkey in self.mappings
            }
            try:
                # exists() also counts several matching rows as a duplicate.
                if self.model.objects.filter(**matchdict).exists():
                    return
            except Exception:
                # Best effort: if the lookup cannot be run, attempt the save.
                pass

        try:
            row_data = dict(zip(self.csvfile[0], row))
            importing_csv.send(sender=model_instance, row=row_data)
            model_instance.save()
            imported_csv.send(sender=model_instance, row=row_data)
        except DatabaseError as err:
            # Some backends raise with (number, message) as the arguments.
            error_number, error_message = 0, err
            if len(err.args) == 2 and isinstance(err.args[0], int):
                error_number, error_message = err.args
            # Catch duplicate key error.
            if error_number != 1062:
                loglist.append(
                    "Database Error: %s, Number: %d" % (error_message, error_number)
                )
        except ValueError as err:
            # Usually only occurs if clean=False
            loglist.append(str(err))

    if CSVIMPORT_LOG == "logger":
        for line in loglist:
            logger.info(line)
def parse_header(self, headlist):
    """Match CSV headings against ``self.fieldmap`` and build a mapping string.

    Headings are normalised with ``cleancol`` first. Each heading is looked
    up as written and, when that differs, in lower case. Returns the comma
    separated ``columnN=field`` string, or ``""`` when nothing matched. A
    summary line is appended to ``self.loglist`` only when there is a match.
    """
    headings = [cleancol.sub("_", raw) for raw in headlist]
    pairs = []
    matched = set()
    for position, heading in enumerate(headings, start=1):
        lowered = heading.lower()
        candidates = (heading,) if heading == lowered else (heading, lowered)
        for name in candidates:
            if name not in self.fieldmap:
                continue
            matched.add(name.lower())
            target = self.check_fkey(name, self.fieldmap[name])
            pairs.append("column%s=%s" % (position, target))

    if not pairs:
        return ""

    unmatched = [heading for heading in headings if heading.lower() not in matched]
    if unmatched:
        logmsg = "CSV header unmatched ignored cols = %s" % ", ".join(unmatched)
    else:
        logmsg = "CSV header matched all cols"
    self.loglist.append(logmsg)
    return ",".join(pairs)
class FatalError(Exception):
    """
    Unrecoverable error that carries the offending value.
    """

    def __init__(self, value):
        # Keep the raw payload; __str__ renders its repr.
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)
def handle_label(self, label, **options):
    """Print Django model source code generated from the CSV file ``label``.

    Args:
        label: File system path of the CSV file to inspect.
        **options: Command options. ``model`` may be ``name`` or
            ``app_label.name``; when empty the name is derived from the
            file name. ``defaults`` is passed to ``self.set_mappings``.

    The generated source is stored on ``self.makemodel`` and printed.
    Nothing is generated when the file cannot be read or when
    ``create_new_model`` records errors.
    """
    csvfile = label
    defaults = options.get("defaults", [])
    model = options.get("model", "")
    if not model:
        # Derive the model name from the file name: no extension, no spaces.
        model = os.path.basename(csvfile).rsplit(".", 1)[0].replace(" ", "_")
    # NOTE(review): the ``charset`` option is accepted by this command but
    # is not applied anywhere in this method - confirm whether it should be.
    self.defaults = self.set_mappings(defaults)
    failed = self.check_filesystem(csvfile)
    if failed:
        # Report the problem instead of failing later on an empty csvfile.
        print(failed)
        return
    if "." in model:
        app_label, model = model.split(".", 1)
    else:
        app_label = "csvimport"

    model_definition = self.create_new_model(model, app_label)
    if self.errors:
        print(self.errors)
        return

    self.makemodel = (
        '""" A django model generated with django-csvimport csvinspect\n'
    )
    self.makemodel += ' which used code from OKN messytables to guess data types - may need some manual tweaks!\n"""'
    self.makemodel += "\nfrom django.db import models\n\n"
    self.makemodel += model_definition
    print(self.makemodel)
    return
def get_maxlengths(self, cols):
    """Get maximum column length values to avoid truncation
    -- can always manually reduce size of fields after auto model creation

    Args:
        cols: The column names; only their count is used.

    Returns:
        One length per column, measured over rows 1-99 of ``self.csvfile``.
        Lengths above 10 get 10 added as headroom; columns with no values
        get a length of 10.
    """
    len_cols = len(cols)
    maximums = [0] * len_cols
    for line in self.csvfile[1:100]:
        # Rows may be shorter or longer than the header; only measure the
        # cells that line up with a column.
        for i, value in enumerate(line[:len_cols]):
            if value and len(value) > maximums[i]:
                maximums[i] = len(value)
    # Pad once, after every sampled row has been measured, so the headroom
    # does not compound row after row.
    for i, longest in enumerate(maximums):
        if longest > 10:
            maximums[i] = longest + 10
        elif not longest:
            maximums[i] = 10
    return maximums
def create_date_formats(day_first=True):
    """Build the list of ``strptime`` patterns used for quick date checks.

    Every date layout is combined with each separator and then with each
    time layout, joined directly, with ``T`` or with a space.
    ``day_first`` selects dd/mm or mm/dd ordering for the numeric layouts.
    - use this for fast check of dates, because dateparser is too slow for
    checking all string fields
    """
    if day_first:
        numeric = [("dd/mm/yyyy", "%d/%m/%Y"), ("dd/mm/yy", "%d/%m/%y")]
    else:
        numeric = [("mm/dd/yyyy", "%m/%d/%Y"), ("mm/dd/yy", "%m/%d/%y")]
    date_pairs = numeric + [
        ("yyyy/mm/dd", "%Y/%m/%d"),
        # Things with words in
        ("dd/bb/yyyy", "%d/%b/%Y"),
        ("dd/bbb/yyyy", "%d/%B/%Y"),
    ]

    # zip() stops at the shorter list, so only the first three python time
    # patterns are paired with a label and used.
    time_pairs = list(
        zip(
            "hh:mm:ssz hh:mm:ss hh:mm:sstzd".split(),
            "%H:%M%Z %H:%M:%S %H:%M:%S%Z %H:%M%z %H:%M:%S%z".split(),
        )
    )

    # Keyed by the readable label; the patterns are returned in insertion order.
    patterns_by_label = {}
    for separator in ("-", ".", "/", " "):
        for date_label, date_pattern in date_pairs:
            label = date_label.replace("/", separator)
            pattern = date_pattern.replace("/", separator)
            patterns_by_label[label] = pattern
            for time_label, time_pattern in time_pairs:
                for joiner in ("", "T", " "):
                    patterns_by_label[label + joiner + time_label] = (
                        pattern + joiner + time_pattern
                    )
    return list(patterns_by_label.values())
class IntegerType(CellType):
    """An integer field."""

    guessing_weight = 6
    result_type = int

    def cast(self, value):
        """Convert ``value`` to ``int``.

        Returns ``None`` for ``""`` and ``None``. Raises ``ValueError`` when
        the value is a number with a fractional part, and whatever
        ``locale.atoi`` raises when the value is not numeric at all.
        """
        if value in ("", None):
            return None

        if isinstance(value, str):
            try:
                # Parse plain integer text directly: going through float
                # first loses precision above 2**53.
                return int(value)
            except ValueError:
                pass

        try:
            value = float(value)
        except Exception:
            # get rid of thousands separators
            # e.g. "1,000"
            return locale.atoi(value)

        if value.is_integer():
            return int(value)
        else:
            raise ValueError("Invalid integer: %s" % value)
"1,000.00" 104 | value = locale.atof(value) 105 | if sys.version_info < (2, 7): 106 | value = str(value) 107 | return decimal.Decimal(value) 108 | 109 | 110 | class PercentageType(DecimalType): 111 | """Decimal number, ``decimal.Decimal`` or float numbers.""" 112 | 113 | guessing_weight = 0 114 | 115 | def cast(self, value): 116 | result = DecimalType.cast(self, value) 117 | if result: 118 | result = result / decimal.Decimal("100") 119 | return result 120 | 121 | 122 | class CurrencyType(DecimalType): 123 | guessing_weight = 0 124 | result_type = decimal.Decimal 125 | 126 | def cast(self, value): 127 | value_without_currency = value.split(" ")[0] 128 | return DecimalType.cast(self, value_without_currency) 129 | 130 | 131 | class FloatType(DecimalType): 132 | """FloatType is deprecated""" 133 | 134 | pass 135 | 136 | 137 | class BoolType(CellType): 138 | """A boolean field. Matches true/false, yes/no and 0/1 by default, 139 | but a custom set of values can be optionally provided. 140 | """ 141 | 142 | guessing_weight = 7 143 | result_type = bool 144 | true_values = ("yes", "true", "0") 145 | false_values = ("no", "false", "1") 146 | 147 | def __init__(self, true_values=None, false_values=None): 148 | if true_values is not None: 149 | self.true_values = true_values 150 | if false_values is not None: 151 | self.false_values = false_values 152 | 153 | def cast(self, value): 154 | s = value.strip().lower() 155 | if value in ("", None): 156 | return None 157 | if s in self.true_values: 158 | return True 159 | if s in self.false_values: 160 | return False 161 | raise ValueError 162 | 163 | 164 | class TimeType(CellType): 165 | result_type = datetime.time 166 | 167 | def cast(self, value): 168 | if isinstance(value, self.result_type): 169 | return value 170 | if value in ("", None): 171 | return None 172 | hour = int(value[2:4]) 173 | minute = int(value[5:7]) 174 | second = int(value[8:10]) 175 | if hour < 24: 176 | return datetime.time(hour, minute, second) 177 | else: 
class TimeType(CellType):
    """A time of day, or a duration once the hour count reaches 24."""

    result_type = datetime.time

    def cast(self, value):
        """Convert ``value`` to ``datetime.time`` or ``datetime.timedelta``.

        Hours, minutes and seconds are read from the fixed character
        positions 2-3, 5-6 and 8-9 of ``value``. Empty values give ``None``.
        """
        # NOTE(review): those offsets look like an ISO 8601 duration such as
        # "PT12H30M00S" -- confirm against whatever feeds this type.
        if isinstance(value, self.result_type):
            return value
        if value in ("", None):
            return None
        hour = int(value[2:4])
        minute = int(value[5:7])
        second = int(value[8:10])
        if hour < 24:
            return datetime.time(hour, minute, second)
        # datetime.time cannot represent 24 hours or more
        return datetime.timedelta(hours=hour, minutes=minute, seconds=second)


class DateType(CellType):
    """The date type is special in that it also includes a specific
    date format that is used to parse the date, additionally to the
    basic type information."""

    guessing_weight = 3
    formats = DATE_FORMATS
    result_type = datetime.datetime

    def __init__(self, format):
        """Bind this instance to one ``strptime`` format string."""
        self.format = format

    @classmethod
    def instances(cls):
        """Return one instance per known date format."""
        return [cls(v) for v in cls.formats]

    def test(self, value):
        """Reject strings ``is_date`` rules out before trying ``strptime``."""
        if isinstance(value, str) and not is_date(value):
            return False
        return CellType.test(self, value)

    def cast(self, value):
        """Parse ``value`` with this instance's format.

        Datetimes pass through, empty values give ``None``, and a ``None``
        format returns the value unparsed.
        """
        if isinstance(value, self.result_type):
            return value
        if value in ("", None):
            return None
        if self.format is None:
            return value
        return datetime.datetime.strptime(value, self.format)

    def __eq__(self, other):
        return isinstance(other, DateType) and self.format == other.format

    def __repr__(self):
        return "Date(%s)" % self.format

    def __hash__(self):
        return hash(self.__class__) + hash(self.format)


# Order matters: type_guess() resolves score ties in favour of the type
# listed first.
TYPES = [
    StringType,
    DecimalType,
    IntegerType,
    DateType,
    BoolType,
    TimeType,
    CurrencyType,
    PercentageType,
]


def type_guess(rows, types=TYPES):
    """The type guesser aggregates the number of successful
    conversions of each column to each type, weights them by a
    fixed type priority and select the most probable type for
    each column based on that figure. It returns a list of
    ``CellType``. Empty cells are ignored.

    Modified original messytables.type_guess
    to use simple rows = list of lists of cell values
    ie inspectcsv self.csvfile with cols first row removed if exists

    All columns are checked for all types so this can be slow for huge CSV files
    so make a sample file with a subset of the data for this use case.
    """
    guesses = []
    type_instances = [instance for t in types for instance in t.instances()]
    string_type = StringType()
    for row in rows:
        # grow the per-column score table when a row is wider than any before
        while len(guesses) < len(row):
            guesses.append(defaultdict(int))
        for column, cell in enumerate(row):
            # add string guess so that we have at least one guess
            guesses[column].setdefault(string_type, 0)
            if not cell:
                continue
            for cell_type in type_instances:
                if cell_type.test(cell):
                    guesses[column][cell_type] += cell_type.guessing_weight
    _columns = []
    for guess in guesses:
        # this first creates an array of tuples because we want the types to be
        # sorted. Even though it is not specified, python chooses the first
        # element in case of a tie
        # See: http://stackoverflow.com/a/6783101/214950
        guesses_tuples = [(t, guess[t]) for t in type_instances if t in guess]
        if not guesses_tuples:
            # Only reachable when ``types`` omits StringType and no other type
            # matched; max() on an empty list would raise ValueError.
            _columns.append(string_type)
            continue
        _columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0])
    return _columns
fs = FileSystemStorage(location=settings.MEDIA_ROOT)
CHOICES = (("manual", "manual"), ("cronjob", "cronjob"))
# cache filled by get_models() on first successful call
MODELS = []


def get_models():
    """Return ``(value, label)`` choice pairs naming the importable models.

    Cannot load at module level for later djangos - since its too early.
    The list comes from ``settings.CSVIMPORT_MODELS`` when that is set,
    otherwise from the models of every app in ``INSTALLED_APPS`` except
    ``contenttypes``. A non-empty result is cached in ``MODELS``.
    """
    global MODELS
    if MODELS:
        return MODELS
    if not getattr(settings, "CSVIMPORT_MODELS", []):
        allmodels = []
        for app_name in settings.INSTALLED_APPS:
            app_parts = app_name.split(".")
            # "django.contrib.admin" -> "admin"; "pkg.apps.Conf" -> "pkg"
            if app_name.startswith("django."):
                app_label = app_parts[-1]
            else:
                app_label = app_parts[0]
            try:
                allmodels.extend(apps.get_app_config(app_label).get_models())
            except Exception:
                # Skip an app that cannot be resolved. Resetting the list here
                # would throw away every model already collected.
                continue
        if allmodels:
            MODELS = [
                "%s.%s" % (m._meta.app_label, m.__name__)
                for m in allmodels
                if m._meta.app_label != "contenttypes"
            ]
    else:
        MODELS = deepcopy(settings.CSVIMPORT_MODELS)
    MODELS = tuple((m, m) for m in MODELS)
    return MODELS


class CSVImport(models.Model):
    """Logging model for importing files"""

    model_choice = []
    model_name = models.CharField(
        max_length=255,
        blank=False,
        default="app_label.model_name",
        help_text="Please specify the app_label.model_name",
        # choices are not set here: get_models() cannot run at import time
    )
    # NOTE(review): the indentation of the continuation lines inside this
    # literal could not be recovered for review -- confirm against version
    # control so no spurious migration is generated.
    field_list = models.TextField(
        blank=True,
        help_text='''Enter list of fields in order only if
                   you dont have a header row with matching field names, eg.
                   "column1=shared_code,column2=org(Organisation|name)"''',
    )
    upload_file = models.FileField(upload_to="csv", storage=fs)
    file_name = models.CharField(max_length=255, blank=True)
    encoding = models.CharField(max_length=32, blank=True)
    upload_method = models.CharField(
        blank=False, max_length=50, default="manual", choices=CHOICES
    )
    error_log = models.TextField(help_text="Each line is an import error")
    import_date = models.DateField(auto_now=True)
    import_user = models.CharField(
        max_length=255, default="anonymous", help_text="User id as text", blank=True
    )

    def error_log_html(self):
        """Return the error log as safe HTML, one line break per log line."""
        from django.utils.html import format_html_join
        from django.utils.safestring import mark_safe

        # Each line is escaped: the log holds text taken from imported files.
        # NOTE(review): the exact break tag of the previous implementation
        # was not legible; "<br/>" is assumed.
        return format_html_join(
            mark_safe("<br/>"),
            "{}",
            ((line,) for line in self.error_log.split("\n")),
        )

    # Only honoured by Django < 2.0; newer versions rely on the safe string.
    error_log_html.allow_tags = True

    def __unicode__(self):
        return self.upload_file.name

    def __str__(self):
        # __str__ must return a str even when no file has been attached
        return self.__unicode__() or ""


class ImportModel(models.Model):
    """Optional one to one mapper of import file to Model"""

    csvimport = models.ForeignKey(CSVImport, on_delete=models.CASCADE)
    numeric_id = models.PositiveIntegerField()
    natural_key = models.CharField(max_length=100)
class CSVParser(object):
    """Open a CSV file, check its encoding and parse it into memory
    and set up the map of the fields.

    The class calls ``self.error(...)`` and appends to ``self.loglist``;
    neither is defined here, so the class using it must provide both.
    """

    csvfile = []
    charset = ""
    filehandle = None
    check_cols = False
    string_types = (str,)

    def list_rows(self, rows):
        """Return ``rows`` as a list, optionally checking column counts.

        When ``check_cols`` is set and two rows differ in length, the problem
        is reported through ``self.error`` and an empty list is returned.
        """
        # Materialise first so a one-shot iterator survives the check below.
        rows = list(rows)
        if rows and self.check_cols:
            rowlen = 0
            for row in rows:
                if not rowlen:
                    rowlen = len(row)
                elif rowlen != len(row):
                    # NOTE(review): the indentation inside this literal could
                    # not be recovered for review -- confirm against version
                    # control if anything matches on the message text.
                    self.error(
                        """Sorry you have inconsistent numbers of cols in your CSV rows
                        But you have requested column count checking - so no data has been imported
                        """
                    )
                    return []
        return rows

    def open_csvfile(self, datafile, delimiter=",", reader=True):
        """Detect file encoding and open appropriately.

        Returns the parsed rows as a list of lists of strings. With
        ``reader`` true the ``csv`` module is tried first; if that fails, or
        ``reader`` is false, the file is re-read as raw bytes and split with
        a regular expression.
        """
        self.filehandle = open(datafile, "rb")
        try:
            if not self.charset:
                import chardet

                diagnose = chardet.detect(self.filehandle.read())
                self.charset = diagnose["encoding"]
        finally:
            # the handle is only needed for detection
            self.filehandle.close()

        if reader:
            try:
                csvfile = codecs.open(datafile, "r", self.charset)
            except IOError:
                self.error(
                    "Could not open specified csv file, %s, or it does not exist"
                    % datafile,
                    0,
                )
            else:
                try:
                    rows = list(
                        self.charset_csv_reader(
                            csv_data=csvfile, charset=self.charset, delimiter=delimiter
                        )
                    )
                except Exception:
                    rows = None
                finally:
                    csvfile.close()
                if rows is not None:
                    return self.list_rows(rows)

        # Sometimes encoding is too mashed to be able to open the file as text with csv_reader
        # ... especially in Python 3 - its a lot stricter
        # so reopen as raw unencoded and just try and get lines out one by one
        # TODO: Fix use of a quoted field with commas next to escaped quotes - eg. "field1=SOAP, ""200 g"", bar","field2"
        # The delimiter is escaped so that "|" or "." split literally.
        csvsplit = re.compile(
            r"""(['"]*)(.*?)\1(""" + re.escape(delimiter) + r"""|$)"""
        )
        # chardet reports None when it cannot decide
        encoding = self.charset or "utf-8"
        output = []
        # NOTE(review): rows that came from readlines() still end with their
        # line terminator when they reach the regex.
        for raw_row in self._raw_rows(datafile):
            if not raw_row.rstrip():
                continue
            text = raw_row.decode(encoding)
            # Fix CSV repeat quote used for escaping within quotes; the last
            # match is the empty one at end of string, so it is dropped.
            cells = [m[1].replace('""', '"') for m in csvsplit.findall(text)][:-1]
            if cells:
                output.append(cells)
        return self.list_rows(output)

    def _raw_rows(self, datafile):
        """Return the file's undecoded rows as a list of ``bytes``."""
        try:
            with open(datafile, "rb") as content_file:
                content = content_file.readlines()
        except OSError:
            self.loglist.append("Failed to open file %s" % datafile)
            return []
        if len(content) != 1:
            return content
        # One "line" means the file has no "\n" to split on (or really is a
        # single row), so look for the other line endings by hand.
        content = content[0]
        for ending in (b"\r\n", b"\r", b"\\r", b"\n"):
            if content.find(ending) > -1:
                return content.split(ending)
        return [content]

    def charset_csv_reader(
        self, csv_data, dialect=csv.excel, charset="utf-8", delimiter=",", **kwargs
    ):
        """Yield rows of text cells read from ``csv_data`` with ``csv.reader``."""
        # NOTE(review): charset_encoder() yields bytes, which csv.reader
        # rejects on Python 3, so this generator always raises there and
        # open_csvfile() falls back to its regex splitter. Feeding text
        # instead would change how files are parsed -- decide separately.
        csv_reader = csv.reader(
            self.charset_encoder(csv_data, charset),
            dialect=dialect,
            delimiter=delimiter,
            **kwargs
        )
        for row in csv_reader:
            # decode charset back to Unicode, cell by cell:
            yield [
                cell.decode(charset) if isinstance(cell, bytes) else cell
                for cell in row
            ]

    def charset_encoder(self, csv_data, charset="utf-8"):
        """Check passed a valid charset then encode.

        Yields each line of ``csv_data`` encoded with ``charset``, falling
        back to UTF-8 when ``charset`` is not a usable encoding name.
        """
        try:
            "test_real_charset".encode(charset)
        except Exception:
            charset = "utf-8"
        for line in csv_data:
            yield line.encode(charset)

    def set_mappings(self, mappings):
        """Parse the mappings, and return a list of them.

        ``mappings`` uses the custom syntax
        ``column1=field1(ForeignKey|field),column2=field2``. Each entry
        becomes ``(column, field, foreignkey)`` where ``foreignkey`` is a
        ``(model, field)`` tuple or ``None``.

        >>> parser.set_mappings('column1=b(c|d)')
        [('1', 'b', ('c', 'd'))]
        """
        if not mappings:
            return []

        # value = word or date format match
        mapping_pattern = re.compile(
            r"(\w+)=(\d+/\d+/\d+|\d+-\d+-\d+|\w+)(\(\w+\.*\w*\|\w+\))?"
        )
        # ".*" rather than ".+" so that one-character model names parse too
        foreignkey_pattern = re.compile(r"(\w.*)\|(\w+)", re.U)

        def parse_foreignkey(key):
            """Parse the foreignkey syntax ``(Key|field)`` into a tuple."""
            if key.startswith("(") and key.endswith(")"):
                key = key[1:-1]
            found = foreignkey_pattern.search(key)
            if found is None:
                return None
            return (found.group(1), found.group(2))

        mappings = mappings.replace(",", " ")
        # Strip "column" only from the key side ("column1=" -> "1="); a plain
        # replace() also mangled field names such as "column_name".
        mappings = re.sub(r"column(?=\w*=)", "", mappings)
        self.loglist.append("Using column mappings: %s" % mappings)
        return [
            (column, field, parse_foreignkey(foreignkey))
            for column, field, foreignkey in mapping_pattern.findall(mappings)
        ]

    def check_filesystem(self, csvfile, delimiter=",", reader=True):
        """Check for files on the file system.

        Loads ``csvfile`` (or every ``*.csv`` file in it when it is a
        directory) into ``self.csvfile``. Returns an error message when
        nothing was loaded, otherwise an empty string.
        """
        if csvfile and os.path.exists(csvfile):
            if os.path.isdir(csvfile):
                self.csvfile = []
                # sorted so the row order does not depend on the file system
                for afile in sorted(os.listdir(csvfile)):
                    if not afile.endswith(".csv"):
                        continue
                    filepath = os.path.join(csvfile, afile)
                    try:
                        lines = self.open_csvfile(
                            filepath, delimiter=delimiter, reader=reader
                        )
                    except Exception:
                        # best effort: keep loading the other files, but
                        # leave a trace instead of failing silently
                        self.loglist.append("Failed to parse file %s" % filepath)
                        continue
                    self.csvfile.extend(lines)
            else:
                self.csvfile = self.open_csvfile(
                    csvfile, delimiter=delimiter, reader=reader
                )
        if not getattr(self, "csvfile", []):
            return 'File "%s" not found' % csvfile
        return ""
16 | DEBUG = True 17 | 18 | DEFAULT_AUTO_FIELD = "django.db.models.AutoField" 19 | 20 | ALLOWED_HOSTS = [] 21 | 22 | TEMPLATES = [ 23 | { 24 | "BACKEND": "django.template.backends.django.DjangoTemplates", 25 | "DIRS": [ 26 | # insert your TEMPLATE_DIRS here 27 | ], 28 | "APP_DIRS": True, 29 | "OPTIONS": { 30 | "context_processors": [ 31 | # Insert your TEMPLATE_CONTEXT_PROCESSORS here or use this 32 | # list if you haven't customized them: 33 | "django.template.context_processors.request", 34 | "django.contrib.auth.context_processors.auth", 35 | "django.template.context_processors.debug", 36 | "django.template.context_processors.i18n", 37 | "django.template.context_processors.media", 38 | "django.template.context_processors.static", 39 | "django.template.context_processors.tz", 40 | "django.contrib.messages.context_processors.messages", 41 | ] 42 | }, 43 | } 44 | ] 45 | 46 | # Application definition 47 | 48 | INSTALLED_APPS = ( 49 | "django.contrib.admin", 50 | "django.contrib.auth", 51 | "django.contrib.contenttypes", 52 | "django.contrib.sessions", 53 | "django.contrib.messages", 54 | "django.contrib.staticfiles", 55 | "csvimport.app.CSVImportConf", 56 | ) 57 | 58 | MIDDLEWARE = ( 59 | "django.contrib.sessions.middleware.SessionMiddleware", 60 | "django.middleware.common.CommonMiddleware", 61 | "django.middleware.csrf.CsrfViewMiddleware", 62 | "django.contrib.auth.middleware.AuthenticationMiddleware", 63 | "django.contrib.messages.middleware.MessageMiddleware", 64 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 65 | ) 66 | 67 | SITE_ID = 1 68 | 69 | # This merely needs to be present - as long as your test case specifies a 70 | # urls attribute, it does not need to be populated. 
71 | ROOT_URLCONF = "csvimport.tests.urls" 72 | 73 | WSGI_APPLICATION = "csvimport.wsgi.application" 74 | 75 | # Database 76 | # https://docs.djangoproject.com/en/1.6/ref/settings/#databases 77 | 78 | DATABASES = { 79 | "default": { 80 | "ENGINE": "django.db.backends.sqlite3", 81 | "NAME": os.path.join(BASE_DIR, "db.sqlite3"), 82 | } 83 | } 84 | 85 | # Internationalization 86 | # https://docs.djangoproject.com/en/1.6/topics/i18n/ 87 | 88 | LANGUAGE_CODE = "en-us" 89 | 90 | TIME_ZONE = "UTC" 91 | 92 | USE_I18N = True 93 | 94 | USE_L10N = True 95 | 96 | USE_TZ = True 97 | 98 | # Static files (CSS, JavaScript, Images) 99 | # https://docs.djangoproject.com/en/1.7/howto/static-files/ 100 | 101 | STATIC_URL = "/static/" 102 | 103 | 104 | # If not set or CSVIMPORT = 'screen' then it only sends loglines to Admin UI display 105 | CSVIMPORT_LOG = "logger" 106 | # Turn on logger usage and log to a text file to check for in tests ... 107 | LOGGING = { 108 | "version": 1, 109 | "disable_existing_loggers": False, 110 | "handlers": { 111 | "logfile": { 112 | "class": "logging.handlers.WatchedFileHandler", 113 | "filename": os.path.join(os.path.dirname(__file__), "csvimport_test.log"), 114 | } 115 | }, 116 | "loggers": { 117 | "csvimport": {"handlers": ["logfile"], "level": "DEBUG", "propagate": False} 118 | }, 119 | } 120 | 121 | TEST_RUNNER = "csvimport.tests.testrunner.CSVImportRunner" 122 | MIGRATION_MODULES = {"csvimport": "csvimport.tests.migrations"} 123 | -------------------------------------------------------------------------------- /csvimport/signals.py: -------------------------------------------------------------------------------- 1 | from django import dispatch 2 | 3 | imported_csv = dispatch.Signal() 4 | importing_csv = dispatch.Signal() 5 | -------------------------------------------------------------------------------- /csvimport/tests/README.txt: -------------------------------------------------------------------------------- 1 | Tests Readme 2 | ============ 3 
| 4 | Test files are imported via the __init__.py 5 | 6 | >>> from foobar_tests import * 7 | 8 | For one off run of tests use 9 | 10 | >>> python3 -m django test --settings='csvimport.settings' csvimport.tests 11 | 12 | Or could use following import of settings for django-admin.py ... 13 | 14 | >>> #!/bin/python 15 | >>> from django.core import management 16 | >>> import os 17 | >>> os.environ['DJANGO_SETTINGS_MODULE'] = 'csvimport.settings' 18 | >>> 19 | >>> 20 | >>> if __name__ == "__main__": 21 | >>> management.execute_from_command_line() 22 | 23 | Then can run via 24 | 25 | >>> bin/django-admin test csvimport.tests 26 | -------------------------------------------------------------------------------- /csvimport/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # import the test suites 2 | 3 | from csvimport.tests.issue_tests import RegressionTest 4 | 5 | from csvimport.tests.optional_tests import CommandArgsTest 6 | from csvimport.tests.parse_tests import CommandParseTest 7 | from csvimport.tests.constraint_tests import ConstraintTest 8 | 9 | from csvimport.tests.performance_tests import PerformanceTest 10 | from csvimport.tests.log_tests import LogTest 11 | from csvimport.tests.admin_tests import AdminTest 12 | -------------------------------------------------------------------------------- /csvimport/tests/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from csvimport.tests.models import Country, UnitOfMeasure, Organisation, Item 3 | 4 | admin.site.register(Country) 5 | admin.site.register(UnitOfMeasure) 6 | admin.site.register(Organisation) 7 | admin.site.register(Item) 8 | -------------------------------------------------------------------------------- /csvimport/tests/admin_tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys 3 | 4 | from 
class AdminTest(CommandTestCase):
    """Run test of csvimport admin UI"""

    def test_get_models(self):
        """Check that get_models is working to give a list of models for import"""
        models = get_models()
        self.assertGreater(len(models), 6)
        self.assertIn(("csvimport.Item", "csvimport.Item"), models)

    def test_admin(self):
        """Upload a fixture through the admin add form and check the import."""
        c = Client()
        self.assertEqual(Item.objects.count(), 0)
        self.assertEqual(CSVImport.objects.count(), 0)
        self.user = User.objects.create_superuser(
            "admin", "admin@example.com", "letmein"
        )
        # Fail here rather than on a confusing redirect to the login page.
        self.assertTrue(
            c.login(username="admin", password="letmein"),
            "Could not log in as the admin test user",
        )
        fixpath = os.path.join(os.path.dirname(__file__), "fixtures", "test_plain.csv")
        with open(fixpath) as fp:
            fake_file = ContentFile(fp.read())
        fake_file.name = "test_plain.csv"
        data = {
            "model_name": "csvimport.Item",
            "field_list": "CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS",
            "upload_file": fake_file,
            "encoding": "utf-8",
        }
        response = c.post("/admin/csvimport/csvimport/add/", data, follow=True)
        self.assertEqual(response.status_code, 200)
        page = str(response.content)
        self.assertNotIn("csvimport.Item is not one of the available choices", page)
        self.assertNotIn("This field is required", page)
        self.assertEqual(CSVImport.objects.count(), 1)
        self.assertEqual(Item.objects.count(), 9)
class ConstraintTest(CommandTestCase):
    """Run test of file parsing"""

    def get_country(self, country_code):
        """Return the Country with this code, failing the test if missing."""
        try:
            country = Country.objects.get(code__exact=country_code)
        except ObjectDoesNotExist:
            country = None
        self.assertTrue(
            country, "Failed to get row from imported test csv for countries"
        )
        return country

    def test_empty_notnull(self, filename="bad_country.csv"):
        """Use custom command to upload a country file with missing long lat data"""
        errs = [
            "Field 'latitude' expected a number but got 'null'.",
            "could not convert string to float: 'null'",
            "Field 'latitude' expected a number but got 'null'.",
            "could not convert string to float: 'null'",
            "Imported 3 rows to Country",
        ]
        if django.VERSION[0] == 2:
            errs.append("could not convert string to float: 'null'")
            errs.append("could not convert string to float: 'null'")
        self.command(
            csvfile=filename,
            modelname="csvimport.Country",
            defaults="",
            expected_errs=errs,
            clean=False,
        )
        self.assertEqual(Country.objects.count(), 3)
        country = self.get_country("K1")
        # assertTrue(value, "Montserrat") only checked truthiness, because the
        # second argument is the failure message. Compare with the name the
        # fixture gives the K1 row instead.
        self.assertEqual(country.name, "OKCOUNTRY1")
        Country.objects.all().delete()
"OKCOUNTRY2","K2",1.75,-34.2,"OKCountry2" 6 | 'BadPK',null,1,2,'no primary key' 7 | 'BadPK2',null,1,2,'no primary key again' 8 | -------------------------------------------------------------------------------- /csvimport/tests/fixtures/countries.csv: -------------------------------------------------------------------------------- 1 | "name","code","latitude","longitude","alias","some","other","data","that","we","dont","have","in","our","model" 2 | "AFGHANISTAN","AF",33,65,"Afghanistan",,1,0,0,0,0,0,0,0, 3 | "ÅLAND ISLANDS","AX",60.15,20,,,,,,,,,,, 4 | "ALBANIA","AL",41,20,"Albania",,7,1,2,1,0,0,0,0, 5 | "ALGERIA","DZ",28,3,"Algeria",,53,1,1,4,3,0,0,0, 6 | "AMERICAN SAMOA","AS",-14.33,-170,,,,,,,,,,, 7 | "ANDORRA","AD",42.5,1.5,"Andorra",,4,0,0,0,0,0,0,0, 8 | "ANGOLA","AO",-12.5,18.5,"Angola",,3,0,1,0,0,0,0,0, 9 | "ANGUILLA","AI",18.25,-63.17,"Anguilla",,11,0,0,3,0,0,0,0, 10 | "ANTARCTICA","AQ",-90,0,,,,,,,,,,, 11 | "ANTIGUA AND BARBUDA","AG",17.05,-61.8,"Antigua and Barbuda",,1,0,0,0,0,0,0,0, 12 | "ARGENTINA","AR",-34,-64,"Argentina",,26,2,2,0,1,0,0,0, 13 | "ARMENIA","AM",40,45,"Armenia",,0,0,2,0,0,0,0,0, 14 | "ARUBA","AW",12.5,-69.97,,,,,,,,,,, 15 | "AUSTRALIA","AU",-27,133,"Australia",,862,38,10,2,7,8,657,15, 16 | "AUSTRIA","AT",47.33,13.33,"Austria",,124,5,16,0,3,6,131,2, 17 | "AZERBAIJAN","AZ",40.5,47.5,"Azerbaijan",,3,0,5,3,0,0,0,0, 18 | "BAHAMAS","BS",24.25,-76,"Bahamas",,30,0,3,2,0,0,0,0, 19 | "BAHRAIN","BH",26,50.55,"Bahrain",,31,0,5,2,3,0,0,0, 20 | "BANGLADESH","BD",24,90,"Bangladesh",,33,3,3,0,3,0,3,0, 21 | "BARBADOS","BB",13.17,-59.53,"Barbados",,33,0,1,1,0,0,0,0, 22 | "BELARUS","BY",53,28,"Belarus",,3,0,0,0,0,0,0,0, 23 | "BELGIUM","BE",50.83,4,"Belgium",,287,5,25,3,4,10,163,2, 24 | "BELIZE","BZ",17.25,-88.75,"Belize",,6,0,0,0,0,0,0,0, 25 | "BENIN","BJ",9.5,2.25,"Benin",,1,0,0,0,0,0,0,0, 26 | "BERMUDA","BM",32.33,-64.75,"Bermuda",,38,0,6,3,1,0,0,0, 27 | "BHUTAN","BT",27.5,90.5,"Bhutan",,1,0,0,0,0,0,0,0, 28 | "BOLIVIA, PLURINATIONAL STATE 
OF","BO",-17,-65,"Bolivia",,4,0,0,0,0,0,0,0, 29 | "BOSNIA AND HERZEGOVINA","BA",44,18,"Bosnia and Herzegovina",,4,1,0,1,0,0,0,0, 30 | "BOTSWANA","BW",-22,24,"Botswana",,55,0,0,0,3,0,0,0, 31 | "BOUVET ISLAND","BV",-54.43,3.4,,,,,,,,,,, 32 | "BRAZIL","BR",-10,-55,"Brazil",,96,5,2,3,4,0,134,0, 33 | "BRITISH INDIAN OCEAN TERRITORY","IO",-6,71.5,"British Indian Ocean Territory (BIOT)",,0,1,0,0,0,0,0,0, 34 | "VIRGIN ISLANDS, BRITISH","VG",18.5,-64.5,"British Virgin Islands",,6,0,0,0,0,0,0,0, 35 | "BRUNEI DARUSSALAM","BN",4.5,114.67,"Brunei Darussalam",,42,0,24,9,1,0,0,0, 36 | "BURKINA FASO","BF",13,-2,,,,,,,,,,, 37 | "BURUNDI","BI",-3.5,30,,,,,,,,,,, 38 | "BULGARIA","BG",43,25,"Bulgaria",,21,3,14,4,3,0,0,0, 39 | "CAMBODIA","KH",13,105,"Cambodia",,1,0,0,0,0,0,0,0, 40 | "CAMEROON","CM",6,12,"Cameroon",,14,0,1,0,0,0,0,0, 41 | "CAPE VERDE","CV",16,-24,,,,,,,,,,, 42 | "CANADA","CA",60,-95,"Canada",,613,39,37,18,14,3,499,6, 43 | "CENTRAL AFRICAN REPUBLIC","CF",7,21,,,,,,,,,,, 44 | "CHAD","TD",15,19,,,,,,,,,,, 45 | "CAYMAN ISLANDS","KY",19.5,-80.5,"Cayman Islands",,19,0,3,0,0,0,0,0, 46 | "CHILE","CL",-30,-71,"Chile",,22,4,2,1,9,0,0,1, 47 | "CHRISTMAS ISLAND","CX",-10.5,105.67,,,,,,,,,,, 48 | "COCOS (KEELING) ISLANDS","CC",-12.5,96.83,,,,,,,,,,, 49 | "CHINA","CN",35,105,"China",,1782,54,450,243,89,2,246,9, 50 | "COMOROS","KM",-12.17,44.25,,,,,,,,,,, 51 | "CONGO","CG",-1,15,,,,,,,,,,, 52 | "COLOMBIA","CO",4,-72,"Colombia",,17,2,1,1,2,0,0,0, 53 | "COOK ISLANDS","CK",-21.23,-159.77,,,,,,,,,,, 54 | "COSTA RICA","CR",10,-84,"Costa Rica",,2,0,0,0,0,0,0,0, 55 | "CÔTE D'IVOIRE","CI",8,-5,"Cote d'Ivoire",,1,0,0,0,1,0,0,0, 56 | "CROATIA","HR",45.17,15.5,"Croatia",,11,0,0,0,0,0,0,0, 57 | "CUBA","CU",21.5,-80,"Cuba",,0,0,1,0,0,0,0,0, 58 | "CYPRUS","CY",35,33,"Cyprus",,499,4,79,48,23,1,0,0, 59 | "CZECH REPUBLIC","CZ",49.75,15.5,"Czech Republic",,70,5,9,5,3,2,0,1, 60 | "CONGO, THE DEMOCRATIC REPUBLIC OF THE","CD",0,25,"Democratic Republic of Congo",,1,0,0,0,0,0,0,0, 61 | 
"DJIBOUTI","DJ",11.5,43,,,,,,,,,,, 62 | "DENMARK","DK",56,10,"Denmark",,74,4,13,2,3,3,134,0, 63 | "DOMINICA","DM",15.42,-61.33,"Dominica",,6,1,0,1,0,0,0,0, 64 | "DOMINICAN REPUBLIC","DO",19,-70.67,"Dominican Republic",,1,0,0,0,0,0,0,0, 65 | "ECUADOR","EC",-2,-77.5,"Ecuador",,3,0,0,0,0,0,0,0, 66 | "EL SALVADOR","SV",13.83,-88.92,,,,,,,,,,, 67 | "EQUATORIAL GUINEA","GQ",2,10,,,,,,,,,,, 68 | "EGYPT","EG",27,30,"Egypt",,37,1,2,0,3,0,0,0, 69 | "ERITREA","ER",15,39,"Eritrea",,24,0,0,0,0,0,0,0, 70 | "ESTONIA","EE",59,26,"Estonia",,15,1,6,3,1,0,0,0, 71 | "ETHIOPIA","ET",8,38,"Ethiopia",,11,0,0,0,0,0,0,2, 72 | "FAROE ISLANDS","FO",6.92,158.25,,,,,,,,,,, 73 | "FALKLAND ISLANDS (MALVINAS)","FK",-18,175,"Falkland Islands",,1,0,0,0,0,0,0,0, 74 | "FIJI","FJ",64,26,"Fiji",,6,0,0,0,0,0,0,0, 75 | "FINLAND","FI",47,8,"Finland",,53,5,5,5,0,1,117,2, 76 | "FRENCH GUIANA","GF",42,43.5,,,,,,,,,,, 77 | "FRENCH POLYNESIA","PF",-15,-140,,,,,,,,,,, 78 | "FRENCH SOUTHERN TERRITORIES","TF",-43,67,,,,,,,,,,, 79 | "GABON","GA",46,2,,,,,,,,,,, 80 | "FRANCE","FR",62,-7,"France",,1585,63,148,37,29,17,603,3, 81 | "GAMBIA","GM",13.47,-16.57,"Gambia",,28,0,0,1,0,0,0,0, 82 | "GEORGIA","GE",12.12,-61.67,"Georgia",,0,0,1,0,0,0,0,0, 83 | "GERMANY","DE",51,9,"Germany",,1111,95,75,29,41,21,1083,11, 84 | "GHANA","GH",8,-2,"Ghana",,60,2,1,7,3,0,0,0, 85 | "GIBRALTAR","GI",36.18,-5.37,"Gibraltar",,35,0,3,2,0,0,0,0, 86 | "GREENLAND","GL",72,-40,,,,,,,,,,, 87 | "GREECE","GR",39,22,"Greece",,1126,19,26,47,33,7,75,4, 88 | "GUADELOUPE","GP",16.25,-61.58,,,,,,,,,,, 89 | "GUAM","GU",13.47,144.78,,,,,,,,,,, 90 | "GRENADA","GD",54,-2,"Grenada",,2,0,1,0,0,0,0,0, 91 | "GUATEMALA","GT",15.5,-90.25,"Guatemala",,2,0,0,0,0,0,0,0, 92 | "GUINEA","GN",11,-10,,,,,,,,,,, 93 | "GUINEA-BISSAU","GW",12,-15,,,,,,,,,,, 94 | "GUERNSEY","GG",4,-53,"Guernsey",,84,0,1,1,2,0,0,0, 95 | "HAITI","HT",19,-72.42,,,,,,,,,,, 96 | "HEARD ISLAND AND MCDONALD ISLANDS","HM",-53.1,72.52,,,,,,,,,,, 97 | "HOLY SEE (VATICAN CITY 
STATE)","VA",41.9,12.45,,,,,,,,,,, 98 | "GUYANA","GY",5,-59,"Guyana",,5,0,0,0,1,0,0,0, 99 | "HONDURAS","HN",15,-86.5,"Honduras",,1,0,0,0,0,0,0,0, 100 | "HONG KONG","HK",22.25,114.17,"Hong Kong",,1073,1,143,36,177,0,0,0, 101 | "HUNGARY","HU",47,20,"Hungary",,80,7,3,2,5,1,0,0, 102 | "ICELAND","IS",65,-18,"Iceland",,19,0,0,0,1,1,23,0, 103 | "INDIA","IN",20,77,"India",,253,29,32,60,21,1,49,1, 104 | "INDONESIA","ID",-5,120,"Indonesia",,66,1,4,2,3,0,0,0, 105 | "IRAN, ISLAMIC REPUBLIC OF","IR",32,53,"Iran",,50,3,5,6,5,0,0,0, 106 | "IRAQ","IQ",33,44,"Iraq",,6,0,1,0,3,0,0,0, 107 | "IRELAND","IE",53,-8,"Ireland",,285,51,20,42,26,4,1,1, 108 | "ISLE OF MAN","IM",54.23,-4.57,"Isle of Man",,57,3,9,0,2,0,0,0, 109 | "ISRAEL","IL",31.5,34.75,"Israel",,40,4,1,0,2,2,93,0, 110 | "ITALY","IT",42.83,12.83,"Italy",,619,81,66,23,39,15,391,8, 111 | "JAMAICA","JM",31,36,"Jamaica","",10,2,2,1,1,0,0,0, 112 | "JAPAN","JP",1,38,"Japan","",358,14,31,20,6,0,328,3, 113 | "JERSEY","JE",18.25,-77.5,"Jersey",,140,0,29,2,1,0,0,0, 114 | "JORDAN","JO",36,138,"Jordan",,77,2,14,1,1,0,0,0, 115 | "KAZAKHSTAN","KZ",48,68,"Kazakhstan",,30,1,18,8,0,0,0,0, 116 | "KENYA","KE",41,75,"Kenya",,180,2,26,3,3,0,0,0, 117 | "KIRIBATI","KI",1.42,173,"Kiribati",,1,0,0,0,0,0,0,0, 118 | "KOREA, REPUBLIC OF","KR",37,127.5,,,,,,,,,,, 119 | "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF","KP",40,127,"Korea (South)",,125,7,21,22,15,0,114,0, 120 | "KUWAIT","KW",29.34,47.66,"Kuwait",,21,0,5,1,6,0,0,0, 121 | "KYRGYZSTAN","KG",13,105,"Kyrgyzstan",,2,0,2,0,0,0,0,0, 122 | "LAO PEOPLE'S DEMOCRATIC REPUBLIC","LA",18,105,"Laos",,2,2,0,0,0,0,0,0, 123 | "LATVIA","LV",57,25,"Latvia",,7,2,4,1,0,0,0,0, 124 | "LEBANON","LB",33.83,35.83,"Lebanon",,10,0,0,0,2,0,0,0, 125 | "LIBERIA","LR",6.5,-9.5,,,,,,,,,,, 126 | "LESOTHO","LS",-29.5,28.5,"Lesotho",,27,0,0,0,0,0,0,0, 127 | "LIECHTENSTEIN","LI",47.17,9.53,,,,,,,,,,, 128 | "LIBYAN ARAB JAMAHIRIYA","LY",25,17,"Libyan Arab Republic",,22,0,7,3,3,0,0,0, 129 | 
"LITHUANIA","LT",56,24,"Lithuania",,8,0,12,3,0,0,0,0, 130 | "LUXEMBOURG","LU",49.75,6.17,"Luxembourg",,55,1,9,1,1,0,0,0, 131 | "MACAO","MO",22.17,113.55,"Macao",,9,0,3,0,0,0,0,0, 132 | "MADAGASCAR","MG",-20,47,,,55,0,0,1,0,0,0,0, 133 | "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF","MK",41.83,22,"Macedonia",,5,0,0,0,0,0,0,0, 134 | "MALAWI","MW",-13.5,34,"Malawi",,,,,,,,,, 135 | "MALAYSIA","MY",2.5,112.5,"Malaysia",,965,5,128,13,43,0,0,1, 136 | "MALDIVES","MV",3.25,73,"Maldive Islands",,13,0,4,1,1,0,0,0, 137 | "MALI","ML",17,-4,"Mali",,0,0,0,1,0,0,0,0, 138 | "MARSHALL ISLANDS","MH",9,168,,,,,,,,,,, 139 | "MALTA","MT",35.83,14.58,"Malta",,33,0,4,3,6,1,0,0, 140 | "MAURITANIA","MR",20,-12,,,,,,,,,,, 141 | "MARTINIQUE","MQ",14.67,-61,"Martinique",,4,0,0,0,0,0,0,0, 142 | "MAYOTTE","YT",-12.83,45.17,,,,,,,,,,, 143 | "MAURITIUS","MU",-20.28,57.55,"Mauritius",,54,0,14,0,2,0,0,0, 144 | "MICRONESIA, FEDERATED STATES OF","FM",-51.75,-59,,,,,,,,,,, 145 | "MEXICO","MX",23,-102,"Mexico",,82,6,5,4,11,0,0,0, 146 | "MOLDOVA, REPUBLIC OF","MD",47,29,"Moldovia",,1,0,1,0,0,0,0,0, 147 | "MONACO","MC",43.73,7.4,"Monaco",,2,0,3,0,0,0,0,0, 148 | "MONTSERRAT","MS",16.75,-62.2,"Montserrat" 149 | "MONTENEGRO","ME",42,19,,,,,,,,,,, 150 | "MONGOLIA","MN",46,105,"Mongolia",,1,0,0,0,0,0,0,0, 151 | "MOROCCO","MA",32,-5,"Morocco",,7,1,2,0,1,0,4,0, 152 | "MOZAMBIQUE","MZ",-18.25,35,"Mozambique",,7,0,0,0,0,0,0,0, 153 | "MYANMAR","MM",22,98,"Myanmar",,3,0,0,0,0,0,0,0, 154 | "NAURU","NR",-0.53,166.92,,,,,,,,,,, 155 | "NAMIBIA","NA",-22,17,"Namibia",,22,0,1,2,0,0,0,0, 156 | "NEPAL","NP",28,84,"Nepal",,13,2,3,2,0,0,0,0, 157 | "NETHERLANDS","NL",52.5,5.75,"Netherlands",,266,33,24,6,11,18,487,4, 158 | "NEW CALEDONIA","NC",-21.5,165.5,,,,,,,,,,, 159 | "NETHERLANDS ANTILLES","AN",12.25,-68.75,"Netherlands Antilles",,1,0,0,0,0,0,0,0, 160 | "NEW ZEALAND","NZ",-41,174,"New Zealand",,278,9,3,0,0,0,153,7, 161 | "NICARAGUA","NI",13,-85,"Nicaragua",,2,0,1,0,0,0,0,0, 162 | "NIGER","NE",16,8,"Niger",,1,0,0,0,0,0,0,0, 
163 | "NIUE","NU",-19.03,-169.87,,,,,,,,,,, 164 | "NORFOLK ISLAND","NF",-29.03,167.95,,,,,,,,,,, 165 | "NORTHERN MARIANA ISLANDS","MP",15.2,145.75,,,,,,,,,,, 166 | "NIGERIA","NG",10,8,"Nigeria",,161,1,30,14,5,0,0,0, 167 | "NORWAY","NO",62,10,"Norway",,170,3,18,1,0,9,141,1, 168 | "OMAN","OM",21,57,"Oman",,60,0,3,1,5,0,0,0, 169 | "PALAU","PW",7.5,134.5,,,,,,,,,,, 170 | "PAKISTAN","PK",30,70,"Pakistan",,116,2,8,9,10,0,0,0, 171 | "PALESTINIAN TERRITORY, OCCUPIED","PS",32,35.25,"Palestine",,4,0,0,0,0,0,0,0, 172 | "PANAMA","PA",9,-80,"Panama",,3,0,0,0,0,0,0,0, 173 | "PAPUA NEW GUINEA","PG",-6,147,"Papua New Guinea",,8,0,0,1,0,0,0,0, 174 | "PARAGUAY","PY",-23,-58,"Paraguay",,3,0,0,0,0,0,0,0, 175 | "PERU","PE",-10,-76,"Peru",,14,1,1,0,0,0,0,0, 176 | "PITCAIRN","PN",-24.36,-128.32,,,,,,,,,,, 177 | "PHILIPPINES","PH",13,122,"Philippines",,14,1,2,0,2,0,0,0, 178 | "POLAND","PL",52,20,"Poland",,165,45,22,6,9,2,91,1, 179 | "PORTUGAL","PT",39.5,-8,"Portugal",,169,10,11,4,9,3,0,1, 180 | "PUERTO RICO","PR",18.25,-66.5,"Puerto Rico",,1,0,0,0,0,0,0,0, 181 | "QATAR","QA",25.5,51.25,"Qatar",,9,0,2,0,0,0,0,0, 182 | "RÉUNION","RE",-21.1,55.6,"Reunion",,27,0,0,0,0,0,0,0, 183 | "ROMANIA","RO",46,25,"Romania",,46,6,12,4,4,2,0,1, 184 | "RUSSIAN FEDERATION","RU",60,100,"Russia",,81,16,19,4,3,0,152,1, 185 | "SAINT BARTHÉLEMY","BL",17.9,-62.83,,,,,,,,,,, 186 | "RWANDA","RW",-2,30,"Rwanda",,2,0,1,0,0,0,0,0, 187 | "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA","SH",46,15,"Saint Helena, Ascencion and Tristan Da Cunha",,2,0,0,0,0,0,0,0, 188 | "SAINT KITTS AND NEVIS","KN",17.33,-62.75,"Saint Kitts and Nevis",,3,0,0,0,0,0,0,0, 189 | "SAINT MARTIN","MF",18.05,-63.08,,,,,,,,,,, 190 | "SAINT PIERRE AND MIQUELON","PM",46.83,-56.33,,,,,,,,,,, 191 | "SAINT LUCIA","LC",13.88,-61.13,"Saint Lucia",,23,0,1,0,0,0,0,0, 192 | "SAINT VINCENT AND THE GRENADINES","VC",13.25,-61.2,"Saint Vincent and The Grenadines",,5,0,0,0,1,0,0,0, 193 | "SAN MARINO","SM",43.77,12.42,,,,,,,,,,, 194 | "SAO TOME AND 
PRINCIPE","ST",1,7,,,,,,,,,,, 195 | "SAMOA","WS",-13.58,-172.33,"Samoa",,1,0,0,0,1,0,0,0, 196 | "SENEGAL","SN",14,-14,,,,,,,,,,, 197 | "SAUDI ARABIA","SA",25,45,"Saudi Arabia",,72,0,22,11,8,0,0,0, 198 | "SERBIA","RS",44,21,"Serbia And Montenegro",,14,4,3,2,1,0,0,0, 199 | "SEYCHELLES","SC",-4.58,55.67,"Seychelles",,7,0,0,0,0,0,0,0, 200 | "SIERRA LEONE","SL",8.5,-11.5,"Sierra Leone",,17,0,1,1,0,0,0,0, 201 | "SINGAPORE","SG",-15.93,-5.7,"Singapore",,484,3,62,8,2,0,0,3, 202 | "SLOVAKIA","SK",48.67,19.5,"Slovakia",,13,4,8,0,2,0,0,0, 203 | "SLOVENIA","SI",78,20,"Slovenia",,9,0,1,0,0,0,0,0, 204 | "SOLOMON ISLANDS","SB",-8,159,"Solomon Islands",,3,0,1,0,0,0,0,0, 205 | "SOMALIA","SO",10,49,"Somalia",,0,8,0,0,0,0,0,0, 206 | "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS","GS",-54.5,-37,,,,,,,,,,, 207 | "SOUTH AFRICA","ZA",-29,24,"South Africa",,127,3,4,3,3,3,85,2, 208 | "SPAIN","ES",40,-4,"Spain",,826,32,68,12,16,7,234,3, 209 | "SRI LANKA","LK",7,81,"Sri Lanka",,90,3,24,4,3,0,0,0, 210 | "SURINAME","SR",4,-56,,,,,,,,,,, 211 | "SVALBARD AND JAN MAYEN","SJ",78,20,,,,,,,,,,, 212 | "SUDAN","SD",15,30,"Sudan",,24,0,0,3,0,0,0,0, 213 | "SWAZILAND","SZ",-26.5,31.5,"Swaziland",,20,0,0,0,0,0,0,0, 214 | "SWEDEN","SE",62,15,"Sweden",,125,8,7,1,3,5,300,6, 215 | "SWITZERLAND","CH",47,8,"Switzerland",,250,7,19,9,9,2,341,3, 216 | "SYRIAN ARAB REPUBLIC","SY",35,38,"Syrian Arab Republic",,8,0,0,0,3,0,0,0, 217 | "TAJIKISTAN","TJ",39,71,,,,,,,,,,, 218 | "TAIWAN, PROVINCE OF CHINA","TW",23.5,121,"Taiwan",,453,4,24,31,31,0,0,1, 219 | "TANZANIA, UNITED REPUBLIC OF","TZ",-6,35,"Tanzania",,41,0,0,0,3,1,0,0, 220 | "TIMOR-LESTE","TL",-8.5,125.55,,,,,,,,,,, 221 | "TOGO","TG",8,1.17,,,,,,,,,,, 222 | "TOKELAU","TK",-9,-172,,,,,,,,,,, 223 | "TONGA","TO",-20,-175,,,,,,,,,,, 224 | "THAILAND","TH",15,100,"Thailand",,278,5,21,14,28,0,0,1, 225 | "TRINIDAD AND TOBAGO","TT",11,-61,"Trinidad and Tobago",,34,2,6,0,1,0,0,0, 226 | "TUNISIA","TN",34,9,"Tunisia",,3,1,1,0,0,0,0,0, 227 | 
"TURKEY","TR",39,35,"Turkey",,82,2,2,8,3,0,0,0, 228 | "TURKMENISTAN","TM",40,60,"Turkmenistan",,1,0,1,0,0,0,0,0, 229 | "TUVALU","TV",-8,178,,,,,,,,,,, 230 | "TURKS AND CAICOS ISLANDS","TC",21.75,-71.58,"Turks and Caicos Islands",,10,0,1,0,0,0,0,0, 231 | "UGANDA","UG",1,32,"Uganda",,56,0,2,1,3,0,0,0, 232 | "UKRAINE","UA",49,32,"Ukraine",,11,6,1,0,0,0,0,0, 233 | "UNITED ARAB EMIRATES","AE",24,54,"United Arab Emirates",,107,0,11,3,3,0,0,0, 234 | "UNITED KINGDOM","GB",-1,11.75,"United Kingdom","The University of Bristol has research links with colleagues from universities right across the UK - collaborating on research projects, academic publications, the organisation of conferences and the exchange of knowledge and information.",84679,4715,11717,2099,1362,0,0,0, 235 | "UNITED STATES MINOR OUTLYING ISLANDS","UM",19.28,166.6,,,,,,,,,,, 236 | "URUGUAY","UY",-33,-56,"Uruguay",,1,0,0,0,0,0,0,0, 237 | "UNITED STATES","US",38,-97,"USA",,2422,72,115,37,56,36,3180,61, 238 | "UZBEKISTAN","UZ",41,64,"Uzbekistan",,3,0,2,0,0,0,0,0, 239 | "VANUATU","VU",-16,167,"Vanuatu",,2,0,0,0,0,0,0,0, 240 | "VENEZUELA, BOLIVARIAN REPUBLIC OF","VE",8,-66,"Venezuela",,18,3,1,1,1,0,0,0, 241 | "VIET NAM","VN",16,106,"VietNam",,20,1,17,3,1,0,0,0, 242 | "VIRGIN ISLANDS, U.S.","VI",18.33,-64.83,,,,,,,,,,, 243 | "WALLIS AND FUTUNA","WF",-13.3,-176.2,,,,,,,,,,, 244 | "WESTERN SAHARA","EH",24.5,-13,,,,,,,,,,, 245 | "YEMEN","YE",15,48,"Yemen",,4,0,0,0,1,0,0,0, 246 | "ZAMBIA","ZM",-15,30,"Zambia",,56,2,2,2,0,0,0,0, 247 | "ZIMBABWE","ZW",-20,30,"Zimbabwe",,39,2,4,2,1,0,0,0, 248 | -------------------------------------------------------------------------------- /csvimport/tests/fixtures/issue98.csv: -------------------------------------------------------------------------------- 1 | 
co_id,co_role,co_level,co_region,tm_role,tm_level,tm_region,co_salary_low,co_salary_mid,co_salary_high,co_equity_low,co_equity_mid,co_equity_high,co_bonus_low,co_bonus_mid,co_bonus_high,co_total_comp_low,co_total_comp_mid,co_total_comp_high 2 | abcd190,Software Engineer,4,San Francisco,Software Engineer,4.5,San Francisco,"$152,500","$175,500","$202,000","$44,000","$50,000","$60,000",15%,15%,15%,"$219,375","$251,825","$292,300" 3 | abcd190,Software Engineer,5,San Francisco,Software Engineer,5,San Francisco,"$175,500","$202,000","$232,500","$57,000","$70,000","$80,000",20%,20%,20%,"$267,600","$312,400","$359,000" 4 | abcd190,Researcher,8,San Francisco,Data Science,6,San Francisco,"$239,000","$275,000","$316,500","$96,000","$115,200","$138,240",25%,25%,25%,"$394,750","$458,950","$533,865" 5 | abcd190,Data Science,1,San Francisco,Data Analyst,1,San Francisco,"$80,000","$92,000","$105,800","$10,000","$12,000","$14,400",15%,15%,15%,"$102,000","$117,800","$136,070" 6 | abcd190,Data Science,2,San Francisco,Data Analyst,2,San Francisco,"$92,000","$106,000","$121,500","$13,000","$15,600","$18,720",15%,15%,15%,"$118,800","$137,500","$158,445" 7 | -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_broken_rows.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS bucket,WA041,Save UK,Bucket 20 litre with lid,Set,300,Stock BROKEN ROW ... 
bucketWA041Save UKBucket 20 litre with lid,Set,500 watercan,WA017,Save UK,"Jerry Can, Collapsible (10l, 20l)",Kit,1800,Stock sheeting,RF007,Save UK,"Plastic sheeting, 4*60m, roll",Metre,12000,Stock BROKEN ROW ...I am really not meant to be here row tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),15,On Order -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_char.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS watercan,CWATCONT20F,AID-France,"NOURRICE A EAU, 20 l, pliable, bouchon 5 cm, plastique alim.",pi图e,1000,Stock sheeting,CSHEPLASW4W,AID-France,"PLASTIC SHEETING, 4x60m, blanc/blanc, 6 bandes, rouleau",pi图e,300,Stock tent,CSHETENF12C,AID-France,"TENTE FAMILIALE, 12 m_, COMPLETE (tapis de sol/double toit)",pi图e,180,Stock tent,CSHETENF12C,AID-France,"TENTE FAMILIALE, 12 m_, COMPLETE (tapis de sol/double toit)",pi图e,100,On Order -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_char2.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS Odstranit aktuální obrázek,CWATCONT20F,AID-France,Изтриване на настоящия образ,inch,1000,Stock Διαγραφή τρέχον αρχείου,CSHEPLASW4W,AID-France,Aquest element està en ús per algú altre i no es pot modificar,Nächster,300,Stock tent,o suba un archivo (el contenido existente será reemplazado),AID-France,"TENTE FAMILIALE, 12 m_, COMPLETE (tapis de sol/double toit)",inche's,180,Stock Cet élément est utilisé par quelqu'un d'autre et ne peux être modifié,CSHETENF12C,AID-France,"TENTE FAMILIALE, 12 m_, COMPLETE (tapis de sol/double toit)",删除当前图片,101,On Order -------------------------------------------------------------------------------- 
/csvimport/tests/fixtures/test_duplicate.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock bucket,WA041,Save UK,Bucket 20 litre with lid,Set,300,Stock bucket,WA041,Save UK,Bucket 20 litre with lid,Set,300,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock watercan,WA017,Save UK,"Jerry Can, Collapsible (10l, 20l)",Kit,1800,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_headless.csv: -------------------------------------------------------------------------------- 1 | bucket,WA041,Save UK,Bucket 20 litre with lid,Set,300,Stock bucket,WA041,Save UK,Bucket 20 litre with lid,Set,500,ETA 10-AUG-2011 watercan,WA017,Save UK,"Jerry Can, Collapsible (10l, 20l)",Kit,1800,Stock bednet,MD004,Save UK,"Mosquito net, Pre-treated, 190x180x150cm, Long lasting, Blue",Piece(s),55,Stock bednet,MD004,Save UK,"Mosquito net, Pre-treated, 190x180x150cm, Long lasting, Blue",Piece(s),3000,On Order sheeting,RF007,Save UK,"Plastic sheeting, 4*60m, roll",Metre,12000,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),15,On Order -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_new_model.csv: -------------------------------------------------------------------------------- 1 | wordcol,codecol,textcol,bigtextcol,numbercol,intcol,boolcol,datecol tent,RF024,Save UK,"Tent, Family, 17.5m2",32.45,45,True,20/01/2014 bucket,WA041,Save UK,Bucket 20 litre with lid,-234,300,False,20/01/2014 bucket,WA041,Save UK,Bucket 20 litre with lid,12.45,300,True,20/01/2014 tent,RF024,Save UK,"Tent, Family, 17.5m2",33.23,45,False,21/01/1985 watercan,WA017,Save UK,"Jerry Can, Collapsible 
(10l, 20l)",2222.3,1800,True,20/01/2014 tent,RF024,Save UK,"Tent, Family, 17.5m2",123,45,True,23/03/2014 -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_number.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),-23,Stock bucket,WA041,Save UK,Bucket 20 litre with lid,Set,,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock watercan,WA017,Save UK,"Jerry Can, Collapsible (10l, 20l)",Kit,33.333,Stock tent,RF028,Save UK,"Tent, Family, 17.5m2",Piece(s),9999999999999999999999999999,Stock bucket,RF028,Save UK,"Tent, Family, 5m2",Piece(s),Not_a_Number,Stock bucket,RF029,Save UK,"Tent, Family, 5m2",Piece(s),NaN,Stock -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_plain.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS bucket,WA041,Save UK,Bucket 20 litre with lid,Set,300,Stock bucket,WA041,Save UK,Bucket 20 litre with lid,Set,500,ETA 10-AUG-2011 watercan,WA017,Save UK,"Jerry Can, Collapsible (10l, 20l)",Kit,1800,Stock bednet,MD004,Save UK,"Mosquito net, Pre-treated, 190x180x150cm, Long lasting, Blue",Piece(s),55,Stock bednet,MD004,Save UK,"Mosquito net, Pre-treated, 190x180x150cm, Long lasting, Blue",Piece(s),3000,On Order sheeting,RF007,Save UK,"Plastic sheeting, 4*60m, roll",Metre,12000,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),45,Stock tent,RF024,Save UK,"Tent, Family, 17.5m2",Piece(s),15,On Order -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_quoted.csv: -------------------------------------------------------------------------------- 1 | 
"CODE_SHARE","CODE_ORG","ORGANISATION","DESCRIPTION","UOM","QUANTITY","STATUS" 2 | "heater","CWATCONT20F","AID-France","","pie",1000,"Goods" 3 | "blanket","CSHEPLASW4W","AID-France","PLASTIC SHEETING, blanc/rouge, 6 bandes, rouleau","pie","","Order" 4 | "shed","CSHETENF12C","AID-France","TENTE FAMILIALE, COMPLETE (tapis de sol/double toit)","pie","180","Stock" 5 | "soap","DEXTSOAP1B2","AID-France","SOAP, ""200 g"" bar","pie","30","Goods" -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_single_row.csv: -------------------------------------------------------------------------------- 1 | "CODE_SHARE","CODE_ORG","ORGANISATION","DESCRIPTION","UOM","QUANTITY","STATUS" 2 | "extralinereturns","CWATCONT20F","AID-France","","pie",1000,"Goods" 3 | 4 | -------------------------------------------------------------------------------- /csvimport/tests/fixtures/test_tab.csv: -------------------------------------------------------------------------------- 1 | CODE_SHARE CODE_ORG ORGANISATION DESCRIPTION UOM QUANTITY STATUS bucket WA041 Save UK Bucket 20 litre with lid Set 300 Stock bucket WA041 Save UK Bucket 20 litre with lid Set 500 ETA 10-AUG-2011 watercan WA017 Save UK "Jerry Can Collapsible (10l 20l)" Kit 1800 Stock bednet MD004 Save UK "Mosquito net, Pre-treated 190x180x150cm Long lasting Blue" Piece(s) 55 Stock bednet MD004 Save UK "Mosquito net, Pre-treated 190x180x150cm Long lasting Blue" Piece(s) 3000 On Order sheeting RF007 Save UK "Plastic sheeting, 4*60m, roll" Metre 12000 Stock tent RF024 Save UK "Tent Family 17.5m2" Piece(s) 45 Stock tent RF024 Save UK "Tent Family 17.5m2" Piece(s) 15 On Order -------------------------------------------------------------------------------- /csvimport/tests/issue_tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Use unicode source code to make test character string writing easier 3 | from 
class LogTest(CommandTestCase):
    """Check the import log file and the model source produced by inspectcsv."""

    # Path of the log file; filled in by get_log_path() from settings.LOGGING.
    logpath = ""

    def get_log_path(self):
        """Locate the log file that should have been written by the parse tests.

        Stores the ``filename`` of the ``logfile`` handler found in
        ``settings.LOGGING`` on ``self.logpath``.

        Returns:
            True when CSVIMPORT_LOG is ``"logger"`` and the configured
            ``.log`` file exists on disk, otherwise False (after printing
            why the log cannot be tested).
        """
        if CSVIMPORT_LOG != "logger":
            print(
                """CSVIMPORT_LOG is not set to 'logger' in settings
                     - assume not using csvimport.settings
                     - so cannot test the log"""
            )
            return False
        logging = getattr(settings, "LOGGING", "")
        if logging:
            handlers = logging.get("handlers", {})
            if handlers:
                logfile = handlers.get("logfile", {})
                if logfile:
                    self.logpath = logfile.get("filename", "")
        if self.logpath.endswith(".log"):
            if os.path.exists(self.logpath):
                print("Found csvimport_test.log")
                return True
        print(
            """cvsimport logging is not set up for %s from
                 csvimport.tests.settings so cannot test the log"""
            % self.logpath
        )
        return False

    def test_log(self):
        """Check the log is there and then remove it"""
        if self.get_log_path():
            # Close the handle before deleting: an open handle leaks and makes
            # os.remove fail on platforms that lock open files.
            with open(self.logpath) as csvlog:
                lines = csvlog.read()
            self.assertIn("Column quantity = -23, less than zero so set to 0", lines)
            os.remove(self.logpath)
            print("Deleted csvimport_test.log")

    def test_new_model(self, filename="test_new_model.csv"):
        """Use custom command to inspect the file and generate model source"""
        makemodel = self.inspectcsv(csvfile=filename, model="create_new_model.shiny")
        # Only the stable parts of the generated source are asserted.
        self.assertIn("wordcol = models.CharField", makemodel)
        self.assertIn("class CreateNewModelShiny", makemodel)
        self.assertIn("create_new_model_shiny", makemodel)
dependencies = [] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name="CSVImport", 16 | fields=[ 17 | ( 18 | "id", 19 | models.AutoField( 20 | auto_created=True, 21 | primary_key=True, 22 | serialize=False, 23 | verbose_name="ID", 24 | ), 25 | ), 26 | ( 27 | "model_name", 28 | models.CharField( 29 | choices=[], 30 | default="csvimport.Item", 31 | help_text="Please specify the app_label.model_name", 32 | max_length=255, 33 | ), 34 | ), 35 | ( 36 | "field_list", 37 | models.TextField( 38 | blank=True, 39 | help_text='Enter list of fields in order only if\n you dont have a header row with matching field names, eg.\n "column1=shared_code,column2=org(Organisation|name)"', 40 | ), 41 | ), 42 | ( 43 | "upload_file", 44 | models.FileField( 45 | storage=django.core.files.storage.FileSystemStorage( 46 | location="" 47 | ), 48 | upload_to="csv", 49 | ), 50 | ), 51 | ("file_name", models.CharField(blank=True, max_length=255)), 52 | ("encoding", models.CharField(blank=True, max_length=32)), 53 | ( 54 | "upload_method", 55 | models.CharField( 56 | choices=[("manual", "manual"), ("cronjob", "cronjob")], 57 | default="manual", 58 | max_length=50, 59 | ), 60 | ), 61 | ( 62 | "error_log", 63 | models.TextField(help_text="Each line is an import error"), 64 | ), 65 | ("import_date", models.DateField(auto_now=True)), 66 | ( 67 | "import_user", 68 | models.CharField( 69 | blank=True, 70 | default="anonymous", 71 | help_text="User id as text", 72 | max_length=255, 73 | ), 74 | ), 75 | ], 76 | ), 77 | migrations.CreateModel( 78 | name="ImportModel", 79 | fields=[ 80 | ( 81 | "id", 82 | models.AutoField( 83 | auto_created=True, 84 | primary_key=True, 85 | serialize=False, 86 | verbose_name="ID", 87 | ), 88 | ), 89 | ("numeric_id", models.PositiveIntegerField()), 90 | ("natural_key", models.CharField(max_length=100)), 91 | ( 92 | "csvimport", 93 | models.ForeignKey( 94 | on_delete=django.db.models.deletion.CASCADE, 95 | to="csvimport.CSVImport", 96 | ), 97 | ), 98 | ], 99 | ), 
100 | ] 101 | -------------------------------------------------------------------------------- /csvimport/tests/migrations/0002_country_issue98_item_organisation_unitofmeasure.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.0.5 on 2020-05-23 11:52 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("csvimport", "0001_initial"), 10 | ] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="Issue98", 15 | fields=[ 16 | ( 17 | "co_id", 18 | models.CharField(default="", max_length=7, serialize=False), 19 | ), 20 | ("co_role", models.CharField(blank=True, max_length=16, null=True)), 21 | ("co_level", models.IntegerField(blank=True, default=0, null=True)), 22 | ("co_region", models.CharField(blank=True, max_length=16, null=True)), 23 | ("tm_role", models.CharField(blank=True, max_length=16, null=True)), 24 | ( 25 | "tm_level", 26 | models.DecimalField( 27 | blank=True, decimal_places=1, default=0, max_digits=3, null=True 28 | ), 29 | ), 30 | ("tm_region", models.CharField(blank=True, max_length=16, null=True)), 31 | ( 32 | "co_salary_low", 33 | models.CharField(blank=True, default="", max_length=8), 34 | ), 35 | ( 36 | "co_salary_mid", 37 | models.CharField(blank=True, default="", max_length=8), 38 | ), 39 | ( 40 | "co_salary_high", 41 | models.CharField(blank=True, default="", max_length=8), 42 | ), 43 | ( 44 | "co_equity_low", 45 | models.CharField(blank=True, default="", max_length=8), 46 | ), 47 | ( 48 | "co_equity_mid", 49 | models.CharField(blank=True, default="", max_length=8), 50 | ), 51 | ( 52 | "co_equity_high", 53 | models.CharField(blank=True, default="", max_length=8), 54 | ), 55 | ( 56 | "co_bonus_low", 57 | models.CharField(blank=True, default="", max_length=3), 58 | ), 59 | ( 60 | "co_bonus_mid", 61 | models.CharField(blank=True, default="", max_length=3), 62 | ), 63 | ( 
64 | "co_bonus_high", 65 | models.CharField(blank=True, default="", max_length=3), 66 | ), 67 | ( 68 | "co_total_comp_low", 69 | models.CharField(blank=True, default="", max_length=8), 70 | ), 71 | ( 72 | "co_total_comp_mid", 73 | models.CharField(blank=True, default="", max_length=8), 74 | ), 75 | ( 76 | "co_total_comp_high", 77 | models.CharField(blank=True, default="", max_length=8), 78 | ), 79 | ], 80 | options={ 81 | "db_table": "csvtests_issue98", 82 | "managed": True, 83 | }, 84 | ), 85 | migrations.CreateModel( 86 | name="Country", 87 | fields=[ 88 | ( 89 | "code", 90 | models.CharField(max_length=4, primary_key=True, serialize=False), 91 | ), 92 | ("name", models.CharField(max_length=255)), 93 | ("latitude", models.FloatField(default=0, null=True)), 94 | ("longitude", models.FloatField(default=0, null=True)), 95 | ("alias", models.CharField(max_length=255, null=True)), 96 | ], 97 | options={ 98 | "db_table": '"csvtests_country"', 99 | "managed": True, 100 | }, 101 | ), 102 | migrations.CreateModel( 103 | name="Organisation", 104 | fields=[ 105 | ( 106 | "id", 107 | models.AutoField( 108 | auto_created=True, 109 | primary_key=True, 110 | serialize=False, 111 | verbose_name="ID", 112 | ), 113 | ), 114 | ("name", models.CharField(max_length=255)), 115 | ], 116 | options={ 117 | "db_table": "csvtests_organisation", 118 | "managed": True, 119 | }, 120 | ), 121 | migrations.CreateModel( 122 | name="UnitOfMeasure", 123 | fields=[ 124 | ( 125 | "id", 126 | models.AutoField( 127 | auto_created=True, 128 | primary_key=True, 129 | serialize=False, 130 | verbose_name="ID", 131 | ), 132 | ), 133 | ("name", models.CharField(max_length=32)), 134 | ], 135 | options={ 136 | "db_table": "csvtests_unitofmeasure", 137 | "managed": True, 138 | }, 139 | ), 140 | migrations.CreateModel( 141 | name="Item", 142 | fields=[ 143 | ( 144 | "id", 145 | models.AutoField( 146 | auto_created=True, 147 | primary_key=True, 148 | serialize=False, 149 | verbose_name="ID", 150 | ), 151 | ), 
class Issue98(models.Model):
    """Row of the issue98.csv fixture used by the issue 98 regression test.

    Autogenerated model file csvimportissue98 Sun May 3 11:00:37 2020
    """

    # Fields are declared in the same order as the columns of issue98.csv.
    # test_issue_98 compares _meta.get_fields() positionally against a sample
    # row, so do not reorder them.
    #
    # NOTE(review): co_id is the primary key here, yet every row of the
    # fixture carries the same co_id and the visible migration creates the
    # column without primary_key - confirm this mismatch is intended.
    co_id = models.CharField(
        max_length=7, primary_key=True, blank=False, null=False, default=""
    )
    co_role = models.CharField(max_length=16, blank=True, null=True)
    co_level = models.IntegerField(blank=True, null=True, default=0)
    co_region = models.CharField(max_length=16, blank=True, null=True)
    tm_role = models.CharField(max_length=16, blank=True, null=True)
    # One decimal place: the fixture contains levels such as 4.5.
    tm_level = models.DecimalField(
        max_digits=3, decimal_places=1, blank=True, null=True, default=0
    )
    tm_region = models.CharField(max_length=16, blank=True, null=True)
    # Money and percentage columns are stored as text exactly as they appear
    # in the CSV, e.g. "$92,000" and "15%".
    co_salary_low = models.CharField(max_length=8, blank=True, default="")
    co_salary_mid = models.CharField(max_length=8, blank=True, default="")
    co_salary_high = models.CharField(max_length=8, blank=True, default="")
    co_equity_low = models.CharField(max_length=8, blank=True, default="")
    co_equity_mid = models.CharField(max_length=8, blank=True, default="")
    co_equity_high = models.CharField(max_length=8, blank=True, default="")
    co_bonus_low = models.CharField(max_length=3, blank=True, default="")
    co_bonus_mid = models.CharField(max_length=3, blank=True, default="")
    co_bonus_high = models.CharField(max_length=3, blank=True, default="")
    co_total_comp_low = models.CharField(max_length=8, blank=True, default="")
    co_total_comp_mid = models.CharField(max_length=8, blank=True, default="")
    # Last CSV column; the regression test checks it is populated on every row.
    co_total_comp_high = models.CharField(max_length=8, blank=True, default="")

    class Meta:
        # Test models are registered under the main app label.
        app_label = "csvimport"
        db_table = '"csvtests_issue98"'
        managed = True
41 | and lat long for Geopoint Mapping 42 | """ 43 | 44 | code = models.CharField(max_length=4, primary_key=True) 45 | name = models.CharField(max_length=255) 46 | latitude = models.FloatField(null=True, default=0) 47 | longitude = models.FloatField(null=True, default=0) 48 | alias = models.CharField(max_length=255, null=True) 49 | 50 | class Meta: 51 | app_label = "csvimport" 52 | db_table = '"csvtests_country"' 53 | managed = True 54 | 55 | def __str__(self): 56 | return "%s (%s)" % (self.name, self.code) 57 | 58 | 59 | class UnitOfMeasure(models.Model): 60 | name = models.CharField(max_length=32) 61 | 62 | class Meta: 63 | app_label = "csvimport" 64 | db_table = "csvtests_unitofmeasure" 65 | managed = True 66 | 67 | def __str__(self): 68 | return self.name 69 | 70 | 71 | class Organisation(models.Model): 72 | name = models.CharField(max_length=255) 73 | 74 | def __str__(self): 75 | return self.name 76 | 77 | class Meta: 78 | app_label = "csvimport" 79 | db_table = "csvtests_organisation" 80 | managed = True 81 | 82 | 83 | class Item(models.Model): 84 | TYPE = models.PositiveIntegerField(default=0) 85 | code_share = models.CharField( 86 | max_length=32, help_text="Cross-organization item code" 87 | ) 88 | code_org = models.CharField( 89 | max_length=32, help_text="Organization-specfific item code" 90 | ) 91 | description = models.TextField(null=True) 92 | quantity = models.PositiveIntegerField(default=1) 93 | uom = models.ForeignKey( 94 | UnitOfMeasure, on_delete=models.CASCADE, help_text="Unit of Measure" 95 | ) 96 | organisation = models.ForeignKey(Organisation, on_delete=models.CASCADE) 97 | status = models.CharField(max_length=10, null=True) 98 | date = models.DateField(auto_now=True, null=True, validators=[]) 99 | country = models.ForeignKey(Country, on_delete=models.CASCADE, null=True) 100 | 101 | class Meta: 102 | app_label = "csvimport" 103 | db_table = "csvtests_item" 104 | managed = True 105 | 106 | def __str__(self): 107 | return self.description 108 | 
-------------------------------------------------------------------------------- /csvimport/tests/optional_tests.py: -------------------------------------------------------------------------------- 1 | """ Test use of optional command line args """ 2 | from csvimport.tests.testcase import CommandTestCase 3 | from csvimport.tests.models import Item 4 | 5 | 6 | class CommandArgsTest(CommandTestCase): 7 | """Run test of use of optional command line args - mappings, default and charset""" 8 | 9 | def test_mappings(self, filename="test_headless.csv"): 10 | """Use custom command to upload file and parse it into Items 11 | Handle either mapping format 12 | TODO: add handling of spaces in defaults? 13 | """ 14 | # header equivalent only mapping 15 | mappings = "CODE_SHARE,CODE_ORG,ORGANISATION,DESCRIPTION,UOM,QUANTITY,STATUS" 16 | # errs = ['Using manually entered mapping list'] 17 | self.command( 18 | filename, "csvimport.Item", "country=KE(Country|code)", mappings=mappings 19 | ) 20 | item = self.get_item("sheeting") 21 | # Check a couple of the fields in Item 22 | self.assertEqual(item.code_org, "RF007") 23 | self.assertEqual(item.description, "Plastic sheeting, 4*60m, roll") 24 | # Check related Organisation model is created 25 | self.assertEqual(item.organisation.name, "Save UK") 26 | Item.objects.all().delete() 27 | 28 | # full mapping 29 | mappings = """column1=code_share,column2=code_org, 30 | column3=organisation(Organisation|name), 31 | column5=uom(UnitOfMeasure|name),column7=status""" 32 | defaults = "country=KE(Country|code),quantity=5,description=stuff" 33 | errs = ["Using manually entered mapping list"] 34 | self.command( 35 | filename, 36 | "csvimport.Item", 37 | mappings=mappings, 38 | defaults=defaults, 39 | expected_errs=errs, 40 | ) 41 | item = self.get_item("sheeting") 42 | # Check a couple of the fields in Item 43 | self.assertEqual(item.quantity, 5) 44 | self.assertEqual(item.code_org, "RF007") 45 | self.assertEqual(item.description, "stuff") 46 | # 
Check related Organisation model is created 47 | self.assertEqual(item.organisation.name, "Save UK") 48 | Item.objects.all().delete() 49 | 50 | def test_default(self, filename="test_char.csv"): 51 | """Check the default values over-ride those in the file 52 | NB: Should we add an option to only make defaults change null values? 53 | ... maybe although all of that could be done post import anyway so 54 | this is more normally used to allow setting values for missing columns 55 | """ 56 | defaults = "code_org=ALLTHESAME,quantity=58" 57 | self.command(filename, "csvimport.Item", defaults=defaults) 58 | item = self.get_item("watercan") 59 | self.assertNotEqual(item.code_org, "CWATCONT20F") 60 | self.assertEqual(item.code_org, "ALLTHESAME") 61 | self.assertNotEqual(item.quantity, 1000) 62 | self.assertEqual(item.quantity, 58) 63 | self.assertEqual(item.organisation.name, "AID-France") 64 | Item.objects.all().delete() 65 | -------------------------------------------------------------------------------- /csvimport/tests/parse_tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Use unicode source code to make test character string writing easier 3 | from csvimport.tests.testcase import CommandTestCase 4 | from csvimport.tests.models import Item 5 | import sys 6 | 7 | pyversion = sys.version_info[0] # python 2 or 3 8 | 9 | 10 | class CommandParseTest(CommandTestCase): 11 | """Run test of file parsing""" 12 | 13 | def test_plain(self, filename="test_plain.csv"): 14 | """Use custom command to upload file and parse it into Items""" 15 | self.command(filename, "csvimport.Item", "country=KE(Country|code)") 16 | item = self.get_item("sheeting") 17 | # Check a couple of the fields in Item 18 | self.assertEqual(item.code_org, "RF007") 19 | self.assertEqual(item.description, "Plastic sheeting, 4*60m, roll") 20 | # Check related Organisation model is created 21 | self.assertEqual(item.organisation.name, "Save 
UK") 22 | Item.objects.all().delete() 23 | 24 | def test_local_parser(self, filename="test_plain.csv"): 25 | """Use custom command to upload file and parse it into Items 26 | Use reader = False to use local parser not csv lib reader 27 | Note that Python 3 csv reader is far less format tolerant so tends to use local parser 28 | """ 29 | self.command( 30 | filename, "csvimport.Item", "country=KE(Country|code)", reader=False 31 | ) 32 | item = self.get_item("sheeting") 33 | # Check a couple of the fields in Item 34 | self.assertEqual(item.code_org, "RF007") 35 | self.assertEqual(item.description, "Plastic sheeting, 4*60m, roll") 36 | # Check related Organisation model is created 37 | self.assertEqual(item.organisation.name, "Save UK") 38 | Item.objects.all().delete() 39 | 40 | def test_tab(self, filename="test_tab.csv"): 41 | """Use custom command to upload file and parse it into Items with different, tab, delimiter""" 42 | self.command( 43 | filename, "csvimport.Item", "country=KE(Country|code)", delimiter="\t" 44 | ) 45 | item = self.get_item("sheeting") 46 | # Check a couple of the fields in Item 47 | self.assertEqual(item.code_org, "RF007") 48 | self.assertEqual(item.description, "Plastic sheeting, 4*60m, roll") 49 | # Check related Organisation model is created 50 | self.assertEqual(item.organisation.name, "Save UK") 51 | Item.objects.all().delete() 52 | 53 | def test_char(self, filename="test_char.csv"): 54 | """Use custom command parse file - test with odd non-ascii character""" 55 | self.command(filename, "csvimport.Item", "country=KE(Country|code)") 56 | item = self.get_item("watercan") 57 | self.assertEqual(item.code_org, "CWATCONT20F") 58 | self.assertEqual(item.quantity, 1000) 59 | if pyversion == 2: 60 | self.assertEqual(unicode(item.uom), "pi图e") 61 | else: 62 | self.assertEqual(str(item.uom), "pi图e") 63 | self.assertEqual(item.organisation.name, "AID-France") 64 | Item.objects.all().delete() 65 | 66 | def test_char2(self, filename="test_char2.csv"): 67 
| """Use custom command to parse file with range of unicode characters""" 68 | self.command(filename, "csvimport.Item", "country=KE(Country|code)") 69 | item = self.get_item( 70 | "Cet élément est utilisé par quelqu'un d'autre et ne peux être modifié" 71 | ) 72 | self.assertEqual( 73 | item.description, 74 | "TENTE FAMILIALE, 12 m_, COMPLETE (tapis de sol/double toit)", 75 | ) 76 | self.assertEqual(item.quantity, 101) 77 | if pyversion == 2: 78 | self.assertEqual(unicode(item.uom), "删除当前图片") 79 | else: 80 | self.assertEqual(str(item.uom), "删除当前图片") 81 | self.assertEqual(item.organisation.name, "AID-France") 82 | Item.objects.all().delete() 83 | 84 | def test_duplicate(self, filename="test_duplicate.csv"): 85 | """Use custom command to upload file and parse it into Items""" 86 | self.deduplicate = True 87 | self.command( 88 | filename, 89 | "csvimport.Item", 90 | "country=KE(Country|code)", 91 | expected_errs=["Imported 3 rows to Item"], 92 | ) 93 | items = Item.objects.all().order_by("code_share") 94 | self.assertEqual(len(items), 3) 95 | # Check a couple of the fields in Item 96 | codes = ("bucket", "tent", "watercan") 97 | for i, item in enumerate(items): 98 | self.assertEqual(item.code_share, codes[i]) 99 | self.command( 100 | filename, 101 | "csvimport.Item", 102 | "country=KE(Country|code)", 103 | expected_errs=["Imported 6 rows to Item"], 104 | deduplicate=False, 105 | ) 106 | items = Item.objects.all().order_by("code_share") 107 | self.assertEqual(len(items), 3 + 6) 108 | Item.objects.all().delete() 109 | 110 | def test_number(self, filename="test_number.csv"): 111 | """Use command to parse file with problem numeric fields 112 | Missing field value, negative, fractions and too big 113 | """ 114 | errs = [ 115 | "row 0: Column quantity = -23, less than zero so set to 0", 116 | "row 4: Column quantity = 1e+28 more than the max integer 9223372036854775807 sqlite may error with big integers so rounded down", 117 | "row 5: Column quantity = Not_a_Number is not a 
number so is set to 0", 118 | "row 6: Column quantity = nan is not an integer so is set to 0", 119 | ] 120 | self.command( 121 | filename, "csvimport.Item", "country=KE(Country|code)", expected_errs=errs 122 | ) 123 | # check fractional numbers into integers 124 | items = Item.objects.filter(code_org="WA017") 125 | self.assertEqual(items[0].quantity, 33) 126 | # check empty values into zeros 127 | items = Item.objects.filter(code_org="WA041") 128 | self.assertEqual(items[0].quantity, 0) 129 | # 9223372036854775807 is the reliable limit so this wont work 130 | # test is to ensure that 1e+28 error above is reported 131 | items = Item.objects.filter(code_org="RF028") 132 | self.assertNotEqual(items[0].quantity, 9999999999999999999999999999) 133 | Item.objects.all().delete() 134 | 135 | def test_quoted(self, filename="test_quoted.csv"): 136 | """Use custom command parse file - test always double quote except some numbers 137 | - test empty double quotes doesnt make the import skip a column""" 138 | errs = ["Imported 4 rows to Item"] 139 | self.command( 140 | filename, "csvimport.Item", "country=KE(Country|code)", expected_errs=errs 141 | ) 142 | item = self.get_item("heater") 143 | self.assertEqual(item.code_org, "CWATCONT20F") 144 | self.assertEqual(item.status, "Goods") 145 | self.assertEqual(item.quantity, 1000) 146 | self.assertEqual(item.organisation.name, "AID-France") 147 | self.assertEqual(str(item.uom), "pie") 148 | self.assertEqual(item.description, "") 149 | item = self.get_item("blanket") 150 | self.assertEqual(item.quantity, 0) # empty double quote 151 | self.assertEqual(item.status, "Order") 152 | item = self.get_item("shed") 153 | self.assertEqual(item.quantity, 180) 154 | item = self.get_item("soap") 155 | self.assertEqual(item.description, 'SOAP, "200 g" bar') 156 | Item.objects.all().delete() 157 | 158 | def test_row_increment(self, filename="test_broken_rows.csv"): 159 | """Test parsing a file with 2 rows that are mashed up 160 | see if it does 5 of 
7 also check pkey increment wrt. 161 | https://github.com/edcrewe/django-csvimport/issues/30 162 | """ 163 | errs = [ 164 | "row 1: FKey uom couldnt be set for row - because the row is not parsable - skipping it", 165 | "row 4: FKey organisation couldnt be set for row - because the row is not parsable - skipping it", 166 | "Imported 5 rows to Item", 167 | ] 168 | self.command( 169 | filename, "csvimport.Item", "country=KE(Country|code)", expected_errs=errs 170 | ) 171 | item = self.get_item("sheeting") 172 | # Check a field in item 173 | self.assertEqual(item.description, "Plastic sheeting, 4*60m, roll") 174 | # Check we have 5 of 7 175 | self.assertEqual(Item.objects.count(), 5) 176 | # Confirm that the maximum value used for PKey is not 7 177 | self.assertEqual(Item.objects.latest("id").id, 5) 178 | Item.objects.all().delete() 179 | 180 | def test_single_row(self, filename="test_single_row.csv"): 181 | """Check that single row is fine based on issue https://github.com/edcrewe/django-csvimport/issues/106""" 182 | errs = ["Imported 1 rows to Item"] 183 | self.command( 184 | filename, "csvimport.Item", "country=KE(Country|code)", expected_errs=errs 185 | ) 186 | self.assertEqual(Item.objects.count(), 1) 187 | Item.objects.all().delete() 188 | -------------------------------------------------------------------------------- /csvimport/tests/performance_tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Use unicode source code to make test character string writing easier 3 | from csvimport.tests.testcase import CommandTestCase 4 | from csvimport.tests.models import Country 5 | import sys 6 | import timeit 7 | from django.core.exceptions import ObjectDoesNotExist 8 | 9 | pyversion = sys.version_info[0] # python 2 or 3 10 | 11 | 12 | class PerformanceTest(CommandTestCase): 13 | """Run test of file parsing""" 14 | 15 | def test_time_load(self, filename="countries.csv"): 16 | """Time the upload of a 
country file""" 17 | errs = ["Imported 246 rows to Country"] 18 | bulk_time = self.command( 19 | csvfile=filename, 20 | modelname="csvimport.Country", 21 | defaults="", 22 | expected_errs=errs, 23 | clean=False, 24 | bulk=True, 25 | time=True, 26 | ) 27 | self.assertTrue(bulk_time < 0.4) 28 | self.assertTrue(Country.objects.count() > 240) 29 | Country.objects.all().delete() 30 | single_time = self.command( 31 | csvfile=filename, 32 | modelname="csvimport.Country", 33 | defaults="", 34 | expected_errs=errs, 35 | clean=False, 36 | bulk=False, 37 | time=True, 38 | ) 39 | self.assertTrue(single_time > bulk_time) 40 | print( 41 | "Time to run bulk countries import was %s faster than %s" 42 | % ( 43 | bulk_time, 44 | single_time, 45 | ) 46 | ) 47 | -------------------------------------------------------------------------------- /csvimport/tests/testcase.py: -------------------------------------------------------------------------------- 1 | """ Base test case for command line manage.py csvimport """ 2 | import os 3 | import timeit 4 | from django.test import TestCase 5 | from django.core.exceptions import ObjectDoesNotExist 6 | 7 | from csvimport.management.commands.importcsv import Command as ImportCommand 8 | from csvimport.management.commands.inspectcsv import Command as InspectCommand 9 | from csvimport.tests.models import Item 10 | 11 | DEFAULT_ERRS = [ 12 | "Columns = CODE_SHARE, CODE_ORG, ORGANISATION, DESCRIPTION, UOM, QUANTITY, STATUS", 13 | "Mapping from first, header, row of CSV file", 14 | "Imported 4 rows to Item", 15 | "Imported 6 rows to Item", 16 | "Imported 7 rows to Item", 17 | "Imported 8 rows to Item", 18 | "Outputting setup message", 19 | "Manually entered mapping list", 20 | ] 21 | 22 | 23 | class DummyFileObj: 24 | """Use to replace html upload / or command arg 25 | with test fixtures files 26 | """ 27 | 28 | path = "" 29 | 30 | def set_path(self, filename): 31 | self.path = os.path.join(os.path.dirname(__file__), "fixtures", filename) 32 | 33 | 34 
| class CommandTestCase(TestCase): 35 | """Run test of use of optional command line args - mappings, default and charset""" 36 | 37 | def inspectcsv(self, csvfile, model="", charset="", defaults=""): 38 | """Run inspectcsv command to parse file""" 39 | cmd = InspectCommand() 40 | uploaded = DummyFileObj() 41 | uploaded.set_path(csvfile) 42 | cmd.csvfile = cmd.open_csvfile(uploaded.path) 43 | cmd.handle_label( 44 | csvfile, **{"model": model, "charset": charset, "defaults": defaults} 45 | ) 46 | return cmd.makemodel 47 | 48 | def command( 49 | self, 50 | csvfile=None, 51 | modelname="", 52 | defaults="", 53 | mappings="", 54 | charset="", 55 | expected_errs=[], 56 | uploaded=None, 57 | nameindexes=False, 58 | deduplicate=True, 59 | delimiter=",", 60 | reader=True, 61 | clean=True, 62 | bulk=False, 63 | time=False, 64 | ): 65 | """Run core csvimport command to parse file""" 66 | cmd = ImportCommand() 67 | uploaded = DummyFileObj() 68 | uploaded.set_path(csvfile) 69 | cmd.setup( 70 | mappings=mappings, 71 | modelname=modelname, 72 | charset=charset, 73 | defaults=defaults, 74 | uploaded=uploaded, 75 | nameindexes=nameindexes, 76 | deduplicate=deduplicate, 77 | delimiter=delimiter, 78 | reader=reader, 79 | clean=clean, 80 | bulk=bulk, 81 | ) 82 | 83 | # Report back any unnexpected parse errors 84 | # and confirm those that are expected. 
85 | # Fail test if they are not matching 86 | if time: 87 | return timeit.Timer(cmd.run).timeit(number=1) 88 | errors = cmd.run("commandtest") 89 | expected = [err for err in DEFAULT_ERRS] 90 | if expected_errs: 91 | expected.extend(expected_errs) 92 | for err in expected: 93 | try: 94 | errors.remove(err) 95 | except: 96 | pass 97 | if errors: 98 | for err in errors: 99 | if err.startswith("Matched Columns"): 100 | errors.remove(err) 101 | else: 102 | print(err) 103 | self.assertEqual(errors, []) 104 | 105 | def get_item(self, code_share="sheeting"): 106 | """Get item for confirming import is OK""" 107 | try: 108 | item = Item.objects.get(code_share__exact=code_share) 109 | except ObjectDoesNotExist: 110 | item = None 111 | self.assertTrue(item, "Failed to get row from imported test.csv Items") 112 | return item 113 | -------------------------------------------------------------------------------- /csvimport/tests/testrunner.py: -------------------------------------------------------------------------------- 1 | # Make our own testrunner that by default only tests our own apps 2 | import shutil 3 | from django.conf import settings 4 | from django.test.runner import DiscoverRunner 5 | 6 | 7 | class CSVImportRunner(DiscoverRunner): 8 | def setup_databases(self, *args, **kwargs): 9 | print("Add the csvimport test models migrations") 10 | settings.MIGRATION_MODULES["csvimport"] = "csvimport.tests.migrations" 11 | return super(CSVImportRunner, self).setup_databases(*args, **kwargs) 12 | -------------------------------------------------------------------------------- /csvimport/tests/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.conf import settings 3 | from csvimport.tests.views import index 4 | 5 | admin.autodiscover() 6 | 7 | # URL patterns for test django-csvimport install 8 | try: 9 | # 4 or later 10 | from django.urls import re_path 11 | 12 | urlpatterns = 
[re_path(r"^admin/", admin.site.urls), re_path(r"^.*", index)] 13 | except: 14 | from django.conf.urls import url, include 15 | 16 | try: 17 | # 1.9 or later 18 | urlpatterns = [url(r"^admin/", admin.site.urls), url(r"^.*", index)] 19 | except: 20 | # 1.8 or earlier 21 | urlpatterns = patterns( 22 | "", (r"^admin/", include(admin.site.urls)), (r"^.*", index) 23 | ) 24 | if settings.DEBUG: 25 | urlpatterns += patterns( 26 | "", 27 | url( 28 | r"^(?P.*)$", 29 | "django.views.static.serve", 30 | { 31 | "document_root": settings.MEDIA_ROOT, 32 | }, 33 | ), 34 | ) 35 | -------------------------------------------------------------------------------- /csvimport/tests/views.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse 2 | 3 | 4 | def index(request, template="README.txt", **kwargs): 5 | return HttpResponse( 6 | """

django-csvimport Test app

7 |

You have installed the test django-csvimport 8 | application. Click on the admin 9 | to try it

10 |

NB: you must edit csvimport.settings to add
11 | MIGRATION_MODULES={"csvimport": "csvimport.tests.migrations"}
12 | Then run:
13 | django-admin.py migrate --settings=csvimport.settings
14 | to create the test models. 15 |

Click on csvimport in the admin

16 |

Try importing data via the test csv files in 17 | django-csvimport/csvimport/tests/fixtures folder

18 |

Click on Add csvimport

19 |

For example select Models name: tests.Country and upload the countries.csv file

20 | """ 21 | ) 22 | -------------------------------------------------------------------------------- /csvimport/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for foobar project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.7/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "csvimport.settings") 13 | 14 | from django.core.wsgi import get_wsgi_application 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /docs/HISTORY.txt: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Contributors listed as [github.com username] where they are on github 5 | 6 | 3.2 - Use dateparser for import - 26 Dec 2023 7 | --------------------------------------------------------- 8 | 9 | #. Use dateparser for importing a much wider range of mixed date formats without specifying a format 10 | #. Remove old tz monkeypatch 11 | 12 | 3.1 - Add messytables for django 5 - 23 Dec 2023 13 | ------------------------------------------------ 14 | 15 | #. Use dateparser rather than python-dateutil and add to setup.py dependencies 16 | 17 | 3.0 - Add messytables for django 5 - 23 Dec 2023 18 | ------------------------------------------------ 19 | 20 | #. Upgrade to Django 5, hence Python 3.12 21 | #. Add csvimport/messytables with two files to provide functionality used from OKN messytables, since the whole package is no longer maintained for Python 3.12 [OKN] 22 | #. Autoformat the code with black 23 | 24 | 2.17 - Fix for django 4 - 2 Jan 2023 25 | ------------------------------------ 26 | 27 | #. Fix incompatible methods for django 4 28 | #. 
Add a test for single row CSV and fix issue of trailing line returns 29 | #. Minor issue PRs [edouard-gv] 30 | 31 | 2.16 - Fix and add tests - 28 June 2020 32 | --------------------------------------- 33 | 34 | #. Move test fixture migrations to separate migration folder picked up by custom testrunner 35 | #. Add regression tests for all reported issues 36 | #. Add admin UI tests 37 | #. Enable and fix all existing tests 38 | #. Use regex match group for failover CSV splitter 39 | 40 | 2.15 - Fix compatibility issue for Django 3 - 27 April 2020 41 | ----------------------------------------------------------- 42 | 43 | 2.13 - Fix issue with inspectcsv for different header and data line number of cols - 26th Sept 2019 44 | --------------------------------------------------------------------------------------------------- 45 | 46 | 2.12 - Fix for latest django required on_delete and duplicated log msgs - 24 April 2019 47 | --------------------------------------------------------------------------------------- 48 | 49 | 2.11 - Fix command line summary messages - 14 July 2017 50 | ------------------------------------------------------- 51 | 52 | 2.10 - Bug with models var not set if not bulk import - 6 July 2017 53 | -------------------------------------------------------------------- 54 | 55 | 2.9 - Fix for using foreign keys from other apps - 4 July 2017 56 | -------------------------------------------------------------- 57 | 58 | #. Handle boolean types actually being ints for sqlite 59 | #. Allow dots in --mappings so app labels of foreign keys can be set by the user, since model lookup may assume wrong label. 60 | 61 | 2.8 - Minor fixes - 19 June 2017 62 | -------------------------------- 63 | 64 | #. Force datetimes to local timezone to prevent warning on insert 65 | #. Rebuild initial migrations file 66 | #. 
Add option to do bulk create with --bulk to speed up import 67 | 68 | 2.7 - Merge in pull requests for minor fixes and test with Django 1.11 - 30 May 2017 69 | ------------------------------------------------------------------------------------ 70 | 71 | #. Fix uninitiated variable bug if type not detected 72 | #. Fix command line arg usage for Django 1.10 or 1.11 and make it backwards compatible down to 1.7 73 | #. For multiple django version compatibility code try excepts make sure new syntax is first to avoid deprecation warnings 74 | #. Add test shell script to make it easy to run test suite and a command line test 75 | #. Report back matched and unmatched cols in CSV rather than just showing list of all of them 76 | #. Allow clean=false option to not fix numeric values to match allowed range 77 | #. Use atomic to wrap row inserts so we can still report count of inserts Ok after a transactional exception 78 | 79 | 2.5 - Merge in various pull requests and test with Django 1.7 to 1.10 - 15 Nov 2016 80 | ----------------------------------------------------------------------------------- 81 | 82 | #. Fix for all recent versions of Django and Python 83 | #. Add in delimiter option eg. for tab separated values 84 | #. Add migrations 85 | #. Make importcsv parsing more tolerant of unparsable rows in CSV 86 | 87 | 2.4 - Fix app config path so that management commands are found - 11 Nov 2014 88 | ----------------------------------------------------------------------------- 89 | 90 | #. Fix log print for commands 91 | 92 | 2.3 - Make compatible with Python 3 - 29 October 2014 93 | ----------------------------------------------------- 94 | 95 | 2.2 - Move to core appconfig usage - 28 October 2014 96 | ---------------------------------------------------- 97 | 98 | #. Add appconfig path to cater for module install test running 99 | #. 
Check for SMALLINT_DBS including sqlite - since can fail big int imports 100 | 101 | 2.1 - Fix for django 1.7 - 27 October 2014 102 | ------------------------------------------ 103 | 104 | 2.0 - Added model creation and datetime clean up - 29th Sept 2014 105 | ----------------------------------------------------------------- 106 | 107 | #. Generate django model files based on CSV data type sniffing with messytables 108 | #. Use flag to generate model code eg. --model=create_new_model.modelname and add test 109 | #. Use the settings date formats and add clean up for them 110 | #. Factor out type clean up method for data values 111 | #. Allow defaults to be passed as a command line argument 112 | #. Use --mappings=none for CSV created models with no column name row 113 | #. Handle badly encoded files as raw 114 | #. Set date limit in case of strftime bug for pre-1900 dates 115 | 116 | [edcrewe] 117 | 118 | 1.1 - Added mappings and defaults tests - 18 March 2014 119 | ------------------------------------------------------- 120 | 121 | #. Allow mapping format to be simple header row style 122 | #. Refactored heading parsing and added tests for optional args 123 | 124 | [edcrewe] 125 | 126 | 1.0 - Added signals and code cleanup - 29th January 2013 127 | -------------------------------------------------------- 128 | 129 | #. Fixed number overflow test and tested with django 1.5.1 [edcrewe] 130 | #. Added signals to the app, to allow the model to custom format a row. 131 | #. added appconf to allow the app to be more customizable - eg. select models for import. 132 | #. Removed except all and return database errors. 133 | 134 | [cwood] 135 | 136 | 0.9 - Missed import of re for change to display of error log - 9th Dec 2012 137 | --------------------------------------------------------------------------- 138 | 139 | #. 
Import re in csvimport/models.py [blorenz] 140 | 141 | 0.8 - Tidy up logging and Boolean handling - 9th Dec 2012 142 | --------------------------------------------------------- 143 | 144 | #. Fix display of error log so there are newlines for each entry [smeyfroi] 145 | #. Log processed rows to the csvimport.management.commands.csvimport logger [smeyfroi] 146 | #. Fix for boolean data [smeyfroi] 147 | #. Add setting for using standard logging machinery for Admin UI log lines [edcrewe] 148 | #. Add test to check creation and content of csvimport log file [edcrewe] 149 | 150 | 0.7 - Fixes and merge in changes for running up the test app - 24th Nov 2012 151 | ---------------------------------------------------------------------------- 152 | 153 | #. Merge in changes for 1.4 templates in settings [elena] 154 | #. Merge in doc changes [elena, valhallasw] 155 | #. Fix bug with columns that are floats [imposeren] 156 | #. Fix imports via the admin not having charset encoding argument [edcrewe] 157 | 158 | 0.6 - Handle text not number or special float to integer - 7th March 2012 159 | ------------------------------------------------------------------------- 160 | 161 | #. Handle numeric columns with text in - zero if not nan or inf 162 | 163 | [edcrewe] 164 | 165 | 0.5 - Add command line encoding option and error return - 6th March 2012 166 | ------------------------------------------------------------------------ 167 | 168 | #. Make sure command line usage returns errors and warnings 169 | #. Add the option to force --charset=utf-8 for example rather than chardet 170 | #. Add numeric type handling for empty fields or other issues plus tests 171 | 172 | [Tessa Alexander, edcrewe] 173 | 174 | 0.4 - Add settings to allow demo site install from tests - 4th October 2011 175 | --------------------------------------------------------------------------- 176 | 177 | #. Add demo countries.csv file 178 | #. 
Add MEDIA_ROOT for file uploads 179 | 180 | [edcrewe] 181 | 182 | 0.3 - Fix issue with adding in admin - 25th Sept 2011 183 | ----------------------------------------------------- 184 | 185 | #. Fix empty mapping submitted 186 | #. Remove non-generic filename country processing code 187 | #. Use get_models to populate drop down for admin import form 188 | #. Add upload message about whether mapping is from CSV header row 189 | #. Ensure header row only used if mapping is not supplied. 190 | 191 | [edcrewe] 192 | 193 | 0.2 - Initial beta release version - 29th July 2011 194 | --------------------------------------------------- 195 | 196 | #. Build it as a django-csvimport egg 197 | #. Create csvimport management command 198 | #. Improve character set detection and unicode handling 199 | #. Add admin csvimport model to upload CSV files with logging of import 200 | #. Use file upload save to trigger csvimport command 201 | #. Autodetect column mappings by matching model fields and CSV header text 202 | #. Add a deduplicate feature 203 | #. Start to add test suite with data / app for HELIOS cross agency supply chain 204 | data sharing http://www.helios-foundation.org/cbha-project/index_html 205 | 206 | [edcrewe] 207 | 208 | 0.1 - Unreleased 209 | ---------------- 210 | 211 | #. 
Use django snippet as starting point 212 | http://djangosnippets.org/snippets/633/ 213 | 214 | [Jonathan Holst] 215 | -------------------------------------------------------------------------------- /docs/test_script.sh: -------------------------------------------------------------------------------- 1 | echo "Test shell script for running the shell command to import a CSV file and then check it worked by using commandline sqlite3" 2 | echo "Run from the root of your django install" 3 | export root=src/django-csvimport 4 | # export root=lib/python2.7/site-packages 5 | rm $root/db.sqlite3 6 | bin/django-admin.py migrate --settings='csvimport.settings' 7 | bin/django-admin.py importcsv --settings='csvimport.settings' --model='csvimport.Country' $root/csvimport/tests/fixtures/countries.csv 8 | sqlite3 $root/db.sqlite3 "select * from csvtests_country" 9 | echo "Run test suite" 10 | bin/django-admin.py test --settings='csvimport.settings' csvimport.tests 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [egg_info] 2 | tag_build = 3 | tag_svn_revision = false 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | try: 4 | from setuptools import setup 5 | except ImportError: 6 | from distutils.core import setup 7 | 8 | version = "3.2" 9 | 10 | with open("README.rst", "r") as fp: 11 | csvimport_description = fp.read() + "\n" 12 | for fname in ("HISTORY.txt",): 13 | with open(os.path.join("docs", fname), "r") as fp: 14 | csvimport_description += fp.read() + "\n" 15 | 16 | setup( 17 | name="django-csvimport", 18 | version=version, 19 | description="Import CSV files to django models", 20 | long_description_content_type="text/x-rst", 21 | long_description=csvimport_description, 22 | # Get more strings 
from http://www.python.org/pypi?%3Aaction=list_classifiers 23 | classifiers=[ 24 | "Programming Language :: Python", 25 | "Topic :: Software Development :: Libraries :: Python Modules", 26 | "Development Status :: 5 - Production/Stable", 27 | "Framework :: Django", 28 | "Programming Language :: Python :: 2.6", 29 | "Programming Language :: Python :: 2.7", 30 | "Programming Language :: Python :: 3.3", 31 | "Programming Language :: Python :: 3.4", 32 | "Programming Language :: Python :: 3.5", 33 | "Programming Language :: Python :: 3.6", 34 | "Programming Language :: Python :: 3.7", 35 | "Programming Language :: Python :: 3.8", 36 | "Programming Language :: Python :: 3.9", 37 | "Programming Language :: Python :: 3.12", 38 | "License :: OSI Approved :: Apache Software License", 39 | ], 40 | keywords="CVS import django fixture", 41 | author="Ed Crewe", 42 | author_email="edmundcrewe@gmail.com", 43 | url="https://github.com/edcrewe/django-csvimport", 44 | license="Apache", 45 | packages=["csvimport"], 46 | include_package_data=True, 47 | namespace_packages=["csvimport"], 48 | # this line always breaks install? 49 | # package_data = {'csvimport': ['*.csv', '*.rst']}, 50 | zip_safe=False, 51 | install_requires=["django>=2.2.5", "chardet", "dateparser"], 52 | entry_points=""" 53 | # -*- Entry points: -*- 54 | """, 55 | ) 56 | --------------------------------------------------------------------------------