├── .github
    └── workflows
    │   ├── publish.yml
    │   └── test.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── csv_diff
    ├── __init__.py
    └── cli.py
├── setup.py
└── tests
    ├── __init__.py
    ├── test_cli.py
    ├── test_csv_diff.py
    └── test_human_text.py


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 | 
 7 | permissions:
 8 |   contents: read
 9 | 
10 | jobs:
11 |   test:
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v5
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |         cache: pip
23 |         cache-dependency-path: setup.py
24 |     - name: Install dependencies
25 |       run: |
26 |         pip install '.[test]'
27 |     - name: Run tests
28 |       run: |
29 |         pytest
30 |   deploy:
31 |     runs-on: ubuntu-latest
32 |     needs: [test]
33 |     environment: release
34 |     permissions:
35 |       id-token: write
36 |     steps:
37 |     - uses: actions/checkout@v4
38 |     - name: Set up Python
39 |       uses: actions/setup-python@v5
40 |       with:
41 |         python-version: "3.12"
42 |         cache: pip
43 |         cache-dependency-path: setup.py
44 |     - name: Install dependencies
45 |       run: |
46 |         pip install setuptools wheel build
47 |     - name: Build
48 |       run: |
49 |         python -m build
50 |     - name: Publish
51 |       uses: pypa/gh-action-pypi-publish@release/v1
52 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | permissions:
 6 |   contents: read
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
14 |     steps:
15 |     - uses: actions/checkout@v4
16 |     - name: Set up Python ${{ matrix.python-version }}
17 |       uses: actions/setup-python@v5
18 |       with:
19 |         python-version: ${{ matrix.python-version }}
20 |         cache: pip
21 |         cache-dependency-path: setup.py
22 |     - name: Install dependencies
23 |       run: |
24 |         pip install '.[test]'
25 |     - name: Run tests
26 |       run: |
27 |         pytest
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .venv
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | venv
 6 | .eggs
 7 | .pytest_cache
 8 | *.egg-info
 9 | .DS_Store
10 | .schema
11 | .vscode
12 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-alpine
2 | RUN pip install csv-diff
3 | WORKDIR /files
4 | ENTRYPOINT ["csv-diff"]
5 | CMD ["--help"]
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # csv-diff
  2 | 
  3 | [![PyPI](https://img.shields.io/pypi/v/csv-diff.svg)](https://pypi.org/project/csv-diff/)
  4 | [![Changelog](https://img.shields.io/github/v/release/simonw/csv-diff?include_prereleases&label=changelog)](https://github.com/simonw/csv-diff/releases)
  5 | [![Tests](https://github.com/simonw/csv-diff/workflows/Test/badge.svg)](https://github.com/simonw/csv-diff/actions?query=workflow%3ATest)
  6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/csv-diff/blob/main/LICENSE)
  7 | 
  8 | Tool for viewing the difference between two CSV, TSV or JSON files. See [Generating a commit log for San Francisco’s official list of trees](https://simonwillison.net/2019/Mar/13/tree-history/) (and the [sf-tree-history repo commit log](https://github.com/simonw/sf-tree-history/commits)) for background information on this project.
  9 | 
 10 | ## Installation
 11 | 
 12 |     pip install csv-diff
 13 | 
 14 | ## Usage
 15 | 
 16 | Consider two CSV files:
 17 | 
 18 | `one.csv`
 19 | 
 20 |     id,name,age
 21 |     1,Cleo,4
 22 |     2,Pancakes,2
 23 | 
 24 | `two.csv`
 25 | 
 26 |     id,name,age
 27 |     1,Cleo,5
 28 |     3,Bailey,1
 29 | 
 30 | `csv-diff` can show a human-readable summary of differences between the files:
 31 | 
 32 |     $ csv-diff one.csv two.csv --key=id
 33 |     1 row changed, 1 row added, 1 row removed
 34 | 
 35 |     1 row changed
 36 | 
 37 |       id: 1
 38 |         age: "4" => "5"
 39 | 
 40 |     1 row added
 41 | 
 42 |       id: 3
 43 |       name: Bailey
 44 |       age: 1
 45 | 
 46 |     1 row removed
 47 | 
 48 |       id: 2
 49 |       name: Pancakes
 50 |       age: 2
 51 | 
 52 | The `--key=id` option means that the `id` column should be treated as the unique key, to identify which records have changed.
 53 | 
 54 | The tool will automatically detect if your files are comma- or tab-separated. You can override this automatic detection and force the tool to use a specific format using `--format=tsv` or `--format=csv`.
 55 | 
 56 | You can also feed it JSON files, provided they are a JSON array of objects where each object has the same keys. Use `--format=json` if your input files are JSON.
 57 | 
 58 | Use `--show-unchanged` to include full details of the unchanged values for rows with at least one change in the diff output:
 59 | 
 60 |     $ csv-diff one.csv two.csv --key=id --show-unchanged
 61 |     1 row changed
 62 | 
 63 |       id: 1
 64 |         age: "4" => "5"
 65 | 
 66 |         Unchanged:
 67 |           name: "Cleo"
 68 | 
 69 | ### JSON output
 70 | 
 71 | You can use the `--json` option to get a machine-readable difference:
 72 | 
 73 |     $ csv-diff one.csv two.csv --key=id --json
 74 |     {
 75 |         "added": [
 76 |             {
 77 |                 "id": "3",
 78 |                 "name": "Bailey",
 79 |                 "age": "1"
 80 |             }
 81 |         ],
 82 |         "removed": [
 83 |             {
 84 |                 "id": "2",
 85 |                 "name": "Pancakes",
 86 |                 "age": "2"
 87 |             }
 88 |         ],
 89 |         "changed": [
 90 |             {
 91 |                 "key": "1",
 92 |                 "changes": {
 93 |                     "age": [
 94 |                         "4",
 95 |                         "5"
 96 |                     ]
 97 |                 }
 98 |             }
 99 |         ],
100 |         "columns_added": [],
101 |         "columns_removed": []
102 |     }
103 | 
104 | ### Adding templated extras
105 | 
106 | You can specify additional keys to be displayed in the human-readable format using the `--extra` option:
107 | 
108 |     --extra name "Python format string with {id} for variables"
109 | 
110 | For example, to output a link to `https://news.ycombinator.com/latest?id={id}` for each item with an ID, you could use this:
111 | 
112 | ```bash
113 | csv-diff one.csv two.csv --key=id \
114 |   --extra latest "https://news.ycombinator.com/latest?id={id}"
115 | ```
116 | These extras display something like this:
117 | ```
118 | 1 row changed
119 | 
120 |   id: 41459472
121 |     points: "24" => "25"
122 |     numComments: "5" => "6"
123 |   extras:
124 |     latest: https://news.ycombinator.com/latest?id=41459472
125 | ```
126 | 
127 | ## As a Python library
128 | 
129 | You can also import the Python library into your own code like so:
130 | 
131 |     from csv_diff import load_csv, compare
132 |     diff = compare(
133 |         load_csv(open("one.csv"), key="id"),
134 |         load_csv(open("two.csv"), key="id")
135 |     )
136 | 
137 | `diff` will now contain the same data structure as the output in the `--json` example above.
138 | 
139 | If the columns in the CSV have changed, those added or removed columns will be ignored when calculating changes made to specific rows.
140 | 
141 | ## As a Docker container
142 | 
143 | ### Build the image
144 | 
145 |     $ docker build -t csvdiff .
146 | 
147 | ### Run the container
148 | 
149 |     $ docker run --rm -v $(pwd):/files csvdiff
150 | 
151 | Suppose the current directory contains two CSV files, `one.csv` and `two.csv`:
152 | 
153 |     $ docker run --rm -v $(pwd):/files csvdiff one.csv two.csv
154 |     
155 | ## Alternatives
156 | 
157 | - [csvdiff](https://github.com/aswinkarthik/csvdiff) is a "fast diff tool for comparing CSV files" - you may get better results from this than from `csv-diff` against larger files.
158 | 


--------------------------------------------------------------------------------
/csv_diff/__init__.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | from dictdiffer import diff
  3 | import json
  4 | import hashlib
  5 | 
  6 | 
  7 | def load_csv(fp, key=None, dialect=None):
  8 |     if dialect is None and fp.seekable():
  9 |         # Peek at first 1MB to sniff the delimiter and other dialect details
 10 |         peek = fp.read(1024**2)
 11 |         fp.seek(0)
 12 |         try:
 13 |             dialect = csv.Sniffer().sniff(peek, delimiters=",\t;")
 14 |         except csv.Error:
 15 |             # Oh well, we tried. Fallback to the default.
 16 |             pass
 17 |     fp = csv.reader(fp, dialect=(dialect or "excel"))
 18 |     headings = next(fp)
 19 |     rows = [dict(zip(headings, line)) for line in fp]
 20 |     if key:
 21 |         keyfn = lambda r: r[key]
 22 |     else:
 23 |         keyfn = lambda r: hashlib.sha1(
 24 |             json.dumps(r, sort_keys=True).encode("utf8")
 25 |         ).hexdigest()
 26 |     return {keyfn(r): r for r in rows}
 27 | 
 28 | 
 29 | def load_json(fp, key=None):
 30 |     raw_list = json.load(fp)
 31 |     assert isinstance(raw_list, list)
 32 |     common_keys = set()
 33 |     for item in raw_list:
 34 |         common_keys.update(item.keys())
 35 |     if key:
 36 |         keyfn = lambda r: r[key]
 37 |     else:
 38 |         keyfn = lambda r: hashlib.sha1(
 39 |             json.dumps(r, sort_keys=True).encode("utf8")
 40 |         ).hexdigest()
 41 |     return {keyfn(r): _simplify_json_row(r, common_keys) for r in raw_list}
 42 | 
 43 | 
 44 | def _simplify_json_row(r, common_keys):
 45 |     # Convert list/dict values into JSON serialized strings
 46 |     for key, value in r.items():
 47 |         if isinstance(value, (dict, tuple, list)):
 48 |             r[key] = json.dumps(value)
 49 |     for key in common_keys:
 50 |         if key not in r:
 51 |             r[key] = None
 52 |     return r
 53 | 
 54 | 
 55 | def compare(previous, current, show_unchanged=False):
 56 |     result = {
 57 |         "added": [],
 58 |         "removed": [],
 59 |         "changed": [],
 60 |         "columns_added": [],
 61 |         "columns_removed": [],
 62 |     }
 63 |     # Have the columns changed?
 64 |     previous_columns = set(next(iter(previous.values())).keys())
 65 |     current_columns = set(next(iter(current.values())).keys())
 66 |     ignore_columns = None
 67 |     if previous_columns != current_columns:
 68 |         result["columns_added"] = [
 69 |             c for c in current_columns if c not in previous_columns
 70 |         ]
 71 |         result["columns_removed"] = [
 72 |             c for c in previous_columns if c not in current_columns
 73 |         ]
 74 |         ignore_columns = current_columns.symmetric_difference(previous_columns)
 75 |     # Have any rows been removed or added?
 76 |     removed = [id for id in previous if id not in current]
 77 |     added = [id for id in current if id not in previous]
 78 |     # How about changed?
 79 |     removed_or_added = set(removed) | set(added)
 80 |     potential_changes = [id for id in current if id not in removed_or_added]
 81 |     changed = [id for id in potential_changes if current[id] != previous[id]]
 82 |     if added:
 83 |         result["added"] = [current[id] for id in added]
 84 |     if removed:
 85 |         result["removed"] = [previous[id] for id in removed]
 86 |     if changed:
 87 |         for id in changed:
 88 |             diffs = list(diff(previous[id], current[id], ignore=ignore_columns))
 89 |             if diffs:
 90 |                 changes = {
 91 |                     "key": id,
 92 |                     "changes": {
 93 |                         # field can be a list if id contained '.' - #7
 94 |                         field[0] if isinstance(field, list) else field: [
 95 |                             prev_value,
 96 |                             current_value,
 97 |                         ]
 98 |                         for _, field, (prev_value, current_value) in diffs
 99 |                     },
100 |                 }
101 |                 if show_unchanged:
102 |                     changes["unchanged"] = {
103 |                         field: value
104 |                         for field, value in previous[id].items()
105 |                         if field not in changes["changes"] and field != "id"
106 |                     }
107 |                 result["changed"].append(changes)
108 |     return result
109 | 
110 | 
def human_text(result, key=None, singular=None, plural=None, current=None, extras=None):
    """Render a comparison result as human-readable text.

    Args:
        result: Dict with "columns_added", "columns_removed", "changed",
            "added" and "removed" entries.
        key: Label printed in front of each changed row's key value.
        singular: Noun used for a count of one; defaults to "row".
        plural: Noun used for any other count; defaults to "rows".
        current: Mapping of row key to current row, used to look up the row
            for a changed entry when ``extras`` are rendered.
        extras: Sequence of (name, format string) pairs handed to
            ``human_extras`` for every changed, added and removed row.

    Returns:
        A one-line summary title, a blank line, then one section per kind of
        change, with surrounding whitespace stripped.
    """
    singular = singular or "row"
    plural = plural or "rows"
    title = []
    summary = []
    # Per-section headings only appear when more than one section is populated
    populated = [name for name in result if result[name]]
    show_headers = len(populated) > 1

    def counted(items, one, many, verb):
        # e.g. "1 row changed" / "3 columns added"
        noun = one if len(items) == 1 else many
        return "{} {} {}".format(len(items), noun, verb)

    def column_section(columns, verb):
        # Column sections always carry their heading, then a sorted list
        heading = counted(columns, "column", "columns", verb)
        title.append(heading)
        summary.append(heading)
        summary.append("")
        for column in sorted(columns):
            summary.append("  {}".format(column))
        summary.append("")

    def row_section(rows, verb):
        # Shared layout for whole rows that were added or removed
        heading = counted(rows, singular, plural, verb)
        title.append(heading)
        if show_headers:
            summary.append(heading + "\n")
        rendered = []
        for row in rows:
            text = human_row(row, prefix="  ")
            if extras:
                text += "\n" + human_extras(row, extras)
            rendered.append(text)
        summary.append("\n\n".join(rendered))
        summary.append("")

    if result["columns_added"]:
        column_section(result["columns_added"], "added")
    if result["columns_removed"]:
        column_section(result["columns_removed"], "removed")

    if result["changed"]:
        heading = counted(result["changed"], singular, plural, "changed")
        title.append(heading)
        if show_headers:
            summary.append(heading + "\n")
        change_blocks = []
        for details in result["changed"]:
            lines = ["  {}: {}".format(key, details["key"])]
            lines.extend(
                '    {}: "{}" => "{}"'.format(field, before, after)
                for field, (before, after) in details["changes"].items()
            )
            if extras:
                lines.append(human_extras(current[details["key"]], extras))
            lines.append("")
            change_blocks.append("\n".join(lines))
            unchanged = details.get("unchanged")
            if unchanged:
                lines = ["    Unchanged:"]
                lines.extend(
                    '      {}: "{}"'.format(field, value)
                    for field, value in unchanged.items()
                )
                lines.append("")
                change_blocks.append("\n".join(lines))
        summary.append("\n".join(change_blocks))

    if result["added"]:
        row_section(result["added"], "added")
    if result["removed"]:
        row_section(result["removed"], "removed")

    body = "\n".join(summary)
    return (", ".join(title) + "\n\n" + body).strip()
198 | 
199 | 
def human_row(row, prefix=""):
    """Format a row as one "key: value" line per field, each line starting with ``prefix``."""
    return "\n".join(
        "{}{}: {}".format(prefix, name, value) for name, value in row.items()
    )
205 | 
206 | 
def human_extras(row, extras):
    """Format the "extras" block for a row.

    ``extras`` is a sequence of (name, format string) pairs; each format
    string is filled in using the row's fields as keyword arguments.
    """
    lines = ["  extras:"]
    lines.extend(
        "    {}: {}".format(name, template.format(**row)) for name, template in extras
    )
    return "\n".join(lines)
213 | 


--------------------------------------------------------------------------------
/csv_diff/cli.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import json as std_json
 3 | from . import load_csv, load_json, compare, human_text
 4 | 
 5 | 
 6 | @click.command()
 7 | @click.version_option()
 8 | @click.argument(
 9 |     "previous",
10 |     type=click.Path(exists=True, file_okay=True, dir_okay=False, allow_dash=False),
11 | )
12 | @click.argument(
13 |     "current",
14 |     type=click.Path(exists=True, file_okay=True, dir_okay=False, allow_dash=False),
15 | )
16 | @click.option(
17 |     "--key", type=str, default=None, help="Column to use as a unique ID for each row"
18 | )
19 | @click.option(
20 |     "--format",
21 |     type=click.Choice(["csv", "tsv", "json"]),
22 |     default=None,
23 |     help="Explicitly specify input format (csv, tsv, json) instead of auto-detecting",
24 | )
25 | @click.option(
26 |     "--json", type=bool, default=False, help="Output changes as JSON", is_flag=True
27 | )
28 | @click.option(
29 |     "--singular",
30 |     type=str,
31 |     default=None,
32 |     help="Singular word to use, e.g. 'tree' for '1 tree'",
33 | )
34 | @click.option(
35 |     "--plural",
36 |     type=str,
37 |     default=None,
38 |     help="Plural word to use, e.g. 'trees' for '2 trees'",
39 | )
40 | @click.option(
41 |     "--show-unchanged",
42 |     is_flag=True,
43 |     help="Show unchanged fields for rows with at least one change",
44 | )
45 | @click.option(
46 |     "extras",
47 |     "--extra",
48 |     type=(str, str),
49 |     multiple=True,
50 |     help="key: format string - define extra fields to display",
51 | )
52 | def cli(previous, current, key, format, json, singular, plural, show_unchanged, extras):
53 |     "Diff two CSV or JSON files"
54 |     dialect = {
55 |         "csv": "excel",
56 |         "tsv": "excel-tab",
57 |     }
58 | 
59 |     if extras and json:
60 |         raise click.UsageError(
61 |             "Extra fields are not supported in JSON output mode",
62 |             ctx=click.get_current_context(),
63 |         )
64 | 
65 |     def load(filename):
66 |         if format == "json":
67 |             return load_json(open(filename), key=key)
68 |         else:
69 |             return load_csv(
70 |                 open(filename, newline=""), key=key, dialect=dialect.get(format)
71 |             )
72 | 
73 |     previous_data = load(previous)
74 |     current_data = load(current)
75 | 
76 |     diff = compare(previous_data, current_data, show_unchanged)
77 |     if json:
78 |         print(std_json.dumps(diff, indent=4))
79 |     else:
80 |         print(
81 |             human_text(diff, key, singular, plural, current=current_data, extras=extras)
82 |         )
83 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | import io
 3 | import os
 4 | 
 5 | VERSION = "1.2"
 6 | 
 7 | 
 8 | def get_long_description():
 9 |     with io.open(
10 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"),
11 |         encoding="utf8",
12 |     ) as fp:
13 |         return fp.read()
14 | 
15 | 
# Package metadata for csv-diff.
setup(
    name="csv-diff",
    description="Python CLI tool and library for diffing CSV and JSON files",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    author="Simon Willison",
    version=VERSION,
    license="Apache License, Version 2.0",
    # tests/ contains an __init__.py, so a bare find_packages() would also
    # install a top-level "tests" package into site-packages.
    packages=find_packages(exclude=["tests", "tests.*"]),
    install_requires=["click", "dictdiffer"],
    # Test dependencies are installed with: pip install '.[test]'
    # The deprecated setup_requires=["pytest-runner"] / tests_require hooks
    # were dropped; run the suite by invoking pytest directly.
    extras_require={"test": ["pytest"]},
    entry_points="""
        [console_scripts]
        csv-diff=csv_diff.cli:cli
    """,
    url="https://github.com/simonw/csv-diff",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "Intended Audience :: End Users/Desktop",
        "License :: OSI Approved :: Apache Software License",
        # Keep in sync with the python-version matrix in the CI workflows.
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
    ],
)
44 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonw/csv-diff/26903b74eefcd65be761810f51b0e55c033bde66/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
  1 | from click.testing import CliRunner
  2 | from csv_diff import cli, load_csv
  3 | import csv
  4 | import pytest
  5 | from .test_csv_diff import ONE, ONE_TSV, TWO, TWO_TSV, THREE, FIVE
  6 | import io
  7 | import json
  8 | from textwrap import dedent
  9 | 
 10 | 
 11 | @pytest.fixture
 12 | def tsv_files(tmpdir):
 13 |     one = tmpdir / "one.tsv"
 14 |     one.write(ONE_TSV)
 15 |     two = tmpdir / "two.tsv"
 16 |     two.write(TWO_TSV)
 17 |     return str(one), str(two)
 18 | 
 19 | 
 20 | @pytest.fixture
 21 | def json_files(tmpdir):
 22 |     one = tmpdir / "one.json"
 23 |     one.write(
 24 |         json.dumps(
 25 |             [
 26 |                 {"id": 1, "name": "Cleo", "nested": {"foo": 3}, "extra": 1},
 27 |                 {"id": 2, "name": "Pancakes", "nested": {"foo": 3}},
 28 |             ]
 29 |         )
 30 |     )
 31 |     two = tmpdir / "two.json"
 32 |     two.write(
 33 |         json.dumps(
 34 |             [
 35 |                 {"id": 1, "name": "Cleo", "nested": {"foo": 3, "bar": 5}, "extra": 1},
 36 |                 {"id": 2, "name": "Pancakes!", "nested": {"foo": 3}, "extra": 1},
 37 |             ]
 38 |         )
 39 |     )
 40 |     return str(one), str(two)
 41 | 
 42 | 
 43 | def test_human_cli(tmpdir):
 44 |     one = tmpdir / "one.csv"
 45 |     one.write(ONE)
 46 |     two = tmpdir / "two.csv"
 47 |     two.write(TWO)
 48 |     result = CliRunner().invoke(cli.cli, [str(one), str(two), "--key", "id"])
 49 |     assert 0 == result.exit_code
 50 |     assert (
 51 |         dedent(
 52 |             """
 53 |     1 row changed
 54 | 
 55 |       id: 1
 56 |         age: "4" => "5"
 57 |     """
 58 |         ).strip()
 59 |         == result.output.strip()
 60 |     )
 61 | 
 62 | 
 63 | def test_human_cli_alternative_names(tmpdir):
 64 |     one = tmpdir / "one.csv"
 65 |     one.write(ONE)
 66 |     five = tmpdir / "five.csv"
 67 |     five.write(FIVE)
 68 |     result = CliRunner().invoke(
 69 |         cli.cli,
 70 |         [str(one), str(five), "--key", "id", "--singular", "tree", "--plural", "trees"],
 71 |     )
 72 |     assert 0 == result.exit_code, result.output
 73 |     assert (
 74 |         dedent(
 75 |             """
 76 |     1 tree changed, 2 trees added
 77 | 
 78 |     1 tree changed
 79 | 
 80 |       id: 1
 81 |         age: "4" => "5"
 82 | 
 83 |     2 trees added
 84 | 
 85 |       id: 3
 86 |       name: Bailey
 87 |       age: 1
 88 | 
 89 |       id: 4
 90 |       name: Carl
 91 |       age: 7
 92 |     """
 93 |         ).strip()
 94 |         == result.output.strip()
 95 |     )
 96 | 
 97 | 
 98 | def test_human_cli_json(tmpdir):
 99 |     one = tmpdir / "one.csv"
100 |     one.write(ONE)
101 |     two = tmpdir / "two.csv"
102 |     two.write(TWO)
103 |     result = CliRunner().invoke(cli.cli, [str(one), str(two), "--key", "id", "--json"])
104 |     assert 0 == result.exit_code
105 |     assert {
106 |         "added": [],
107 |         "removed": [],
108 |         "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
109 |         "columns_added": [],
110 |         "columns_removed": [],
111 |     } == json.loads(result.output.strip())
112 | 
113 | 
def test_tsv_files(tsv_files):
    """Tab-separated inputs are diffed correctly when --format tsv is given."""
    previous_path, current_path = tsv_files
    args = [previous_path, current_path, "--key", "id", "--json", "--format", "tsv"]
    outcome = CliRunner().invoke(cli.cli, args)
    assert outcome.exit_code == 0
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert json.loads(outcome.output.strip()) == expected
127 | 
128 | 
def test_json_files(json_files):
    """JSON inputs with --format json.

    The expected diff shows nested values as JSON-encoded strings and a key
    missing from the previous row as ``None``.
    """
    previous_path, current_path = json_files
    args = [previous_path, current_path, "--key", "id", "--json", "--format", "json"]
    outcome = CliRunner().invoke(cli.cli, args, catch_exceptions=False)
    assert outcome.exit_code == 0
    nested_change = {
        "key": 1,
        "changes": {"nested": ['{"foo": 3}', '{"foo": 3, "bar": 5}']},
    }
    name_and_extra_change = {
        "key": 2,
        "changes": {"name": ["Pancakes", "Pancakes!"], "extra": [None, 1]},
    }
    expected = {
        "added": [],
        "removed": [],
        "changed": [nested_change, name_and_extra_change],
        "columns_added": [],
        "columns_removed": [],
    }
    assert json.loads(outcome.output.strip()) == expected
150 | 
151 | 
def test_sniff_format(tsv_files):
    """Without --format, the tab-separated inputs still yield the expected diff."""
    previous_path, current_path = tsv_files
    outcome = CliRunner().invoke(
        cli.cli, [previous_path, current_path, "--key", "id", "--json"]
    )
    assert outcome.exit_code == 0
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert json.loads(outcome.output.strip()) == expected
163 | 
164 | 
def test_format_overrides_sniff(tsv_files):
    """Forcing --format csv on tab-separated files makes the command exit with 1."""
    previous_path, current_path = tsv_files
    args = [previous_path, current_path, "--key", "id", "--json", "--format", "csv"]
    outcome = CliRunner().invoke(cli.cli, args)
    assert outcome.exit_code == 1
171 | 
172 | 
def test_column_containing_dot(tmpdir):
    """Column names containing a dot are reported under their full name."""
    # https://github.com/simonw/csv-diff/issues/7
    previous_csv = dedent(
        """
    id,foo.bar,foo.baz
    1,Dog,Cat
    """
    ).strip()
    current_csv = dedent(
        """
    id,foo.bar,foo.baz
    1,Dog,Beaver
    """
    ).strip()
    previous_path = tmpdir / "one.csv"
    current_path = tmpdir / "two.csv"
    previous_path.write(previous_csv)
    current_path.write(current_csv)
    outcome = CliRunner().invoke(
        cli.cli,
        [str(previous_path), str(current_path), "--key", "id", "--json"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"foo.baz": ["Cat", "Beaver"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert json.loads(outcome.output.strip()) == expected
204 | 
205 | 
def test_semicolon_delimited(tmpdir):
    """Semicolon-delimited files are diffed without passing --format."""
    # https://github.com/simonw/csv-diff/issues/6
    previous_csv = dedent(
        """
    id;name
    1;Mark
    """
    ).strip()
    current_csv = dedent(
        """
    id;name
    1;Brian
    """
    ).strip()
    previous_path = tmpdir / "one.csv"
    current_path = tmpdir / "two.csv"
    previous_path.write(previous_csv)
    current_path.write(current_csv)
    outcome = CliRunner().invoke(
        cli.cli,
        [str(previous_path), str(current_path), "--key", "id", "--json"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"name": ["Mark", "Brian"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert json.loads(outcome.output.strip()) == expected
237 | 
238 | 
def test_diff_with_extras(tmpdir):
    """--extra adds an "extras:" section to changed, added and removed rows."""
    previous_rows = [
        {"id": 1, "name": "Cleo", "type": "dog"},
        {"id": 2, "name": "Suna", "type": "chicken"},
    ]
    current_rows = [
        {"id": 2, "name": "Suna", "type": "pretty chicken"},
        {"id": 3, "name": "Artie", "type": "bunny"},
    ]
    previous_path = tmpdir / "one.json"
    current_path = tmpdir / "two.json"
    previous_path.write(json.dumps(previous_rows))
    current_path.write(json.dumps(current_rows))
    args = [
        str(previous_path),
        str(current_path),
        "--key",
        "id",
        "--format",
        "json",
        "--extra",
        "search",
        "https://www.google.com/search?q={name}",
    ]
    outcome = CliRunner().invoke(cli.cli, args, catch_exceptions=False)
    assert outcome.exit_code == 0
    wanted = dedent(
        """
    1 row changed, 1 row added, 1 row removed

    1 row changed

      id: 2
        type: "chicken" => "pretty chicken"
      extras:
        search: https://www.google.com/search?q=Suna

    1 row added

      id: 3
      name: Artie
      type: bunny
      extras:
        search: https://www.google.com/search?q=Artie

    1 row removed

      id: 1
      name: Cleo
      type: dog
      extras:
        search: https://www.google.com/search?q=Cleo
    """
    ).strip()
    assert wanted == outcome.output.strip()
303 | 


--------------------------------------------------------------------------------
/tests/test_csv_diff.py:
--------------------------------------------------------------------------------
  1 | from csv_diff import load_csv, compare
  2 | import io
  3 | 
# Shared CSV/TSV fixtures. The other test modules import these by name.

# Baseline: two rows, Cleo (age 4) and Pancakes (age 2).
ONE = """id,name,age
1,Cleo,4
2,Pancakes,2"""

# Same data as ONE, tab-separated.
ONE_TSV = """id\tname\tage
1\tCleo\t4
2\tPancakes\t2"""

# ONE with Cleo's age changed from 4 to 5.
TWO = """id,name,age
1,Cleo,5
2,Pancakes,2"""

# Same data as TWO, tab-separated.
TWO_TSV = """id\tname\tage
1\tCleo\t5
2\tPancakes\t2"""

# Only the Cleo row from TWO.
THREE = """id,name,age
1,Cleo,5"""

# Three rows; note the Pancakes line ends with a trailing comma.
FOUR = """id,name,age
1,Cleo,5
2,Pancakes,2,
3,Bailey,1"""

# FOUR plus a fourth row (Carl).
FIVE = """id,name,age
1,Cleo,5
2,Pancakes,2,
3,Bailey,1
4,Carl,7"""

# Rows 1 and 3 only (no Pancakes).
SIX = """id,name,age
1,Cleo,5
3,Bailey,1"""

# Same ids and names as SIX, but with a "weight" column instead of "age".
SEVEN = """id,name,weight
1,Cleo,48
3,Bailey,20"""

# Four columns (adds "length"); rows 3 and 4, with row 3's name spelled "Bailee".
EIGHT = """id,name,age,length
3,Bailee,1,100
4,Bob,7,422"""

# NINE and TEN differ only in Pancakes' age (4 vs 3).
NINE = """id,name,age
1,Cleo,5
2,Pancakes,4"""

TEN = """id,name,age
1,Cleo,5
2,Pancakes,3"""
 53 | 
 54 | 
 55 | def test_row_changed():
 56 |     diff = compare(
 57 |         load_csv(io.StringIO(ONE), key="id"), load_csv(io.StringIO(TWO), key="id")
 58 |     )
 59 |     assert {
 60 |         "added": [],
 61 |         "removed": [],
 62 |         "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
 63 |         "columns_added": [],
 64 |         "columns_removed": [],
 65 |     } == diff
 66 | 
 67 | 
 68 | def test_row_added():
 69 |     diff = compare(
 70 |         load_csv(io.StringIO(THREE), key="id"), load_csv(io.StringIO(TWO), key="id")
 71 |     )
 72 |     assert {
 73 |         "changed": [],
 74 |         "removed": [],
 75 |         "added": [{"age": "2", "id": "2", "name": "Pancakes"}],
 76 |         "columns_added": [],
 77 |         "columns_removed": [],
 78 |     } == diff
 79 | 
 80 | 
 81 | def test_row_removed():
 82 |     diff = compare(
 83 |         load_csv(io.StringIO(TWO), key="id"), load_csv(io.StringIO(THREE), key="id")
 84 |     )
 85 |     assert {
 86 |         "changed": [],
 87 |         "removed": [{"age": "2", "id": "2", "name": "Pancakes"}],
 88 |         "added": [],
 89 |         "columns_added": [],
 90 |         "columns_removed": [],
 91 |     } == diff
 92 | 
 93 | 
 94 | def test_columns_changed():
 95 |     diff = compare(
 96 |         load_csv(io.StringIO(SIX), key="id"), load_csv(io.StringIO(SEVEN), key="id")
 97 |     )
 98 |     assert {
 99 |         "changed": [],
100 |         "removed": [],
101 |         "added": [],
102 |         "columns_added": ["weight"],
103 |         "columns_removed": ["age"],
104 |     } == diff
105 | 
106 | 
def test_tsv():
    """A comma-separated input compared with a tab-separated one (no dialect given)."""
    previous = load_csv(io.StringIO(ONE), key="id")
    current = load_csv(io.StringIO(TWO_TSV), key="id")
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert compare(previous, current) == expected
118 | 


--------------------------------------------------------------------------------
/tests/test_human_text.py:
--------------------------------------------------------------------------------
  1 | from csv_diff import load_csv, compare, human_text
  2 | from .test_csv_diff import ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN
  3 | from textwrap import dedent
  4 | import io
  5 | 
  6 | 
  7 | def test_row_changed():
  8 |     diff = compare(
  9 |         load_csv(io.StringIO(ONE), key="id"), load_csv(io.StringIO(TWO), key="id")
 10 |     )
 11 |     assert (
 12 |         dedent(
 13 |             """
 14 |     1 row changed
 15 | 
 16 |       id: 1
 17 |         age: "4" => "5"
 18 |     """
 19 |         ).strip()
 20 |         == human_text(diff, "id")
 21 |     )
 22 | 
 23 | 
 24 | def test_row_changed_show_unchanged():
 25 |     diff = compare(
 26 |         load_csv(io.StringIO(ONE), key="id"),
 27 |         load_csv(io.StringIO(TWO), key="id"),
 28 |         show_unchanged=True,
 29 |     )
 30 |     assert (
 31 |         dedent(
 32 |             """
 33 |     1 row changed
 34 | 
 35 |       id: 1
 36 |         age: "4" => "5"
 37 | 
 38 |         Unchanged:
 39 |           name: "Cleo"
 40 |     """
 41 |         ).strip()
 42 |         == human_text(diff, "id")
 43 |     )
 44 | 
 45 | 
 46 | def test_row_added():
 47 |     diff = compare(
 48 |         load_csv(io.StringIO(THREE), key="id"), load_csv(io.StringIO(TWO), key="id")
 49 |     )
 50 |     assert (
 51 |         dedent(
 52 |             """
 53 |     1 row added
 54 | 
 55 |       id: 2
 56 |       name: Pancakes
 57 |       age: 2
 58 |     """
 59 |         ).strip()
 60 |         == human_text(diff, "id")
 61 |     )
 62 | 
 63 | 
 64 | def test_rows_added():
 65 |     diff = compare(
 66 |         load_csv(io.StringIO(THREE), key="id"), load_csv(io.StringIO(FIVE), key="id")
 67 |     )
 68 |     assert (
 69 |         dedent(
 70 |             """
 71 |     3 rows added
 72 | 
 73 |       id: 2
 74 |       name: Pancakes
 75 |       age: 2
 76 | 
 77 |       id: 3
 78 |       name: Bailey
 79 |       age: 1
 80 | 
 81 |       id: 4
 82 |       name: Carl
 83 |       age: 7
 84 |     """
 85 |         ).strip()
 86 |         == human_text(diff, "id")
 87 |     )
 88 | 
 89 | 
 90 | def test_row_removed():
 91 |     diff = compare(
 92 |         load_csv(io.StringIO(TWO), key="id"), load_csv(io.StringIO(THREE), key="id")
 93 |     )
 94 |     assert (
 95 |         dedent(
 96 |             """
 97 |     1 row removed
 98 | 
 99 |       id: 2
100 |       name: Pancakes
101 |       age: 2
102 |     """
103 |         ).strip()
104 |         == human_text(diff, "id")
105 |     )
106 | 
107 | 
def test_row_changed_and_row_added_and_row_deleted():
    """When several kinds of change occur, each section gets its own heading."""
    previous = load_csv(io.StringIO(ONE), key="id")
    current = load_csv(io.StringIO(SIX), key="id")
    diff = compare(previous, current)
    expected = dedent(
        """
    1 row changed, 1 row added, 1 row removed

    1 row changed

      id: 1
        age: "4" => "5"

    1 row added

      id: 3
      name: Bailey
      age: 1

    1 row removed

      id: 2
      name: Pancakes
      age: 2
    """
    ).strip()
    assert human_text(diff, "id") == expected
138 | 
139 | 
def test_columns_changed():
    """Added and removed columns are listed by name under their own headings."""
    previous = load_csv(io.StringIO(SIX), key="id")
    current = load_csv(io.StringIO(SEVEN), key="id")
    diff = compare(previous, current)
    expected = dedent(
        """
    1 column added, 1 column removed

    1 column added

      weight

    1 column removed

      age
    """
    ).strip()
    assert human_text(diff, "id") == expected
160 | 
161 | 
def test_columns_and_rows_changed():
    """Column changes are listed before row changes when both occur."""
    previous = load_csv(io.StringIO(SEVEN), key="id")
    current = load_csv(io.StringIO(EIGHT), key="id")
    diff = compare(previous, current)
    expected = dedent(
        """
    2 columns added, 1 column removed, 1 row changed, 1 row added, 1 row removed

    2 columns added

      age
      length

    1 column removed

      weight

    1 row changed

      id: 3
        name: "Bailey" => "Bailee"

    1 row added

      id: 4
      name: Bob
      age: 7
      length: 422

    1 row removed

      id: 1
      name: Cleo
      weight: 48
    """
    ).strip()
    assert human_text(diff, "id") == expected
201 | 
202 | 
def test_no_key():
    """Without a key, a modified row shows up as one added plus one removed row."""
    previous = load_csv(io.StringIO(NINE))
    current = load_csv(io.StringIO(TEN))
    diff = compare(previous, current)
    expected = dedent(
        """
        1 row added, 1 row removed

        1 row added

          id: 2
          name: Pancakes
          age: 3

        1 row removed

          id: 2
          name: Pancakes
          age: 4
        """
    ).strip()
    assert human_text(diff) == expected
225 | 


--------------------------------------------------------------------------------