├── .bzrignore ├── .gitignore ├── .project ├── .pydevproject ├── .rsync-filter ├── .settings └── org.eclipse.core.resources.prefs ├── LICENSE ├── README.md ├── dedupe ├── __init__.py ├── allpairs.py ├── block.py ├── classification │ ├── __init__.py │ ├── distance.py │ ├── examples.py │ ├── kmeans.py │ ├── nearest.py │ └── rulebased.py ├── compat │ ├── __init__.py │ └── _ordereddict.py ├── csv.py ├── dale.py ├── dmetaphone.py ├── encode.py ├── geo.py ├── get.py ├── group.py ├── levenshtein.py ├── linkcsv.py └── sim.py ├── docs ├── Makefile ├── _static │ ├── brand.png │ ├── brand.svg │ ├── default.css │ ├── icon.png │ └── logo.png ├── _templates │ └── layout.html ├── conf.py ├── index.rst ├── install.rst ├── intro.rst ├── make.bat ├── modules.rst └── modules │ ├── block.csv │ ├── classification.rst │ ├── csv.rst │ ├── dale.rst │ ├── dmetaphone.rst │ ├── encode.rst │ ├── geo.rst │ ├── group.rst │ ├── levenshtein.rst │ ├── linkcsv.rst │ └── sim.rst ├── pydedupe.png ├── setup.cfg ├── setup.py └── tests ├── __init__.py └── test_linkcsv.py /.bzrignore: -------------------------------------------------------------------------------- 1 | pydedupe.egg-info 2 | docs/_build/* 3 | .coverage 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | /build 4 | /dist 5 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/.project -------------------------------------------------------------------------------- /.pydevproject: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/.pydevproject -------------------------------------------------------------------------------- /.rsync-filter: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/.rsync-filter -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | #Fri Jan 08 17:36:57 SAST 2010 2 | eclipse.preferences.version=1 3 | encoding//dedupe/excel.py=utf8 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/README.md -------------------------------------------------------------------------------- /dedupe/__init__.py: -------------------------------------------------------------------------------- 1 | """Identify pairs of similar tuples""" 2 | -------------------------------------------------------------------------------- /dedupe/allpairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/allpairs.py -------------------------------------------------------------------------------- /dedupe/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/block.py -------------------------------------------------------------------------------- /dedupe/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/__init__.py -------------------------------------------------------------------------------- /dedupe/classification/distance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/distance.py -------------------------------------------------------------------------------- /dedupe/classification/examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/examples.py -------------------------------------------------------------------------------- /dedupe/classification/kmeans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/kmeans.py -------------------------------------------------------------------------------- /dedupe/classification/nearest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/nearest.py -------------------------------------------------------------------------------- /dedupe/classification/rulebased.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/classification/rulebased.py -------------------------------------------------------------------------------- /dedupe/compat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/compat/__init__.py -------------------------------------------------------------------------------- /dedupe/compat/_ordereddict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/compat/_ordereddict.py -------------------------------------------------------------------------------- /dedupe/csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/csv.py -------------------------------------------------------------------------------- /dedupe/dale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/dale.py -------------------------------------------------------------------------------- /dedupe/dmetaphone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/dmetaphone.py -------------------------------------------------------------------------------- /dedupe/encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/encode.py -------------------------------------------------------------------------------- /dedupe/geo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/geo.py -------------------------------------------------------------------------------- /dedupe/get.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/get.py -------------------------------------------------------------------------------- /dedupe/group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/group.py -------------------------------------------------------------------------------- /dedupe/levenshtein.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/levenshtein.py -------------------------------------------------------------------------------- /dedupe/linkcsv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/linkcsv.py -------------------------------------------------------------------------------- /dedupe/sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/dedupe/sim.py -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/_static/brand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_static/brand.png -------------------------------------------------------------------------------- /docs/_static/brand.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_static/brand.svg -------------------------------------------------------------------------------- /docs/_static/default.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_static/default.css -------------------------------------------------------------------------------- /docs/_static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_static/icon.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/_templates/layout.html -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/install.rst -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/intro.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules.rst -------------------------------------------------------------------------------- /docs/modules/block.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/block.csv -------------------------------------------------------------------------------- /docs/modules/classification.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/classification.rst -------------------------------------------------------------------------------- /docs/modules/csv.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/csv.rst -------------------------------------------------------------------------------- /docs/modules/dale.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/dale.rst -------------------------------------------------------------------------------- /docs/modules/dmetaphone.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/dmetaphone.rst -------------------------------------------------------------------------------- /docs/modules/encode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/encode.rst -------------------------------------------------------------------------------- /docs/modules/geo.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/geo.rst -------------------------------------------------------------------------------- /docs/modules/group.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/group.rst -------------------------------------------------------------------------------- /docs/modules/levenshtein.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/levenshtein.rst -------------------------------------------------------------------------------- /docs/modules/linkcsv.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/linkcsv.rst -------------------------------------------------------------------------------- /docs/modules/sim.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/docs/modules/sim.rst -------------------------------------------------------------------------------- /pydedupe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/pydedupe.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_linkcsv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gpoulter/pydedupe/HEAD/tests/test_linkcsv.py --------------------------------------------------------------------------------