├── tests ├── __init__.py ├── bounds │ ├── __init__.py │ └── test_utils.py ├── data │ ├── __init__.py │ ├── test_stats.py │ └── test_loader.py ├── shapes │ ├── __init__.py │ ├── bases │ │ ├── __init__.py │ │ ├── test_shape.py │ │ ├── test_point_collection.py │ │ └── test_line_collection.py │ ├── lines │ │ ├── __init__.py │ │ ├── test_high_lines.py │ │ ├── test_horizontal_lines.py │ │ ├── test_slant_up_lines.py │ │ ├── test_wide_lines.py │ │ ├── test_slant_down_lines.py │ │ ├── test_vertical_lines.py │ │ ├── test_star.py │ │ ├── test_diamond.py │ │ ├── test_rectangle.py │ │ ├── test_x_lines.py │ │ └── bases.py │ ├── points │ │ ├── __init__.py │ │ ├── test_scatter.py │ │ ├── test_heart.py │ │ ├── test_spade.py │ │ ├── test_spiral.py │ │ ├── test_club.py │ │ ├── test_figure_eight.py │ │ ├── test_parabola.py │ │ ├── bases.py │ │ └── test_dots_grid.py │ ├── circles │ │ ├── __init__.py │ │ ├── test_circle.py │ │ ├── bases.py │ │ ├── test_bullseye.py │ │ └── test_rings.py │ ├── conftest.py │ └── test_factory.py ├── plotting │ ├── __init__.py │ ├── test_diagnostics.py │ ├── test_style.py │ ├── test_static.py │ └── test_animation.py ├── test_main.py └── conftest.py ├── src └── data_morph │ ├── shapes │ ├── bases │ │ ├── __init__.py │ │ ├── point_collection.py │ │ ├── line_collection.py │ │ └── shape.py │ ├── __init__.py │ ├── circles │ │ ├── __init__.py │ │ ├── bullseye.py │ │ ├── circle.py │ │ └── rings.py │ ├── points │ │ ├── __init__.py │ │ ├── dots_grid.py │ │ ├── heart.py │ │ ├── spiral.py │ │ ├── scatter.py │ │ ├── figure_eight.py │ │ ├── spade.py │ │ ├── parabola.py │ │ └── club.py │ ├── lines │ │ ├── __init__.py │ │ ├── x_lines.py │ │ ├── rectangle.py │ │ ├── vertical_lines.py │ │ ├── horizontal_lines.py │ │ ├── high_lines.py │ │ ├── wide_lines.py │ │ ├── diamond.py │ │ ├── slant_up.py │ │ ├── slant_down.py │ │ └── star.py │ └── factory.py │ ├── bounds │ ├── __init__.py │ ├── _utils.py │ └── interval.py │ ├── plotting │ ├── __init__.py │ ├── config │ │ └── 
plot_style.mplstyle │ ├── style.py │ └── diagnostics.py │ ├── data │ ├── __init__.py │ ├── stats.py │ └── starter_shapes │ │ ├── dino.csv │ │ └── pi.csv │ ├── __main__.py │ ├── progress.py │ └── __init__.py ├── docs ├── _static │ ├── logo.png │ ├── favicon.ico │ ├── panda-to-star.gif │ ├── tutorials │ │ ├── style.css │ │ ├── example-shapes.png │ │ ├── dataset-creation.jpg │ │ └── easter-egg-to-wide-lines.gif │ ├── panda-to-star-eased.gif │ ├── panda-to-star-classic.gif │ └── switcher.json ├── _templates │ └── autosummary │ │ ├── class.rst │ │ ├── function.rst │ │ ├── method.rst │ │ └── module.rst ├── citation.rst ├── index.rst ├── README.md ├── Makefile ├── api.rst ├── tutorials │ ├── index.rst │ └── custom-datasets.rst ├── cli.rst ├── classroom-ideas.rst └── post_build.py ├── .github ├── release.yml ├── dependabot.yml ├── codecov.yml ├── workflows │ ├── label.yml │ ├── greetings.yml │ ├── stale.yml │ ├── check-pr.yml │ ├── codecov-validate.yml │ ├── citation-validate.yml │ ├── docs.yml │ ├── ci.yml │ ├── generate-morphs.yml │ └── pypi-publish.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── pull_request_template.md └── labeler.yml ├── .coveragerc ├── CITATION.cff ├── LICENSE ├── .pre-commit-config.yaml ├── bin └── ci.py ├── .gitignore ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test data_morph.""" 2 | -------------------------------------------------------------------------------- /src/data_morph/shapes/bases/__init__.py: -------------------------------------------------------------------------------- 1 | """Shape base classes.""" 2 | -------------------------------------------------------------------------------- /tests/bounds/__init__.py: -------------------------------------------------------------------------------- 1 | """Test the data_morph.bounds subpackage.""" 2 | 
-------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Test the data_morph.data subpackage.""" 2 | -------------------------------------------------------------------------------- /tests/shapes/__init__.py: -------------------------------------------------------------------------------- 1 | """Test the data_morph.shapes subpackage.""" 2 | -------------------------------------------------------------------------------- /tests/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | """Test the data_morph.plotting subpackage.""" 2 | -------------------------------------------------------------------------------- /tests/shapes/bases/__init__.py: -------------------------------------------------------------------------------- 1 | """Test data_morph.shapes.bases subpackage.""" 2 | -------------------------------------------------------------------------------- /tests/shapes/lines/__init__.py: -------------------------------------------------------------------------------- 1 | """Test data_morph.shapes.lines subpackage.""" 2 | -------------------------------------------------------------------------------- /tests/shapes/points/__init__.py: -------------------------------------------------------------------------------- 1 | """Test data_morph.shapes.points subpackage.""" 2 | -------------------------------------------------------------------------------- /src/data_morph/bounds/__init__.py: -------------------------------------------------------------------------------- 1 | """Classes for representing 1D and 2D bounds.""" 2 | -------------------------------------------------------------------------------- /tests/shapes/circles/__init__.py: -------------------------------------------------------------------------------- 1 | """Test data_morph.shapes.circles subpackage.""" 2 | 
-------------------------------------------------------------------------------- /src/data_morph/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functions for plotting and animating.""" 2 | -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/favicon.ico -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | authors: 4 | - dependabot 5 | - pre-commit-ci 6 | -------------------------------------------------------------------------------- /docs/_static/panda-to-star.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/panda-to-star.gif -------------------------------------------------------------------------------- /docs/_static/tutorials/style.css: -------------------------------------------------------------------------------- 1 | .tutorial-card-image { 2 | height: 225px; 3 | object-fit: cover; 4 | } 5 | -------------------------------------------------------------------------------- /src/data_morph/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for data operations like loading and calculating summary statistics.""" 2 | -------------------------------------------------------------------------------- 
/docs/_static/panda-to-star-eased.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/panda-to-star-eased.gif -------------------------------------------------------------------------------- /docs/_static/panda-to-star-classic.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/panda-to-star-classic.gif -------------------------------------------------------------------------------- /docs/_static/tutorials/example-shapes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/tutorials/example-shapes.png -------------------------------------------------------------------------------- /docs/_static/tutorials/dataset-creation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/tutorials/dataset-creation.jpg -------------------------------------------------------------------------------- /src/data_morph/__main__.py: -------------------------------------------------------------------------------- 1 | """Enable running `python -m data_morph`.""" 2 | 3 | from .cli import main 4 | 5 | raise SystemExit(main()) 6 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ name | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. 
autoclass:: {{ objname }} 6 | -------------------------------------------------------------------------------- /docs/_static/tutorials/easter-egg-to-wide-lines.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stefmolin/data-morph/HEAD/docs/_static/tutorials/easter-egg-to-wide-lines.gif -------------------------------------------------------------------------------- /docs/_templates/autosummary/function.rst: -------------------------------------------------------------------------------- 1 | {{ name | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autofunction:: {{ objname }} 6 | -------------------------------------------------------------------------------- /docs/_templates/autosummary/method.rst: -------------------------------------------------------------------------------- 1 | {{ objname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. automethod:: {{ objname }} 6 | :noindex: 7 | -------------------------------------------------------------------------------- /src/data_morph/shapes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for specific shapes that data can be morphed into. 3 | 4 | The :class:`.ShapeFactory` is used to generate shape objects. 5 | Shapes are grouped into modules based on composition. 
6 | """ 7 | -------------------------------------------------------------------------------- /src/data_morph/shapes/circles/__init__.py: -------------------------------------------------------------------------------- 1 | """Shapes made up of circles.""" 2 | 3 | from .bullseye import Bullseye 4 | from .circle import Circle 5 | from .rings import Rings 6 | 7 | __all__ = [ 8 | 'Bullseye', 9 | 'Circle', 10 | 'Rings', 11 | ] 12 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | fail_under = 90 6 | show_missing = true 7 | exclude_lines = 8 | pragma: no cover 9 | @(abc\.)?abstractmethod 10 | if __name__ == .__main__.: 11 | raise NotImplemented 12 | if TYPE_CHECKING: 13 | -------------------------------------------------------------------------------- /tests/shapes/points/test_scatter.py: -------------------------------------------------------------------------------- 1 | """Test the scatter module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestScatter(PointsModuleTestBase): 11 | """Test the Scatter class.""" 12 | 13 | shape_name = 'scatter' 14 | distance_test_cases = (((20, 50), 0.0), ((30, 60), 0.0), ((-500, -150), 0.0)) 15 | -------------------------------------------------------------------------------- /docs/citation.rst: -------------------------------------------------------------------------------- 1 | If you use this software, please cite both Data Morph (DOI: 2 | `10.5281/zenodo.7834197 <https://doi.org/10.5281/zenodo.7834197>`_) and 3 | "`Same Stats, Different Graphs: Generating Datasets with Varied Appearance 4 | and Identical Statistics through Simulated Annealing 5 | <https://doi.org/10.1145/3025453.3025912>`_" 6 | by Justin Matejka and George Fitzmaurice (ACM CHI 2017).
7 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_high_lines.py: -------------------------------------------------------------------------------- 1 | """Test the high_lines module.""" 2 | 3 | import pytest 4 | 5 | from .bases import ParallelLinesModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 8 | 9 | 10 | class TestHighLines(ParallelLinesModuleTestBase): 11 | """Test the HighLines class.""" 12 | 13 | shape_name = 'high_lines' 14 | distance_test_cases = (((20, 50), 6.0), ((30, 60), 4.0)) 15 | expected_line_count = 2 16 | expected_slopes = 0 17 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | """Test the __main__ module.""" 2 | 3 | import subprocess 4 | import sys 5 | 6 | import pytest 7 | 8 | pytestmark = pytest.mark.cli 9 | 10 | 11 | @pytest.mark.parametrize(('flag', 'return_code'), [('--version', 0), ('', 2)]) 12 | def test_main_access_cli(flag, return_code): 13 | """Confirm that CLI can be accessed via __main__.""" 14 | result = subprocess.run([sys.executable, '-m', 'data_morph', flag]) 15 | assert result.returncode == return_code 16 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Data Morph 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | :hidden: 8 | 9 | quickstart 10 | tutorials/index 11 | cli 12 | api 13 | release-notes 14 | 15 | .. automodule:: data_morph 16 | :noindex: 17 | 18 | .. include:: quickstart.rst 19 | :start-after: .. INSTALLATION 20 | 21 | .. _classroom ideas: 22 | 23 | .. include:: classroom-ideas.rst 24 | 25 | Citations 26 | --------- 27 | 28 | .. 
include:: citation.rst 29 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_horizontal_lines.py: -------------------------------------------------------------------------------- 1 | """Test the horizontal_lines module.""" 2 | 3 | import pytest 4 | 5 | from .bases import ParallelLinesModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 8 | 9 | 10 | class TestHorizontalLines(ParallelLinesModuleTestBase): 11 | """Test the HorizontalLines class.""" 12 | 13 | shape_name = 'h_lines' 14 | distance_test_cases = (((20, 50), 0.0), ((30, 60), 2.5)) 15 | expected_line_count = 5 16 | expected_slopes = 0 17 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_slant_up_lines.py: -------------------------------------------------------------------------------- 1 | """Test the slant_up module.""" 2 | 3 | import pytest 4 | 5 | from .bases import ParallelLinesModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 8 | 9 | 10 | class TestSlantUpLines(ParallelLinesModuleTestBase): 11 | """Test the SlantUpLines class.""" 12 | 13 | shape_name = 'slant_up' 14 | distance_test_cases = (((20, 50), 1.664101), ((30, 60), 1.109400)) 15 | expected_line_count = 5 16 | expected_slopes = 1.5 17 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Docs 2 | 3 | 1. Install the docs dependencies: `pip install --group docs`. 4 | 2. From this directory, run `make html`. 5 | 6 | ## Managing versions 7 | 8 | The versions that will show up in the switcher must be kept up-to-date in `_static/switcher.json`.
9 | 10 | ## Note on cleaning 11 | Use `make clean` to clean out the `_build` directory properly. If you use `rm` instead, you will need to run this again: 12 | 13 | ```shell 14 | cd _build 15 | git worktree add -f html gh-pages 16 | ``` 17 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_wide_lines.py: -------------------------------------------------------------------------------- 1 | """Test the wide_lines module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import ParallelLinesModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 9 | 10 | 11 | class TestWideLines(ParallelLinesModuleTestBase): 12 | """Test the WideLines class.""" 13 | 14 | shape_name = 'wide_lines' 15 | distance_test_cases = (((26, 50), 0), ((30, 60), 4.0)) 16 | expected_line_count = 2 17 | expected_slopes = np.inf 18 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_slant_down_lines.py: -------------------------------------------------------------------------------- 1 | """Test the slant_down module.""" 2 | 3 | import pytest 4 | 5 | from .bases import ParallelLinesModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 8 | 9 | 10 | class TestSlantDownLines(ParallelLinesModuleTestBase): 11 | """Test the SlantDownLines class.""" 12 | 13 | shape_name = 'slant_down' 14 | distance_test_cases = (((20, 50), 1.664101), ((30, 60), 0.554700)) 15 | expected_line_count = 5 16 | expected_slopes = -1.5 17 | -------------------------------------------------------------------------------- /src/data_morph/plotting/config/plot_style.mplstyle: -------------------------------------------------------------------------------- 1 | # Tweaks for data_morph 2 | font.size: 12.0 3 | font.family: monospace 4 | font.weight: normal 5 | font.sans-serif: Helvetica, DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana,
Geneva, Lucid, Arial, Avant Garde, sans-serif 6 | font.monospace: Decima Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, Computer Modern Typewriter, Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace 7 | text.color: 222222 8 | pdf.fonttype: 42 9 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_vertical_lines.py: -------------------------------------------------------------------------------- 1 | """Test the vertical_lines module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import ParallelLinesModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 9 | 10 | 11 | class TestVerticalLines(ParallelLinesModuleTestBase): 12 | """Test the VerticalLines class.""" 13 | 14 | shape_name = 'v_lines' 15 | distance_test_cases = (((35, 60), 5.0), ((30, 60), 0.0)) 16 | expected_line_count = 5 17 | expected_slopes = np.inf 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Global pytest config for data_morph tests.""" 2 | 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | 9 | @pytest.fixture(scope='session') 10 | def sample_data(): 11 | """Fixture for the sample data.""" 12 | return pd.DataFrame({'x': [10, 20, 30], 'y': [50, 50, 80]}) 13 | 14 | 15 | @pytest.fixture(scope='session') 16 | def starter_shapes_dir(request): 17 | """Fixture for the starter shapes directory.""" 18 | return ( 19 | Path(request.config.rootdir) / 'src' / 'data_morph' / 'data' / 'starter_shapes' 20 | ) 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Dependabot version updates. 
See docs for configuration options: 2 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 3 | 4 | version: 2 5 | updates: 6 | 7 | - package-ecosystem: "github-actions" 8 | directory: "/" 9 | schedule: 10 | interval: "semiannually" 11 | ignore: # only update minor or major version changes 12 | - dependency-name: "*" 13 | update-types: ["version-update:semver-patch"] 14 | groups: 15 | actions: 16 | patterns: 17 | - "*" 18 | -------------------------------------------------------------------------------- /docs/_static/switcher.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "0.3 (stable)", 4 | "version": "stable", 5 | "url": "https://stefaniemolin.com/data-morph/stable/" 6 | }, 7 | { 8 | "name": "0.4 (dev)", 9 | "version": "dev", 10 | "url": "https://stefaniemolin.com/data-morph/dev/" 11 | }, 12 | { 13 | "name": "0.2", 14 | "version": "0.2", 15 | "url": "https://stefaniemolin.com/data-morph/0.2/" 16 | }, 17 | { 18 | "name": "0.1", 19 | "version": "0.1", 20 | "url": "https://stefaniemolin.com/data-morph/0.1/" 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /tests/shapes/points/test_heart.py: -------------------------------------------------------------------------------- 1 | """Test the heart module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestHeart(PointsModuleTestBase): 11 | """Test the Heart class.""" 12 | 13 | shape_name = 'heart' 14 | distance_test_cases = ( 15 | ((22.424114, 59.471779), 0.0), 16 | ((10.405462, 70.897342), 0.0), 17 | ((21.064032, 72.065253), 0.0), 18 | ((16.035166, 60.868470), 0.0), 19 | ((20, 50), 6.065782511791651), 20 | ((10, 80), 7.173013322704914), 21 | ) 22 | --------------------------------------------------------------------------------
/src/data_morph/shapes/points/__init__.py: -------------------------------------------------------------------------------- 1 | """Shapes made up of points.""" 2 | 3 | from .club import Club 4 | from .dots_grid import DotsGrid 5 | from .figure_eight import FigureEight 6 | from .heart import Heart 7 | from .parabola import DownParabola, LeftParabola, RightParabola, UpParabola 8 | from .scatter import Scatter 9 | from .spade import Spade 10 | from .spiral import Spiral 11 | 12 | __all__ = [ 13 | 'Club', 14 | 'DotsGrid', 15 | 'DownParabola', 16 | 'FigureEight', 17 | 'Heart', 18 | 'LeftParabola', 19 | 'RightParabola', 20 | 'Scatter', 21 | 'Spade', 22 | 'Spiral', 23 | 'UpParabola', 24 | ] 25 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | # This is the configuration for codecov.yml -- remember to validate it. 2 | # 3 | # cat .github/codecov.yml | curl --data-binary @- https://codecov.io/validate 4 | 5 | codecov: 6 | notify: 7 | wait_for_ci: false 8 | require_ci_to_pass: false 9 | 10 | coverage: 11 | status: 12 | patch: 13 | default: 14 | target: 90% 15 | project: 16 | default: false 17 | data_morph: 18 | target: 90% 19 | paths: 20 | - "!tests/.*" 21 | tests: 22 | target: 100% 23 | paths: 24 | - "tests/.*" 25 | 26 | comment: 27 | layout: "reach, diff, files" 28 | -------------------------------------------------------------------------------- /tests/shapes/points/test_spade.py: -------------------------------------------------------------------------------- 1 | """Test the spade module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestSpade(PointsModuleTestBase): 11 | """Test the Spade class.""" 12 | 13 | shape_name = 'spade' 14 | distance_test_cases = ( 15 | ((19.818701, 60.065370), 0), 16 | ((23.750000, 55.532859), 0), 17 
| ((20.067229, 60.463689), 0), 18 | ((18.935968, 58.467606), 0), 19 | ((20, 75), 0.5335993101603015), 20 | ((0, 0), 57.861566654807596), 21 | ((10, 80), 11.404000978114487), 22 | ) 23 | -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | # This workflow will triage pull requests and apply a label based on the 2 | # paths that are modified in the pull request. 3 | # 4 | # To use this workflow, you will need to set up a .github/labeler.yml 5 | # file with configuration. For more information, see: 6 | # https://github.com/actions/labeler 7 | 8 | name: Labeler 9 | on: [pull_request_target] 10 | 11 | jobs: 12 | label: 13 | 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: read 17 | pull-requests: write 18 | 19 | steps: 20 | - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 21 | with: 22 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 23 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/__init__.py: -------------------------------------------------------------------------------- 1 | """Shapes made up of lines.""" 2 | 3 | from .diamond import Diamond 4 | from .high_lines import HighLines 5 | from .horizontal_lines import HorizontalLines 6 | from .rectangle import Rectangle 7 | from .slant_down import SlantDownLines 8 | from .slant_up import SlantUpLines 9 | from .star import Star 10 | from .vertical_lines import VerticalLines 11 | from .wide_lines import WideLines 12 | from .x_lines import XLines 13 | 14 | __all__ = [ 15 | 'Diamond', 16 | 'HighLines', 17 | 'HorizontalLines', 18 | 'Rectangle', 19 | 'SlantDownLines', 20 | 'SlantUpLines', 21 | 'Star', 22 | 'VerticalLines', 23 | 'WideLines', 24 | 'XLines', 25 | ] 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | Fixes # 3 | 4 | **Describe your changes** 5 | 6 | 7 | 8 | **Checklist** 9 | 10 | 11 | 12 | - [ ] Test cases have been modified/added to cover any code changes. 13 | - [ ] Docstrings have been modified/created for any code changes. 14 | - [ ] All linting and formatting checks pass (see the [contributing guidelines](https://github.com/stefmolin/data-morph/blob/main/CONTRIBUTING.md) for more information). 15 | - [ ] If you added a new dataset or shape, please comment on which datasets worked best for your shape or which shapes worked best for your dataset and provide the GIFs for those here. 
16 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_star.py: -------------------------------------------------------------------------------- 1 | """Test the star module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PolygonsLineModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.lines, pytest.mark.polygons] 8 | 9 | 10 | class TestStar(PolygonsLineModuleTestBase): 11 | """Test the Star class.""" 12 | 13 | shape_name = 'star' 14 | distance_test_cases = ( 15 | ((8, 68), 0), 16 | ((17, 68), 0), 17 | ((20, 77), 0), 18 | ((23, 68), 0), 19 | ((32, 68), 0), 20 | ((24.5, 62), 0), 21 | ((27.5, 53), 0), 22 | ((20, 59), 0), 23 | ((12.5, 53), 0), 24 | ((15.5, 62), 0), 25 | ((20, 50), 7.027819284987274), 26 | ((30, 60), 4.58530260724415), 27 | ) 28 | expected_line_count = 10 29 | -------------------------------------------------------------------------------- /tests/shapes/points/test_spiral.py: -------------------------------------------------------------------------------- 1 | """Test the spiral module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestSpiral(PointsModuleTestBase): 11 | """Test the Spiral class.""" 12 | 13 | shape_name = 'spiral' 14 | distance_test_cases = ( 15 | ((10.862675, 65.846698), 0), 16 | ((29.280789, 59.546024), 0), 17 | ((16.022152, 68.248880), 0), 18 | ((20.310858, 65.251728), 0), 19 | ((22.803548, 72.599350), 0), 20 | ((0, 0), 58.03780546896006), 21 | ((10, 50), 8.239887412781957), 22 | ((30, 70), 0.6642518196535838), 23 | ((25, 65), 1.3042797087884075), 24 | ((-30, 100), 52.14470630148412), 25 | ) 26 | -------------------------------------------------------------------------------- /tests/shapes/points/test_club.py: -------------------------------------------------------------------------------- 1 | """Test the club module.""" 2 | 3 | import pytest 4 | 5 | 
from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestClub(PointsModuleTestBase): 11 | """Test the Club class.""" 12 | 13 | shape_name = 'club' 14 | distance_test_cases = ( 15 | ((19.639387, 73.783711), 0.0), # top lobe 16 | ((12.730310, 60.295844), 0.0), # bottom left lobe 17 | ((27.630301, 60.920443), 0.0), # bottom right lobe 18 | ((20.304761, 55.933333), 0.0), # top of stem 19 | ((18.8, 57.076666), 0.0), # left part of stem 20 | ((20.933333, 57.823333), 0.0), # right part of stem 21 | ((0, 0), 58.717591), 22 | ((20, 50), 5.941155), 23 | ((10, 80), 10.288055), 24 | ) 25 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_diamond.py: -------------------------------------------------------------------------------- 1 | """Test the diamond module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import PolygonsLineModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.lines, pytest.mark.polygons] 9 | 10 | 11 | class TestDiamond(PolygonsLineModuleTestBase): 12 | """Test the Diamond class.""" 13 | 14 | shape_name = 'diamond' 15 | distance_test_cases = ( 16 | ((20, 50), 0), 17 | ((20, 77), 0), 18 | ((11, 63.5), 0), 19 | ((29, 63.5), 0), 20 | ((30, 63.5), 1), 21 | ((30, 60), 2.773501), 22 | ) 23 | expected_line_count = 4 24 | 25 | def test_slopes(self, slopes): 26 | """Test that the slopes are as expected.""" 27 | np.testing.assert_array_equal(np.sort(slopes).flatten(), [-1.5, -1.5, 1.5, 1.5]) 28 | -------------------------------------------------------------------------------- /tests/shapes/points/test_figure_eight.py: -------------------------------------------------------------------------------- 1 | """Test the figure_eight module.""" 2 | 3 | import pytest 4 | 5 | from .bases import PointsModuleTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class 
TestFigureEight(PointsModuleTestBase): 11 | """Test the FigureEight class.""" 12 | 13 | shape_name = 'figure_eight' 14 | distance_test_cases = ( 15 | ((17.79641748, 67.34954701), 0), 16 | ((21.71773824, 63.21594749), 0), 17 | ((22.20358252, 67.34954701), 0), 18 | ((19.26000438, 64.25495015), 0), 19 | ((19.50182914, 77.69858052), 0), 20 | ((0, 0), 55.70680898398098), 21 | ((19, 61), 1.9727377843832639), 22 | ((19, 64), 0.34685744033355576), 23 | ((25, 65), 3.6523121397065657), 24 | ((18, 40), 12.392782544116978), 25 | ) 26 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_rectangle.py: -------------------------------------------------------------------------------- 1 | """Test the rectangle module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import PolygonsLineModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.lines, pytest.mark.polygons] 9 | 10 | 11 | class TestRectangle(PolygonsLineModuleTestBase): 12 | """Test the Rectangle class.""" 13 | 14 | shape_name = 'rectangle' 15 | distance_test_cases = ( 16 | ((12, 60), 0), 17 | ((28, 60), 0), 18 | ((20, 50), 0), 19 | ((20, 74), 0), 20 | ((20, 75), 1), 21 | ((30, 80), 6.324555320336759), 22 | ) 23 | expected_line_count = 4 24 | 25 | def test_slopes(self, slopes): 26 | """Test that the slopes are as expected.""" 27 | np.testing.assert_array_equal(np.sort(slopes).flatten(), [0, 0, np.inf, np.inf]) 28 | -------------------------------------------------------------------------------- /tests/shapes/conftest.py: -------------------------------------------------------------------------------- 1 | """Hooks and config for data_morph.shapes tests.""" 2 | 3 | import pytest 4 | 5 | from data_morph.data.dataset import Dataset 6 | from data_morph.shapes.factory import ShapeFactory 7 | 8 | 9 | def pytest_generate_tests(metafunc): 10 | """ 11 | Parametrize the test_distance() methods for shape tests 12 | using the distance_test_cases 
class attribute on test classes. 13 | """ 14 | if metafunc.function.__name__ == 'test_distance': 15 | metafunc.parametrize( 16 | ['test_point', 'expected_distance'], 17 | metafunc.cls.distance_test_cases, 18 | ids=str, 19 | ) 20 | 21 | 22 | @pytest.fixture(scope='package') 23 | def shape_factory(sample_data): 24 | """Fixture for a ShapeFactory of sample data.""" 25 | return ShapeFactory(Dataset('sample', sample_data)) 26 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request_target, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | steps: 12 | - uses: actions/first-interaction@1c4688942c71f71d4f5502a26ea67c331730fa4d # v3.1.0 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | issue-message: "It looks like this is your first issue here – welcome! Please familiarize yourself with the [contributing guidelines](https://github.com/stefmolin/data-morph/blob/main/CONTRIBUTING.md), if you haven't already." 16 | pr-message: "Congratulations on making your first pull request to Data Morph! Please familiarize yourself with the [contributing guidelines](https://github.com/stefmolin/data-morph/blob/main/CONTRIBUTING.md), if you haven't already." 17 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -n 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build/_tmp 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | clean: 18 | -rm -rf `dirname $(BUILDDIR)`/html/*/ $(BUILDDIR) api 19 | 20 | # Catch-all target: route all unknown targets to Sphinx using the new 21 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 22 | %: Makefile 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | @if [ "$@" = "html" ]; then\ 25 | python3 post_build.py;\ 26 | fi 27 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '27 20 * * *' 11 | 12 | jobs: 13 | stale: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0 17 | with: 18 | days-before-stale: 30 19 | days-before-close: 7 20 | stale-issue-message: 'This issue has been marked as stale due to lack of recent activity. It will be closed if no further activity occurs.' 21 | stale-pr-message: '' 22 | stale-issue-label: 'stale' 23 | stale-pr-label: 'stale' 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to edit via form. 3 | 4 | cff-version: 1.2.0 5 | title: 'stefmolin/data-morph: Data Morph' 6 | message: 'If you use this software, please cite it as below.' 
7 | type: software 8 | authors: 9 | - family-names: Molin 10 | given-names: Stefanie 11 | orcid: 'https://orcid.org/0009-0001-3359-3346' 12 | identifiers: 13 | - type: doi 14 | value: 10.5281/zenodo.7834197 15 | repository-code: 'https://github.com/stefmolin/data-morph' 16 | url: 'https://stefaniemolin.com/data-morph/' 17 | abstract: >- 18 | Morph an input dataset of 2D points into select shapes, 19 | while preserving the summary statistics to a given number 20 | of decimal points through simulated annealing. 21 | keywords: 22 | - data visualization 23 | - summary statistics 24 | - data animation 25 | license: MIT 26 | -------------------------------------------------------------------------------- /tests/bounds/test_utils.py: -------------------------------------------------------------------------------- 1 | """Test the _utils module.""" 2 | 3 | import pytest 4 | 5 | from data_morph.bounds._utils import _validate_2d 6 | 7 | 8 | @pytest.mark.bounds 9 | @pytest.mark.input_validation 10 | @pytest.mark.parametrize( 11 | ('data', 'msg'), 12 | [ 13 | (True, 'must be an iterable of 2 numeric values'), 14 | ({1, 2}, 'must be an iterable of 2 numeric values'), 15 | ('12', 'must be an iterable of 2 numeric values'), 16 | ([0, False], 'must be an iterable of 2 numeric values'), 17 | ([1, 2], False), 18 | ], 19 | ids=['True', '{1, 2}', '12', '[0, False]', '[1, 2]'], 20 | ) 21 | def test_validate_2d(data, msg): 22 | """Test that 2D numeric value check is working.""" 23 | if msg: 24 | with pytest.raises(ValueError, match=msg): 25 | _ = _validate_2d(data, 'test') 26 | else: 27 | assert data == _validate_2d(data, 'test') 28 | -------------------------------------------------------------------------------- /tests/data/test_stats.py: -------------------------------------------------------------------------------- 1 | """Test the stats module.""" 2 | 3 | import pytest 4 | 5 | from data_morph.data.loader import DataLoader 6 | from data_morph.data.stats import get_summary_statistics 
7 | 8 | 9 | @pytest.mark.parametrize('with_median', [True, False]) 10 | def test_stats(with_median): 11 | """Test that summary statistics tuple is correct.""" 12 | 13 | data = DataLoader.load_dataset('dino').data 14 | 15 | stats = get_summary_statistics(data, with_median) 16 | 17 | assert stats.x_mean == data.x.mean() 18 | assert stats.y_mean == data.y.mean() 19 | assert stats.x_stdev == data.x.std() 20 | assert stats.y_stdev == data.y.std() 21 | assert stats.correlation == data.corr().x.y 22 | 23 | if with_median: 24 | assert stats.x_median == data.x.median() 25 | assert stats.y_median == data.y.median() 26 | else: 27 | assert stats.x_median is stats.y_median is None 28 | -------------------------------------------------------------------------------- /src/data_morph/bounds/_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for working with intervals.""" 2 | 3 | from collections.abc import Iterable 4 | from numbers import Number 5 | 6 | 7 | def _validate_2d(data: Iterable[Number], name: str) -> Iterable[Number]: 8 | """ 9 | Validate the data is exactly two-dimensional. 10 | 11 | Parameters 12 | ---------- 13 | data : Iterable[numbers.Number] 14 | Data in two dimensions (e.g., a point or bounds). 15 | name : str 16 | The name of the value being passed in as ``data`` (for error messages). 17 | 18 | Returns 19 | ------- 20 | Iterable[numbers.Number] 21 | The validated data. 
22 | """ 23 | if not ( 24 | isinstance(data, tuple | list) 25 | and len(data) == 2 26 | and all(isinstance(x, Number) and not isinstance(x, bool) for x in data) 27 | ): 28 | raise ValueError(f'{name} must be an iterable of 2 numeric values') 29 | 30 | return data 31 | -------------------------------------------------------------------------------- /.github/workflows/check-pr.yml: -------------------------------------------------------------------------------- 1 | # This workflow confirms that the docs can be built. 2 | # 3 | # Author: Stefanie Molin 4 | 5 | name: Check PR 6 | 7 | on: 8 | pull_request: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | 16 | # Check that docs can be built 17 | docs: 18 | runs-on: ubuntu-latest 19 | name: Build docs 20 | 21 | steps: 22 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 23 | with: 24 | fetch-depth: 0 25 | 26 | - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 27 | with: 28 | python-version: "3.x" 29 | 30 | - name: Install docs dependencies 31 | run: | 32 | python -m pip install . --group docs 33 | 34 | - name: Sphinx build 35 | run: | 36 | cd docs 37 | make html 38 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. automodule:: data_morph 5 | 6 | .. rubric:: Modules 7 | 8 | .. autosummary:: 9 | :toctree: api 10 | :recursive: 11 | 12 | data_morph.bounds 13 | data_morph.data 14 | data_morph.morpher 15 | data_morph.plotting 16 | data_morph.progress 17 | data_morph.shapes 18 | 19 | ---- 20 | 21 | .. rubric:: Examples 22 | 23 | .. include:: quickstart.rst 24 | :start-after: .. PYTHON USAGE START 25 | :end-before: .. 
PYTHON USAGE END 26 | 27 | This produces the following animation in the directory specified as ``output_dir`` above: 28 | 29 | .. figure:: _static/panda-to-star.gif 30 | :alt: Morphing the panda dataset into the star shape. 31 | :align: center 32 | 33 | Morphing the panda :class:`.Dataset` into the star :class:`.Shape`. 34 | 35 | ---- 36 | 37 | .. include:: quickstart.rst 38 | :start-after: .. VIZ LISTINGS 39 | 40 | .. note:: 41 | There is also a :ref:`CLI option ` for morphing. 42 | 43 | .. rubric:: Citations 44 | 45 | .. include:: citation.rst 46 | -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | ********* 2 | Tutorials 3 | ********* 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | :hidden: 8 | 9 | custom-datasets 10 | shape-creation 11 | 12 | .. grid:: 1 1 2 2 13 | :gutter: 5 14 | 15 | .. grid-item-card:: Design and use a custom dataset 16 | :link-type: doc 17 | :link: custom-datasets 18 | :link-alt: Custom datasets tutorial 19 | :img-bottom: ../_static/tutorials/dataset-creation.jpg 20 | :img-alt: Custom dataset of Easter bunny with egg. 21 | :class-img-bottom: tutorial-card-image 22 | 23 | Learn how to make a custom dataset in this tutorial. 24 | 25 | .. grid-item-card:: Create your own target shape 26 | :link-type: doc 27 | :link: shape-creation 28 | :link-alt: Shape creation tutorial 29 | :img-bottom: ../_static/tutorials/example-shapes.png 30 | :img-alt: Example target shapes in Data Morph. 31 | :class-img-bottom: tutorial-card-image 32 | 33 | Learn how to add a new target shape in this tutorial. 34 | -------------------------------------------------------------------------------- /.github/workflows/codecov-validate.yml: -------------------------------------------------------------------------------- 1 | # This workflow checks that the codecov.yml file is valid, if it is included in the PR. 
2 | # 3 | # Author: Stefanie Molin 4 | 5 | name: Validate codecov config 6 | 7 | on: 8 | pull_request: 9 | paths: 10 | - '.github/codecov.yml' 11 | schedule: 12 | - cron: '21 21 21 * *' 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | # check that the codecov.yml configuration is valid 20 | codecov: 21 | runs-on: ubuntu-latest 22 | name: Validate codecov.yml 23 | steps: 24 | - name: Checkout Code 25 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 26 | with: 27 | fetch-depth: 0 28 | - name: Validate with codecov API 29 | run: | 30 | response=$(cat .github/codecov.yml | curl --data-binary @- https://codecov.io/validate) 31 | echo $response 32 | if [[ $response = Error* ]]; then 33 | exit 1 34 | fi 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Something isn't working 4 | title: "[BUG] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Required attestation** 11 | - [ ] I have checked that this issue has not already been reported. 12 | - [ ] I have confirmed this bug exists on the [latest version](https://pypi.org/project/data-morph-ai/). 13 | - [ ] I have confirmed this bug exists on the `main` branch. 14 | 15 | **Describe the bug** 16 | A clear and concise description of what the bug is. 17 | 18 | **To Reproduce** 19 | Steps to reproduce the behavior. 20 | 21 | **Expected behavior** 22 | A clear and concise description of what you expected to happen. 23 | 24 | **Screenshots** 25 | If applicable, add screenshots to help explain your problem. 26 | 27 | **Environment** 28 | - OS: [e.g. iOS] 29 | - Browser [e.g. chrome, safari] 30 | - Python Version [e.g. 
3.12] 31 | - Versions of all Python packages in your environment (i.e., output of `pip list`/`pip freeze`) 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 35 | -------------------------------------------------------------------------------- /tests/shapes/lines/test_x_lines.py: -------------------------------------------------------------------------------- 1 | """Test the x_lines module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import LinesModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.lines] 9 | 10 | 11 | class TestXLines(LinesModuleTestBase): 12 | """Test the XLines class.""" 13 | 14 | shape_name = 'x' 15 | distance_test_cases = ( 16 | ((8, 83), 0), # edge of X line 17 | ((20, 65), 0), # middle of X (intersection point) 18 | ((19, 64), 0.277350), # off the X 19 | ((10, 20), 27.073973), # off the X 20 | ) 21 | expected_line_count = 2 22 | expected_slopes = (-1.5, 1.5) 23 | 24 | def test_lines_form_an_x(self, shape): 25 | """Test that the lines form an X.""" 26 | lines = np.array(shape.lines) 27 | 28 | # check perpendicular 29 | xs, ys = lines.T 30 | runs = np.diff(xs, axis=0) 31 | rises = np.diff(ys, axis=0) 32 | assert np.dot(rises, runs.T) == 0 33 | 34 | # check that the lines intersect in the middle 35 | midpoints = np.mean(lines.T, axis=1)[0].T 36 | assert np.unique(midpoints).size == 1 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 jmatejka 4 | Copyright (c) 2023 Stefanie Molin 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | 
copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/x_lines.py: -------------------------------------------------------------------------------- 1 | """X lines shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class XLines(LineCollection): 8 | """ 9 | Class for the X shape consisting of two crossing, perpendicular lines. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import XLines 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = XLines(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | name = 'x' 31 | 32 | def __init__(self, dataset: Dataset) -> None: 33 | (xmin, xmax), (ymin, ymax) = dataset.morph_bounds 34 | 35 | super().__init__([[xmin, ymin], [xmax, ymax]], [[xmin, ymax], [xmax, ymin]]) 36 | -------------------------------------------------------------------------------- /.github/workflows/citation-validate.yml: -------------------------------------------------------------------------------- 1 | # This workflow checks that the CITATION.cff file is valid, if it is included in the PR. 2 | # 3 | # Author: Stefanie Molin 4 | 5 | name: Validate CITATION.cff 6 | 7 | on: 8 | pull_request: 9 | paths: 10 | - 'CITATION.cff' 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | # check that the CITATION.cff configuration is valid 18 | citation: 19 | runs-on: ubuntu-latest 20 | name: Validate CITATION.cff 21 | steps: 22 | - name: Checkout Code 23 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 24 | with: 25 | fetch-depth: 0 26 | - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 27 | with: 28 | python-version: "3.x" 29 | - name: Install cffconvert 30 | run: pip install cffconvert 31 | - name: Validate 32 | run: | 33 | verdict=$(cffconvert --validate) 34 | echo $verdict 35 | if [[ $verdict != "Citation metadata are valid"* ]]; then 36 | exit 1 37 | fi 38 | -------------------------------------------------------------------------------- /src/data_morph/progress.py: -------------------------------------------------------------------------------- 1 | """Progress bar using rich.""" 2 | 3 | from rich.progress import BarColumn, MofNCompleteColumn, Progress, TimeElapsedColumn 4 | 5 | 6 | class DataMorphProgress(Progress): 7 | """ 8 | Progress tracker for Data Morph. 9 | 10 | .. note:: 11 | Both the Python interface and CLI provide progress tracking using this class 12 | automatically. 
It is unlikely you will need to use this class yourself. 13 | 14 | Parameters 15 | ---------- 16 | auto_refresh : bool, default ``True`` 17 | Whether to automatically refresh the progress bar. This should be set to ``False`` 18 | for Jupyter Notebooks per the `Rich progress documentation 19 | `_. 20 | 21 | See Also 22 | -------- 23 | rich.progress.Progress 24 | The base class from which all progress bar functionality derives. 25 | """ 26 | 27 | def __init__(self, auto_refresh: bool = True) -> None: 28 | super().__init__( 29 | '[progress.description]{task.description}', 30 | BarColumn(), 31 | '[progress.percentage]{task.percentage:>3.0f}%', 32 | MofNCompleteColumn(), 33 | TimeElapsedColumn(), 34 | auto_refresh=auto_refresh, 35 | ) 36 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/rectangle.py: -------------------------------------------------------------------------------- 1 | """Rectangle shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class Rectangle(LineCollection): 8 | """ 9 | Class for the rectangle shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import Rectangle 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = Rectangle(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | def __init__(self, dataset: Dataset) -> None: 31 | xmin, xmax = dataset.data.x.quantile([0.1, 0.9]) 32 | ymin, ymax = dataset.data.y.quantile([0.1, 0.9]) 33 | 34 | super().__init__( 35 | [[xmin, ymin], [xmin, ymax]], 36 | [[xmin, ymin], [xmax, ymin]], 37 | [[xmax, ymin], [xmax, ymax]], 38 | [[xmin, ymax], [xmax, ymax]], 39 | ) 40 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/vertical_lines.py: -------------------------------------------------------------------------------- 1 | """Vertical lines shape.""" 2 | 3 | import numpy as np 4 | 5 | from ...data.dataset import Dataset 6 | from ..bases.line_collection import LineCollection 7 | 8 | 9 | class VerticalLines(LineCollection): 10 | """ 11 | Class for the vertical lines shape. 12 | 13 | .. plot:: 14 | :scale: 75 15 | :caption: 16 | This shape is generated using the panda dataset. 17 | 18 | from data_morph.data.loader import DataLoader 19 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 20 | from data_morph.shapes.lines import VerticalLines 21 | 22 | dataset = DataLoader.load_dataset('panda') 23 | shape = VerticalLines(dataset) 24 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 25 | 26 | Parameters 27 | ---------- 28 | dataset : Dataset 29 | The starting dataset to morph into other shapes. 
30 | """ 31 | 32 | name = 'v_lines' 33 | 34 | def __init__(self, dataset: Dataset) -> None: 35 | x_bounds, y_bounds = dataset.data_bounds 36 | 37 | super().__init__( 38 | *[ 39 | [[x, y_bounds[0]], [x, y_bounds[1]]] 40 | for x in np.linspace(x_bounds[0], x_bounds[1], 5) 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/horizontal_lines.py: -------------------------------------------------------------------------------- 1 | """Horizontal lines shape.""" 2 | 3 | import numpy as np 4 | 5 | from ...data.dataset import Dataset 6 | from ..bases.line_collection import LineCollection 7 | 8 | 9 | class HorizontalLines(LineCollection): 10 | """ 11 | Class for the horizontal lines shape. 12 | 13 | .. plot:: 14 | :scale: 75 15 | :caption: 16 | This shape is generated using the panda dataset. 17 | 18 | from data_morph.data.loader import DataLoader 19 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 20 | from data_morph.shapes.lines import HorizontalLines 21 | 22 | dataset = DataLoader.load_dataset('panda') 23 | shape = HorizontalLines(dataset) 24 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 25 | 26 | Parameters 27 | ---------- 28 | dataset : Dataset 29 | The starting dataset to morph into other shapes. 
30 | """ 31 | 32 | name = 'h_lines' 33 | 34 | def __init__(self, dataset: Dataset) -> None: 35 | x_bounds, y_bounds = dataset.data_bounds 36 | 37 | super().__init__( 38 | *[ 39 | [[x_bounds[0], y], [x_bounds[1], y]] 40 | for y in np.linspace(y_bounds[0], y_bounds[1], 5) 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/high_lines.py: -------------------------------------------------------------------------------- 1 | """High lines shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class HighLines(LineCollection): 8 | """ 9 | Class for the high lines shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import HighLines 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = HighLines(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | name = 'high_lines' 31 | 32 | def __init__(self, dataset: Dataset) -> None: 33 | x_bounds, y_bounds = dataset.data_bounds 34 | 35 | offset = y_bounds.range / 5 36 | lower = y_bounds[0] + offset 37 | upper = y_bounds[1] - offset 38 | 39 | super().__init__( 40 | [[x_bounds[0], lower], [x_bounds[1], lower]], 41 | [[x_bounds[0], upper], [x_bounds[1], upper]], 42 | ) 43 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/wide_lines.py: -------------------------------------------------------------------------------- 1 | """Wide lines shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class WideLines(LineCollection): 8 | """ 9 | Class for the wide lines shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import WideLines 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = WideLines(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | name = 'wide_lines' 31 | 32 | def __init__(self, dataset: Dataset) -> None: 33 | x_bounds, y_bounds = dataset.data_bounds 34 | 35 | offset = x_bounds.range / 5 36 | lower = x_bounds[0] + offset 37 | upper = x_bounds[1] - offset 38 | 39 | super().__init__( 40 | [[lower, y_bounds[0]], [lower, y_bounds[1]]], 41 | [[upper, y_bounds[0]], [upper, y_bounds[1]]], 42 | ) 43 | -------------------------------------------------------------------------------- /tests/shapes/points/test_parabola.py: -------------------------------------------------------------------------------- 1 | """Test the parabola module.""" 2 | 3 | import pytest 4 | 5 | from .bases import ParabolaTestBase 6 | 7 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 8 | 9 | 10 | class TestDownParabola(ParabolaTestBase): 11 | """Test the DownParabola class.""" 12 | 13 | shape_name = 'down_parab' 14 | distance_test_cases = (((20, 50), 7.929688), ((30, 60), 3.455534)) 15 | positive_quadratic_term = False 16 | x_index = 0 17 | y_index = 1 18 | 19 | 20 | class TestLeftParabola(ParabolaTestBase): 21 | """Test the LeftParabola class.""" 22 | 23 | shape_name = 'left_parab' 24 | distance_test_cases = (((50, 20), 46.31798), ((10, 77), 0.0)) 25 | positive_quadratic_term = False 26 | x_index = 1 27 | y_index = 0 28 | 29 | 30 | class TestRightParabola(ParabolaTestBase): 31 | """Test the RightParabola class.""" 32 | 33 | shape_name = 'right_parab' 34 | distance_test_cases = (((50, 20), 38.58756), ((10, 77), 7.740692)) 35 | positive_quadratic_term = True 36 | x_index = 1 37 | y_index = 0 38 | 39 | 40 | class TestUpParabola(ParabolaTestBase): 41 | """Test the UpParabola class.""" 42 | 43 | shape_name = 'up_parab' 44 | distance_test_cases = (((0, 0), 53.774155), ((30, 60), 5.2576809)) 45 | positive_quadratic_term = True 46 | x_index = 0 47 | y_index = 1 48 | -------------------------------------------------------------------------------- /tests/shapes/circles/test_circle.py: 
-------------------------------------------------------------------------------- 1 | """Test the circle module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import CIRCLE_REPR, CirclesModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.circles] 9 | 10 | 11 | class TestCircle(CirclesModuleTestBase): 12 | """Test the Circle class.""" 13 | 14 | shape_name = 'circle' 15 | center_x, center_y = (20, 65) 16 | radius = 20.49038105676658 17 | distance_test_cases = ( 18 | ((center_x, center_y + radius), 0), # north 19 | ((center_x, center_y - radius), 0), # south 20 | ((center_x + radius, center_y), 0), # east 21 | ((center_x - radius, center_y), 0), # west 22 | ((center_x, center_y), radius), # center of circle 23 | ((10, 25), 20.740675199410028), # inside the circle 24 | ((-20, 0), 55.831306555602154), # outside the circle 25 | ) 26 | repr_regex = '^' + CIRCLE_REPR + '$' 27 | 28 | def test_is_circle(self, shape): 29 | """Test that the Circle is a valid circle (mathematically).""" 30 | angles = np.arange(0, 361, 45) 31 | cx, cy = shape.center 32 | for x, y in zip( 33 | cx + shape.radius * np.cos(angles), 34 | cy + shape.radius * np.sin(angles), 35 | strict=True, 36 | ): 37 | assert pytest.approx(shape.distance(x, y)) == 0 38 | -------------------------------------------------------------------------------- /src/data_morph/shapes/points/dots_grid.py: -------------------------------------------------------------------------------- 1 | """Dots grid shape.""" 2 | 3 | import itertools 4 | 5 | from ...data.dataset import Dataset 6 | from ..bases.point_collection import PointCollection 7 | 8 | 9 | class DotsGrid(PointCollection): 10 | """ 11 | Class representing a 3x3 grid of dots. 12 | 13 | .. plot:: 14 | :scale: 75 15 | :caption: 16 | This shape is generated using the panda dataset. 
17 | 18 | from data_morph.data.loader import DataLoader 19 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 20 | from data_morph.shapes.points import DotsGrid 21 | 22 | dataset = DataLoader.load_dataset('panda') 23 | shape = DotsGrid(dataset) 24 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.1) 25 | 26 | Parameters 27 | ---------- 28 | dataset : Dataset 29 | The starting dataset to morph into other shapes. 30 | """ 31 | 32 | name = 'dots' 33 | 34 | def __init__(self, dataset: Dataset) -> None: 35 | xlow, xhigh = dataset.data.x.quantile([0.05, 0.95]).tolist() 36 | ylow, yhigh = dataset.data.y.quantile([0.05, 0.95]).tolist() 37 | 38 | xmid = (xhigh + xlow) / 2 39 | ymid = (yhigh + ylow) / 2 40 | 41 | super().__init__( 42 | *list(itertools.product([xlow, xmid, xhigh], [ylow, ymid, yhigh])) 43 | ) 44 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/diamond.py: -------------------------------------------------------------------------------- 1 | """Diamond shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class Diamond(LineCollection): 8 | """ 9 | Class for the diamond shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import Diamond 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = Diamond(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | def __init__(self, dataset: Dataset) -> None: 31 | xmin, xmax = dataset.data.x.quantile([0.05, 0.95]) 32 | ymin, ymax = dataset.data.y.quantile([0.05, 0.95]) 33 | 34 | xmid = (xmax + xmin) / 2 35 | ymid = (ymax + ymin) / 2 36 | 37 | super().__init__( 38 | [[xmin, ymid], [xmid, ymax]], 39 | [[xmid, ymax], [xmax, ymid]], 40 | [[xmax, ymid], [xmid, ymin]], 41 | [[xmid, ymin], [xmin, ymid]], 42 | ) 43 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | bounds: 2 | - any: 3 | - changed-files: 4 | - any-glob-to-any-file: src/data_morph/bounds/**/* 5 | 6 | ci/cd: 7 | - any: 8 | - changed-files: 9 | - any-glob-to-any-file: 10 | - '.coveragerc' 11 | - '.github/**/*' 12 | - '.pre-commit-config.yaml' 13 | - setup.cfg 14 | 15 | cli: 16 | - any: 17 | - changed-files: 18 | - any-glob-to-any-file: 19 | - src/data_morph/__main__.py 20 | - src/data_morph/cli.py 21 | 22 | data: 23 | - any: 24 | - changed-files: 25 | - any-glob-to-any-file: src/data_morph/data/**/* 26 | 27 | documentation: 28 | - any: 29 | - changed-files: 30 | - any-glob-to-any-file: 31 | - '.github/workflows/docs.yml' 32 | - docs/**/* 33 | - README.md 34 | 35 | morpher: 36 | - any: 37 | - changed-files: 38 | - any-glob-to-any-file: src/data_morph/morpher.py 39 | 40 | packaging: 41 | - any: 42 | - changed-files: 43 | - any-glob-to-any-file: 44 | - CITATION.cff 45 | - LICENSE 46 | - pyproject.toml 47 | - README.md 48 | - src/data_morph/__init__.py 49 | 50 | plotting: 51 | - any: 52 | - changed-files: 53 | - any-glob-to-any-file: src/data_morph/plotting/**/* 54 | 55 | shapes: 56 | - any: 57 | - changed-files: 58 | - any-glob-to-any-file: src/data_morph/shapes/**/* 59 | 60 | testing: 61 | - any: 62 | - changed-files: 63 | - any-glob-to-any-file: tests/**/* 64 | -------------------------------------------------------------------------------- 
/docs/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- 1 | {{ name | escape | underline}} 2 | 3 | .. automodule:: {{ fullname }} 4 | 5 | {% block attributes %} 6 | {% if attributes %} 7 | .. rubric:: {{ _('Module Attributes') }} 8 | 9 | .. autosummary:: 10 | :toctree: 11 | {% for item in attributes %} 12 | {{ item }} 13 | {%- endfor %} 14 | {% endif %} 15 | {% endblock %} 16 | 17 | {% block functions %} 18 | {% if functions %} 19 | .. rubric:: {{ _('Functions') }} 20 | 21 | .. autosummary:: 22 | :toctree: 23 | {% for item in functions %} 24 | {{ item }} 25 | {%- endfor %} 26 | {% endif %} 27 | {% endblock %} 28 | 29 | {% block classes %} 30 | {% if classes %} 31 | .. rubric:: {{ _('Classes') }} 32 | 33 | .. autosummary:: 34 | :toctree: 35 | {% for item in classes %} 36 | {{ item }} 37 | {%- endfor %} 38 | {% endif %} 39 | {% endblock %} 40 | 41 | {% block exceptions %} 42 | {% if exceptions %} 43 | .. rubric:: {{ _('Exceptions') }} 44 | 45 | .. autosummary:: 46 | :toctree: 47 | {% for item in exceptions %} 48 | {{ item }} 49 | {%- endfor %} 50 | {% endif %} 51 | {% endblock %} 52 | 53 | {% block modules %} 54 | {% if modules %} 55 | .. rubric:: Modules 56 | 57 | .. 
autosummary:: 58 | :toctree: 59 | :recursive: 60 | {% for item in modules %} 61 | {{ item }} 62 | {%- endfor %} 63 | {% endif %} 64 | {% endblock %} 65 | -------------------------------------------------------------------------------- /tests/shapes/points/bases.py: -------------------------------------------------------------------------------- 1 | """Base test classes for points shapes.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import numpy as np 8 | import pytest 9 | 10 | if TYPE_CHECKING: 11 | from numbers import Number 12 | 13 | 14 | class PointsModuleTestBase: 15 | """Base for testing point-based shapes.""" 16 | 17 | shape_name: str 18 | distance_test_cases: tuple[tuple[tuple[Number], float]] 19 | 20 | @pytest.fixture(scope='class') 21 | def shape(self, shape_factory): 22 | """Fixture to get the shape for testing.""" 23 | return shape_factory.generate_shape(self.shape_name) 24 | 25 | def test_distance(self, shape, test_point, expected_distance): 26 | """ 27 | Test the distance() method parametrized by distance_test_cases 28 | (see conftest.py). 
29 | """ 30 | actual_distance = shape.distance(*test_point) 31 | assert pytest.approx(actual_distance, abs=1e-5) == expected_distance 32 | 33 | 34 | class ParabolaTestBase(PointsModuleTestBase): 35 | """Base test class for parabolic shapes.""" 36 | 37 | positive_quadratic_term: bool 38 | x_index: int 39 | y_index: int 40 | 41 | def test_quadratic_term(self, shape): 42 | """Check the sign of the quadratic term.""" 43 | poly = np.polynomial.Polynomial.fit( 44 | shape.points[:, self.x_index], shape.points[:, self.y_index], 2 45 | ) 46 | assert (poly.coef[2] > 0) == self.positive_quadratic_term 47 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: false 3 | autoupdate_schedule: quarterly 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v6.0.0 8 | hooks: 9 | - id: check-added-large-files 10 | - id: check-merge-conflict 11 | - id: check-toml 12 | - id: check-yaml 13 | - id: end-of-file-fixer 14 | - id: trailing-whitespace 15 | 16 | - repo: https://github.com/pre-commit/pygrep-hooks 17 | rev: v1.10.0 18 | hooks: 19 | - id: rst-backticks 20 | - id: rst-directive-colons 21 | - id: rst-inline-touching-normal 22 | 23 | - repo: https://github.com/codespell-project/codespell 24 | rev: v2.4.1 25 | hooks: 26 | - id: codespell 27 | additional_dependencies: 28 | - tomli 29 | exclude: (\.(svg|png|pdf)$)|(CODE_OF_CONDUCT.md) 30 | 31 | - repo: https://github.com/astral-sh/ruff-pre-commit 32 | rev: v0.14.2 33 | hooks: 34 | - id: ruff 35 | args: [--fix, --exit-non-zero-on-fix, --show-fixes] 36 | - id: ruff-format 37 | 38 | - repo: https://github.com/numpy/numpydoc 39 | rev: v1.9.0 40 | hooks: 41 | - id: numpydoc-validation 42 | exclude: (tests|docs)/.* 43 | 44 | - repo: https://github.com/stefmolin/docstringify 45 | rev: 1.1.1 46 | hooks: 47 | - id: docstringify 48 | files: tests/.* 49 | 50 | 
- repo: https://github.com/tox-dev/pyproject-fmt 51 | rev: v2.11.0 52 | hooks: 53 | - id: pyproject-fmt 54 | args: [--keep-full-version, --no-print-diff] 55 | -------------------------------------------------------------------------------- /tests/shapes/bases/test_shape.py: -------------------------------------------------------------------------------- 1 | """Test the shape module.""" 2 | 3 | import pytest 4 | 5 | from data_morph.shapes.bases.shape import Shape 6 | 7 | 8 | @pytest.mark.shapes 9 | class TestShapeABC: 10 | """Test the Shape abstract base class (ABC).""" 11 | 12 | def test_is_abc(self): 13 | """Test that Shape class can't be instantiated directly.""" 14 | with pytest.raises(TypeError): 15 | _ = Shape() 16 | 17 | class NewShape(Shape): 18 | """A test shape.""" 19 | 20 | def distance(self, x, y): 21 | """Calculate the distance from the shape to the point.""" 22 | return super().distance(x, y) 23 | 24 | def plot(self, ax=None): 25 | """Plot the shape.""" 26 | return super().plot(ax) 27 | 28 | with pytest.raises(NotImplementedError): 29 | NewShape().distance(0, 0) 30 | 31 | with pytest.raises(NotImplementedError): 32 | NewShape().plot() 33 | 34 | def test_repr(self): 35 | """Test that the __repr__() method is working.""" 36 | 37 | class NewShape(Shape): 38 | """A test shape.""" 39 | 40 | def distance(self, x, y): # pragma: no cover 41 | """Calculate the distance from the shape to the point.""" 42 | return x, y 43 | 44 | def plot(self, ax): # pragma: no cover 45 | """Plot the shape.""" 46 | return ax 47 | 48 | new_shape = NewShape() 49 | assert repr(new_shape) == '' 50 | -------------------------------------------------------------------------------- /src/data_morph/shapes/circles/bullseye.py: -------------------------------------------------------------------------------- 1 | """Bullseye shape.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import numpy as np 8 | 9 | from .rings import Rings 10 | 11 | if 
TYPE_CHECKING: 12 | from ..data.dataset import Dataset 13 | 14 | 15 | class Bullseye(Rings): 16 | """ 17 | Class representing a bullseye shape comprising two concentric circles. 18 | 19 | .. plot:: 20 | :scale: 75 21 | :caption: 22 | This shape is generated using the panda dataset. 23 | 24 | from data_morph.data.loader import DataLoader 25 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 26 | from data_morph.shapes.circles import Bullseye 27 | 28 | dataset = DataLoader.load_dataset('panda') 29 | shape = Bullseye(dataset) 30 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 31 | 32 | See Also 33 | -------- 34 | Circle : The individual rings are represented as circles. 35 | """ 36 | 37 | @staticmethod 38 | def _derive_radii(dataset: Dataset) -> np.ndarray: 39 | """ 40 | Derive the radii for the circles in the bullseye. 41 | 42 | Parameters 43 | ---------- 44 | dataset : Dataset 45 | The starting dataset to morph into. 46 | 47 | Returns 48 | ------- 49 | np.ndarray 50 | The radii for the circles in the bullseye. 51 | """ 52 | stdev = dataset.data[['x', 'y']].std().mean() * 1.5 53 | return np.linspace(stdev, 0, 2, endpoint=False) 54 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | # This workflow builds the Data Morph documentation and publishes it using GitHub pages. 
2 | # 3 | # Based on https://olgarithms.github.io/sphinx-tutorial/docs/8-automating-documentation-updates.html 4 | # 5 | # Author: Stefanie Molin 6 | 7 | name: Deploy Docs 8 | 9 | on: 10 | push: 11 | branches: [ "main" ] 12 | paths: 13 | - '.github/workflows/docs.yml' 14 | - 'docs/**' 15 | - 'src/**' 16 | - '!src/data_morph/__main__.py' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | docs: 24 | name: Build and deploy docs. 25 | 26 | runs-on: ubuntu-latest 27 | permissions: 28 | contents: write 29 | 30 | steps: 31 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 32 | with: 33 | fetch-depth: 0 34 | 35 | - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 36 | with: 37 | python-version: "3.x" 38 | 39 | - name: Install dependencies 40 | run: | 41 | pip install . --group docs 42 | - name: Sphinx build 43 | run: | 44 | git worktree add docs/_build/html gh-pages 45 | cd docs 46 | make html 47 | 48 | - name: Deploy docs 49 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 50 | with: 51 | publish_branch: gh-pages 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | publish_dir: docs/_build/html 54 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | CLI Reference 2 | ============= 3 | 4 | .. sphinx_argparse_cli:: 5 | :module: data_morph.cli 6 | :func: generate_parser 7 | :prog: data-morph 8 | :group_title_prefix: 9 | 10 | ---- 11 | 12 | 13 | Examples 14 | -------- 15 | 16 | 1. Morph the panda shape into a star: 17 | 18 | .. code-block:: console 19 | 20 | $ data-morph --start panda --target star 21 | 22 | 2. Morph the panda shape into all available target shapes distributing the work 23 | to as many worker processes as possible: 24 | 25 | .. 
code-block:: console 26 | 27 | $ data-morph --start panda --target all --workers 0 28 | 29 | 3. Morph the cat, dog, and panda shapes into the circle and slant_down shapes: 30 | 31 | .. code-block:: console 32 | 33 | $ data-morph --start cat dog panda --target circle slant_down 34 | 35 | 4. Morph the dog shape into upward-slanting lines over 50,000 iterations with seed 1: 36 | 37 | .. code-block:: console 38 | 39 | $ data-morph --start dog --target slant_up --iterations 50000 --seed 1 40 | 41 | 5. Morph the cat shape into a circle, preserving summary statistics to 3 decimal places: 42 | 43 | .. code-block:: console 44 | 45 | $ data-morph --start cat --target circle --decimals 3 46 | 47 | 6. Morph the music shape into a bullseye, specifying the output directory: 48 | 49 | .. code-block:: console 50 | 51 | $ data-morph --start music --target bullseye --output-dir path/to/dir 52 | 53 | 7. Morph the sheep shape into vertical lines, slowly easing in and out for the animation: 54 | 55 | .. code-block:: console 56 | 57 | $ data-morph --start sheep --target v_lines --ease 58 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/slant_up.py: -------------------------------------------------------------------------------- 1 | """Slant up lines shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class SlantUpLines(LineCollection): 8 | """ 9 | Class for the slant up lines shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 
15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import SlantUpLines 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = SlantUpLines(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 28 | """ 29 | 30 | name = 'slant_up' 31 | 32 | def __init__(self, dataset: Dataset) -> None: 33 | x_bounds, y_bounds = dataset.morph_bounds 34 | 35 | xmin, xmax = x_bounds 36 | xmid = xmin + x_bounds.range / 2 37 | x_offset = (xmid - xmin) / 2 38 | 39 | ymin, ymax = y_bounds 40 | ymid = ymin + y_bounds.range / 2 41 | y_offset = (ymid - ymin) / 2 42 | 43 | super().__init__( 44 | [[xmin, ymid], [xmid, ymax]], 45 | [[xmin, ymin + y_offset], [xmid + x_offset, ymax]], 46 | [[xmin, ymin], [xmax, ymax]], 47 | [[xmin + x_offset, ymin], [xmax, ymid + y_offset]], 48 | [[xmid, ymin], [xmax, ymid]], 49 | ) 50 | -------------------------------------------------------------------------------- /docs/classroom-ideas.rst: -------------------------------------------------------------------------------- 1 | Data Morph in the classroom 2 | --------------------------- 3 | 4 | Data Morph is intended to be used as a teaching tool to illustrate the importance 5 | of data visualization. Here are some potential classroom activities for instructors: 6 | 7 | * **Statistics Focus**: Have students pick one of the `built-in datasets 8 | <./api/data_morph.data.loader.html#data_morph.data.loader.DataLoader>`_, 9 | and morph it into all available `target shapes 10 | <./api/data_morph.shapes.factory.html#data_morph.shapes.factory.ShapeFactory>`_. 11 | Ask students to comment on which transformations worked best and why. 
12 | * **Creativity Focus**: Have students :doc:`create a new dataset ` 13 | (*e.g.*, your school logo or something that the student designs), and morph that into multiple 14 | `target shapes <./api/data_morph.shapes.factory.html#data_morph.shapes.factory.ShapeFactory>`_. 15 | Ask students to comment on which transformations worked best and why. 16 | * **Math and Coding Focus**: Have students :doc:`create a custom shape ` 17 | by inheriting from :class:`.LineCollection` or :class:`.PointCollection`, and try morphing a 18 | couple of the `built-in datasets <./api/data_morph.data.loader.html#data_morph.data.loader.DataLoader>`_ 19 | into that shape. Ask students to explain how they chose to calculate the shape, and 20 | comment on which transformations worked best and why. 21 | 22 | If you end up using Data Morph in your classroom, I would love to hear about it. Please 23 | `send me a message `_ detailing how you used it and 24 | how it went. 25 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/slant_down.py: -------------------------------------------------------------------------------- 1 | """Slant down lines shape.""" 2 | 3 | from ...data.dataset import Dataset 4 | from ..bases.line_collection import LineCollection 5 | 6 | 7 | class SlantDownLines(LineCollection): 8 | """ 9 | Class for the slant down lines shape. 10 | 11 | .. plot:: 12 | :scale: 75 13 | :caption: 14 | This shape is generated using the panda dataset. 15 | 16 | from data_morph.data.loader import DataLoader 17 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 18 | from data_morph.shapes.lines import SlantDownLines 19 | 20 | dataset = DataLoader.load_dataset('panda') 21 | shape = SlantDownLines(dataset) 22 | plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25) 23 | 24 | Parameters 25 | ---------- 26 | dataset : Dataset 27 | The starting dataset to morph into other shapes. 
28 | """ 29 | 30 | name = 'slant_down' 31 | 32 | def __init__(self, dataset: Dataset) -> None: 33 | x_bounds, y_bounds = dataset.morph_bounds 34 | 35 | xmin, xmax = x_bounds 36 | xmid = xmin + x_bounds.range / 2 37 | x_offset = (xmid - xmin) / 2 38 | 39 | ymin, ymax = y_bounds 40 | ymid = ymin + y_bounds.range / 2 41 | y_offset = (ymid - ymin) / 2 42 | 43 | super().__init__( 44 | [[xmin, ymid], [xmid, ymin]], 45 | [[xmin, ymid + y_offset], [xmid + x_offset, ymin]], 46 | [[xmin, ymax], [xmax, ymin]], 47 | [[xmin + x_offset, ymax], [xmax, ymin + y_offset]], 48 | [[xmid, ymax], [xmax, ymid]], 49 | ) 50 | -------------------------------------------------------------------------------- /tests/plotting/test_diagnostics.py: -------------------------------------------------------------------------------- 1 | """Test the diagnostics module.""" 2 | 3 | import pytest 4 | from matplotlib.axes import Axes 5 | from matplotlib.patches import Rectangle 6 | 7 | from data_morph.data.loader import DataLoader 8 | from data_morph.plotting.diagnostics import plot_shape_on_dataset 9 | from data_morph.shapes.bases.line_collection import LineCollection 10 | from data_morph.shapes.bases.point_collection import PointCollection 11 | from data_morph.shapes.factory import ShapeFactory 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ('dataset_name', 'shape_name', 'show_bounds', 'alpha'), 16 | [ 17 | ('panda', 'heart', True, 0.4), 18 | ('music', 'rectangle', False, 0.25), 19 | ('sheep', 'circle', False, 0.5), 20 | ], 21 | ) 22 | def test_plot_shape_on_dataset(dataset_name, shape_name, show_bounds, alpha): 23 | """Test the plot_shape_on_dataset() function.""" 24 | dataset = DataLoader.load_dataset(dataset_name) 25 | shape = ShapeFactory(dataset).generate_shape(shape_name) 26 | ax = plot_shape_on_dataset(dataset, shape, show_bounds, alpha) 27 | 28 | assert isinstance(ax, Axes) 29 | assert not ax.get_title() 30 | 31 | assert ax.collections[0].get_alpha() == alpha 32 | 33 | points_expected = 
dataset.data.shape[0] 34 | if isinstance(shape, PointCollection): 35 | points_expected += shape.points.shape[0] 36 | 37 | points_plotted = sum( 38 | collection.get_offsets().data.shape[0] for collection in ax.collections 39 | ) 40 | assert points_expected == points_plotted 41 | 42 | if isinstance(shape, LineCollection): 43 | assert len(ax.lines) == len(shape.lines) 44 | 45 | if show_bounds: 46 | assert sum(isinstance(patch, Rectangle) for patch in ax.patches) == 3 47 | -------------------------------------------------------------------------------- /src/data_morph/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data Morph. 3 | 4 | Morph an input dataset of 2D points into select shapes, while preserving the summary 5 | statistics to a given number of decimal points through simulated annealing. It is intended 6 | to be used as a teaching tool to illustrate the importance of data visualization (see 7 | `Data Morph in the classroom `_ 8 | for ideas). 9 | 10 | Notes 11 | ----- 12 | This code has been altered by Stefanie Molin to work for other input datasets 13 | by parameterizing the target shapes with information from the input shape. 14 | The original code works for a specific dataset called the "Datasaurus" and was created 15 | for the paper *Same Stats, Different Graphs: Generating Datasets with Varied Appearance and 16 | Identical Statistics through Simulated Annealing* by Justin Matejka and George Fitzmaurice 17 | (ACM CHI 2017). 18 | 19 | The paper and video can be found on the `Autodesk Research website 20 | `_. 21 | The version of the code placed on GitHub at 22 | `jmatejka/same-stats-different-graphs `_, 23 | served as the starting point for the Data Morph codebase, which is on GitHub at 24 | `stefmolin/data-morph `_. 25 | 26 | Read more about the creation of Data Morph in `this article 27 | `_ 28 | and `this slide deck `_. 
29 | """ 30 | 31 | __version__ = '0.4.0.dev0' 32 | MAIN_DIR = __name__ 33 | -------------------------------------------------------------------------------- /tests/plotting/test_style.py: -------------------------------------------------------------------------------- 1 | """Test the style module.""" 2 | 3 | import filecmp 4 | 5 | import matplotlib.pyplot as plt 6 | import pytest 7 | 8 | from data_morph.plotting.style import plot_with_custom_style, style_context 9 | 10 | pytestmark = pytest.mark.plotting 11 | 12 | 13 | def save_plot(filepath): 14 | """Save a generic plot to a filepath for testing.""" 15 | fig, ax = plt.subplots() 16 | ax.plot([0, 1]) 17 | fig.savefig(filepath) 18 | plt.close() 19 | 20 | 21 | @pytest.mark.parametrize('as_decorator', [False, True]) 22 | def test_style_context_manager(tmp_path, as_decorator): 23 | """Test that style_context() works as a context manager and as a decorator.""" 24 | no_style = tmp_path / 'original.png' 25 | styled = tmp_path / 'styled.png' 26 | 27 | save_plot(no_style) 28 | 29 | if as_decorator: 30 | 31 | @style_context() 32 | def style_plot(): 33 | """Generate a generic plot using style_context() as a decorator.""" 34 | return save_plot(styled) 35 | 36 | style_plot() 37 | else: 38 | with style_context(): 39 | save_plot(styled) 40 | 41 | assert not filecmp.cmp(no_style, styled, shallow=False) 42 | 43 | 44 | def test_plot_with_custom_style(tmp_path): 45 | """Test that the plot_with_custom_style decorator is working.""" 46 | no_style = tmp_path / 'original.png' 47 | styled = tmp_path / 'styled.png' 48 | 49 | save_plot(no_style) 50 | 51 | @plot_with_custom_style 52 | def style_plot(): 53 | """Generate a generic plot using the plot_with_custom_style decorator.""" 54 | return save_plot(styled) 55 | 56 | style_plot() 57 | 58 | assert plot_with_custom_style.__doc__ != style_plot.__doc__ 59 | assert not filecmp.cmp(no_style, styled, shallow=False) 60 | -------------------------------------------------------------------------------- 
/tests/shapes/points/test_dots_grid.py: -------------------------------------------------------------------------------- 1 | """Test the dots_grid module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import PointsModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.points] 9 | 10 | 11 | class TestDotsGrid(PointsModuleTestBase): 12 | """Test the DotsGrid class.""" 13 | 14 | shape_name = 'dots' 15 | distance_test_cases = (((20, 50), 0.0), ((30, 60), 3.640055)) 16 | expected_point_count = 9 17 | 18 | def test_init(self, shape): 19 | """Test that the shape consists of the correct number points.""" 20 | num_unique_points, *_ = np.unique(shape.points, axis=0).shape 21 | assert num_unique_points == self.expected_point_count 22 | 23 | def test_points_form_symmetric_grid(self, shape): 24 | """Test that the points form a 3x3 symmetric grid.""" 25 | points = sorted(shape.points.tolist()) 26 | 27 | top_row = points[:3] 28 | middle_row = points[3:6] 29 | bottom_row = points[6:] 30 | 31 | # check x values 32 | for row in [top_row, middle_row, bottom_row]: 33 | # check x values are the same for all points in the column 34 | assert row[0][0] == row[1][0] == row[2][0] 35 | 36 | # check that the middle column is truly in the middle 37 | col_midpoint = (row[0][0] + row[2][0]) / 2 38 | assert col_midpoint == row[1][0] 39 | 40 | # check y values 41 | for point in range(3): 42 | # check y values are the same for all points in the column 43 | assert top_row[point][1] == middle_row[point][1] == bottom_row[point][1] 44 | 45 | # check that the middle row is truly in the middle 46 | row_midpoint = (top_row[point][1] + bottom_row[point][1]) / 2 47 | assert row_midpoint == middle_row[point][1] 48 | -------------------------------------------------------------------------------- /bin/ci.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Call this script with the names of files that 
have changed to get the 4 | datasets and shapes to test with the CLI. 5 | 6 | Examples 7 | -------- 8 | 9 | $ python bin/ci.py src/data_morph/shapes/circles.py 10 | bullseye circle rings 11 | 12 | $ python bin/ci.py src/data_morph/shapes/bases/line_collection.py 13 | high_lines h_lines slant_down slant_up v_lines wide_lines x diamond rectangle star 14 | 15 | $ python bin/ci.py src/data_morph/shapes/points/heart.py 16 | heart spade 17 | 18 | $ python bin/ci.py src/data_morph/data/starter_shapes/superdatascience.csv 19 | SDS 20 | """ 21 | 22 | import sys 23 | from pathlib import Path 24 | 25 | from data_morph.data.loader import DataLoader 26 | from data_morph.shapes.factory import ShapeFactory 27 | 28 | new_paths = sys.argv[1:] 29 | 30 | args = set() 31 | 32 | # Figure out argument of datasets based on .csv filename 33 | for dataset, filename in DataLoader._DATASETS.items(): 34 | for new_file in new_paths: 35 | if filename in new_file: 36 | args.add(dataset) 37 | 38 | # Figure out argument of shapes based on .py filename 39 | new_files = [Path(x).name for x in new_paths] 40 | for shape, shape_cls in ShapeFactory._SHAPE_MAPPING.items(): 41 | # Find the class and all parent classes and get their module name 42 | # We get the module name because it ends in the Python file without .py extension 43 | mro = [ 44 | x.__module__ for x in shape_cls.__mro__ if x.__module__.startswith('data_morph') 45 | ] 46 | 47 | if shape == 'spade': 48 | mro.append('heart') 49 | 50 | all_modules = [f'{x}.py' for x in mro] 51 | 52 | for new_file in new_files: 53 | for module in all_modules: 54 | if module.endswith(new_file): 55 | args.add(shape) 56 | break 57 | 58 | print(' '.join(args)) 59 | -------------------------------------------------------------------------------- /src/data_morph/data/stats.py: -------------------------------------------------------------------------------- 1 | """Utility functions for calculating summary statistics.""" 2 | 3 | from __future__ import annotations 4 | 5 
| from typing import TYPE_CHECKING, NamedTuple 6 | 7 | if TYPE_CHECKING: 8 | from collections.abc import Generator 9 | 10 | import pandas as pd 11 | 12 | 13 | class SummaryStatistics(NamedTuple): 14 | """Named tuple containing the summary statistics for plotting/analysis.""" 15 | 16 | x_mean: float 17 | y_mean: float 18 | 19 | x_stdev: float 20 | y_stdev: float 21 | 22 | correlation: float 23 | 24 | x_median: float | None 25 | y_median: float | None 26 | 27 | def __iter__(self) -> Generator[float, None, None]: 28 | for statistic in self._fields: 29 | if (value := getattr(self, statistic)) is not None: 30 | yield value 31 | 32 | 33 | def get_summary_statistics(data: pd.DataFrame, with_median: bool) -> SummaryStatistics: 34 | """ 35 | Calculate the summary statistics for the given set of points. 36 | 37 | Parameters 38 | ---------- 39 | data : pandas.DataFrame 40 | A dataset with columns ``x`` and ``y``. 41 | with_median : bool 42 | Whether to include the median of ``x`` and ``y``. 43 | 44 | Returns 45 | ------- 46 | SummaryStatistics 47 | Named tuple consisting of mean and standard deviations of ``x`` and ``y``, 48 | along with the Pearson correlation coefficient between the two, and optionally, 49 | the median of ``x`` and ``y``. 50 | """ 51 | return SummaryStatistics( 52 | x_mean=data.x.mean(), 53 | y_mean=data.y.mean(), 54 | x_stdev=data.x.std(), 55 | y_stdev=data.y.std(), 56 | correlation=data.corr().x.y, 57 | x_median=data.x.median() if with_median else None, 58 | y_median=data.y.median() if with_median else None, 59 | ) 60 | -------------------------------------------------------------------------------- /src/data_morph/shapes/lines/star.py: -------------------------------------------------------------------------------- 1 | """Star shape.""" 2 | 3 | import itertools 4 | 5 | from ...data.dataset import Dataset 6 | from ..bases.line_collection import LineCollection 7 | 8 | 9 | class Star(LineCollection): 10 | """ 11 | Class for the star shape. 12 | 13 | .. 
@pytest.mark.shapes
class TestShapeFactory:
    """Tests for shape generation and the overview plot of the ShapeFactory class."""

    def test_generate_shape(self, shape_factory):
        """Each name in the shape mapping generates an instance of its mapped class."""
        registry = shape_factory._SHAPE_MAPPING
        for name in registry:
            generated = shape_factory.generate_shape(name)
            assert isinstance(generated, registry[name])
            assert str(generated) == name

    def test_generate_shape_error(self, shape_factory):
        """Requesting a shape name that isn't registered raises a ValueError."""
        with pytest.raises(ValueError, match='No such shape'):
            shape_factory.generate_shape('does not exist')

    @pytest.mark.parametrize('subset', [4, 5, 8, 10, None])
    def test_plot_available_shapes(self, shape_factory, monkeypatch, subset):
        """Check the layout and labeling produced by plot_available_shapes()."""
        if subset:
            # only plot the first `subset` shapes for this run
            truncated = shape_factory.AVAILABLE_SHAPES[:subset]
            monkeypatch.setattr(shape_factory, 'AVAILABLE_SHAPES', truncated)

        axs = shape_factory.plot_available_shapes()

        # subsets of at most five shapes are expected to come back as a flat array
        flat_layout = subset is not None and subset <= 5
        if flat_layout:
            assert len(axs) == axs.size
        else:
            assert len(axs) > 1

        expected_names = shape_factory.AVAILABLE_SHAPES
        populated_axs = [ax for ax in axs.flatten() if ax.get_figure()]
        assert len(populated_axs) == len(expected_names)
        for ax in populated_axs:
            assert ax.get_xlabel() == ax.get_ylabel() == ''
        assert {ax.get_title() for ax in populated_axs} == set(expected_names)
        plt.close()
so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # misc 112 | .DS_Store 113 | -------------------------------------------------------------------------------- /src/data_morph/plotting/style.py: -------------------------------------------------------------------------------- 1 | """Utility functions for styling Matplotlib plots.""" 2 | 3 | from collections.abc import Callable, Generator 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | from importlib.resources import as_file, files 7 | from pathlib import Path 8 | from typing import Any 9 | 10 | import matplotlib.pyplot as plt 11 | 12 | from .. 
@contextmanager
def style_context() -> Generator[None, None, None]:
    """Context manager that applies the package's plot style while active."""

    relative_path = Path('plotting') / 'config' / 'plot_style.mplstyle'
    stylesheet = files(MAIN_DIR).joinpath(relative_path)

    # as_file() provides a real filesystem path for the packaged stylesheet,
    # which is layered on top of the seaborn dark grid style
    with as_file(stylesheet) as style_path:
        with plt.style.context(['seaborn-v0_8-darkgrid', style_path]):
            yield


def plot_with_custom_style(plotting_function: Callable) -> Callable:
    """
    Decorate a plotting function so that it always runs in the custom style.

    Parameters
    ----------
    plotting_function : Callable
        The plotting function.

    Returns
    -------
    Callable
        The decorated plotting function.
    """

    def plot_in_style(*args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
        """
        Run the plotting function inside the custom style context.

        Parameters
        ----------
        *args
            Positional arguments to pass to the plotting function.
        **kwargs
            Keyword arguments to pass to the plotting function.

        Returns
        -------
        any
            Output of calling the plotting function.
        """
        # a fresh context is entered on every call
        with style_context():
            return plotting_function(*args, **kwargs)

    # expose the name, docstring, etc. of the function being decorated
    return wraps(plotting_function)(plot_in_style)
def determine_versions():
    """
    Determine stable/dev/etc. and docs version number.

    The most recent minor release already present in the build directory is
    compared against the version of ``data_morph`` that is being documented.

    Returns
    -------
    tuple
        A two-tuple of ``(version_match, docs_version_group)``:
        ``version_match`` is ``'dev'`` for development releases, ``'stable'``
        when the docs version is at least as new as every ``major.minor``
        directory that was previously built, and the ``'major.minor'`` string
        otherwise; ``docs_version_group`` is the parsed ``major.minor`` version
        of the docs being built.
    """
    import re  # only needed here, so kept local to the function

    # Match any `major.minor` directory, including multi-digit components
    # such as `0.10`, which a `[0-9].[0-9]` glob pattern would miss.
    minor_release_pattern = re.compile(r'[0-9]+\.[0-9]+')
    previously_built = (
        parse_version(directory.name)
        for directory in build_html_dir.glob('*')
        if directory.is_dir() and minor_release_pattern.fullmatch(directory.name)
    )
    # fall back to 0.0 when nothing has been built yet
    last_minor_release = max(previously_built, default=parse_version('0.0'))

    docs_version = parse_version(data_morph.__version__)
    docs_version_group = parse_version(f'{docs_version.major}.{docs_version.minor}')

    if docs_version.is_devrelease:
        version_match = 'dev'
    elif docs_version_group >= last_minor_release:
        version_match = 'stable'
    else:
        version_match = f'{docs_version.major}.{docs_version.minor}'
    return version_match, docs_version_group
class Scatter(PointCollection):
    """
    Class for the scatter shape: a cloud of randomly-scattered points.

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import Scatter

        dataset = DataLoader.load_dataset('panda')
        shape = Scatter(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.1)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    """

    def __init__(self, dataset: Dataset) -> None:
        # fixed seed so that the same dataset always yields the same cloud
        rng = np.random.default_rng(1)
        morph_range = dataset.morph_bounds.range
        center = dataset.morph_bounds.center

        # points can stray at most half the morph range from the center
        x_max_offset = morph_range[0] / 2
        y_max_offset = morph_range[1] / 2

        points = [center]
        # NOTE(review): 0-720 looks like degrees, but np.cos/np.sin work in
        # radians; the directions still vary from point to point -- confirm intent
        for angle in np.linspace(0, 720, num=100, endpoint=False):
            # the x offset is drawn before the y offset for each point
            x = center[0] + np.cos(angle) * rng.uniform(0, x_max_offset)
            y = center[1] + np.sin(angle) * rng.uniform(0, y_max_offset)
            points.append((x, y))

        super().__init__(*points)

        self._alpha = 0.4

    def distance(self, x: Number, y: Number) -> int:
        """
        No-op that always returns 0 so that all perturbations are accepted.

        Parameters
        ----------
        x, y : int or float
            Coordinates of a point in 2D space.

        Returns
        -------
        int
            Always returns 0 to allow for scattering of the points.
        """
        return 0
70 | child for child in ax.get_children() if isinstance(child, plt.Axes) 71 | ] 72 | assert len(inset_axes) == expected_insets 73 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow builds Data Morph on Mac, Linux, and Windows for 2 | # multiple versions of Python to confirm it can be properly installed. 3 | # 4 | # For more information see https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 5 | # 6 | # Author: Stefanie Molin 7 | 8 | name: CI 9 | 10 | on: 11 | push: 12 | branches: [ "main" ] 13 | paths: 14 | - '**' 15 | - '!bin/**' 16 | - '!docs/**' 17 | - '!.github/**' 18 | - '.github/workflows/ci.yml' 19 | tags: [ "v*" ] 20 | 21 | pull_request: 22 | paths: 23 | - '**' 24 | - '!docs/**' 25 | - '!.github/**' 26 | - '.github/workflows/ci.yml' 27 | 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 30 | cancel-in-progress: true 31 | 32 | jobs: 33 | build: 34 | name: Build with Python ${{ matrix.python-version }} on ${{ matrix.os }} 35 | 36 | # The type of runner that the job will run on 37 | runs-on: ${{ matrix.os }} 38 | 39 | defaults: 40 | run: 41 | shell: bash -e {0} 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | os: [macos-latest, ubuntu-latest, windows-latest] 47 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] 48 | 49 | env: 50 | MPLBACKEND: Agg # non-interactive backend for matplotlib 51 | 52 | steps: 53 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 54 | 55 | - name: Set up Python ${{ matrix.python-version }} 56 | uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | 60 | - name: Install Data Morph 61 | run: | 62 | python -m pip install --upgrade pip 63 | python -m pip install 
class CirclesModuleTestBase:
    """
    Common test cases for shapes composed of circles.

    Subclasses provide the name of the shape to generate, the distance
    test cases, and the regular expression the shape's repr must match.
    """

    shape_name: str
    distance_test_cases: tuple[tuple[tuple[Number], float]]
    repr_regex: str

    @pytest.fixture(scope='class')
    def shape(self, shape_factory):
        """Generate the shape under test once per test class."""
        return shape_factory.generate_shape(self.shape_name)

    def test_distance(self, shape, test_point, expected_distance):
        """
        Check distance() against each entry in distance_test_cases
        (parametrization is handled in conftest.py).
        """
        assert pytest.approx(shape.distance(*test_point)) == expected_distance

    def test_repr(self, shape):
        """Check that repr() of the shape matches the expected pattern."""
        representation = repr(shape)
        assert re.match(self.repr_regex, representation) is not None

    @pytest.mark.parametrize('ax', [None, plt.subplots()[1]])
    def test_plot(self, shape, ax):
        """Check that plot() draws the expected circles on the right Axes."""
        if ax:
            # start from a blank Axes and expect it to be reused
            ax.clear()
            plot_ax = shape.plot(ax)
            assert plot_ax is ax
        else:
            plot_ax = shape.plot(ax)
            assert plot_ax is not ax

        plotted_circles = plot_ax.patches
        plotted_centers = set()
        plotted_radii = set()
        for patch in plotted_circles:
            plotted_centers.add(patch._center)
            plotted_radii.add(patch._width / 2)  # patch width is the diameter

        if isinstance(shape, Circle):
            assert len(plotted_circles) == 1
            assert plotted_centers == {shape.center}
            assert plotted_radii == {shape.radius}
        else:
            assert len(plotted_circles) == len(shape.circles)
            assert plotted_centers == {tuple(np.unique(shape._centers))}
            assert not plotted_radii.difference(shape._radii)

        plt.close()
@pytest.mark.parametrize( 24 | 'bounding_box', 25 | [BoundingBox([0, 100], [-50, 50]), BoundingBox([0, 20], [0, 50])], 26 | ) 27 | def test_center(self, point_collection, bounding_box): 28 | """Test that points are centered within the bounding box.""" 29 | points = point_collection._center(point_collection.points, bounding_box) 30 | 31 | (xmin, xmax), (ymin, ymax) = bounding_box 32 | upper = np.array([xmax, ymax]) - points.max(axis=0) 33 | lower = points.min(axis=0) - np.array([xmin, ymin]) 34 | assert np.array_equal(upper, lower) 35 | 36 | def test_distance_zero(self, point_collection): 37 | """Test the distance() method on points in the collection.""" 38 | for point in point_collection.points: 39 | assert point_collection.distance(*point) == 0 40 | 41 | @pytest.mark.parametrize( 42 | ('point', 'expected_distance'), [((-1, 0), 1), ((-1, -1), 1.414214)], ids=str 43 | ) 44 | def test_distance_nonzero(self, point_collection, point, expected_distance): 45 | """Test the distance() method on points not in the collection.""" 46 | assert pytest.approx(point_collection.distance(*point)) == expected_distance 47 | 48 | def test_repr(self, point_collection): 49 | """Test that the __repr__() method is working.""" 50 | assert ( 51 | re.match(r'^$', repr(point_collection)) 52 | is not None 53 | ) 54 | 55 | @pytest.mark.parametrize('ax', [None, plt.subplots()[1]]) 56 | def test_plot(self, point_collection, ax): 57 | """Test that plotting is working.""" 58 | ax = point_collection.plot(ax) 59 | assert len(ax.collections[0].get_offsets().data) == len(point_collection.points) 60 | assert pytest.approx(ax.get_aspect()) == 1.0 61 | -------------------------------------------------------------------------------- /tests/shapes/bases/test_line_collection.py: -------------------------------------------------------------------------------- 1 | """Test line_collection module.""" 2 | 3 | import itertools 4 | import re 5 | 6 | import matplotlib.pyplot as plt 7 | import pytest 8 | from 
matplotlib.axes import Axes 9 | 10 | from data_morph.shapes.bases.line_collection import LineCollection 11 | 12 | 13 | @pytest.mark.lines 14 | @pytest.mark.shapes 15 | class TestLineCollection: 16 | """Test the LineCollection class.""" 17 | 18 | @pytest.fixture(scope='class') 19 | def line_collection(self): 20 | """An instance of LineCollection.""" 21 | return LineCollection( 22 | [[0, 0], [0, 1]], 23 | [[1, 0], [1, 1]], 24 | [[10.5, 11.5], [11, 10]], 25 | ) 26 | 27 | def test_distance_zero(self, line_collection): 28 | """Test the distance() method on points on lines in the collection.""" 29 | for point in itertools.chain(*line_collection.lines): 30 | assert line_collection.distance(*point) == 0 31 | 32 | @pytest.mark.parametrize( 33 | ('point', 'expected_distance'), [((-1, 0), 1), ((-1, -1), 1.414214)], ids=str 34 | ) 35 | def test_distance_nonzero(self, line_collection, point, expected_distance): 36 | """Test the distance() method on points not on lines in the collection.""" 37 | assert pytest.approx(line_collection.distance(*point)) == expected_distance 38 | 39 | @pytest.mark.parametrize('line', [[(0, 0), (0, 0)], [(-1, -1), (-1, -1)]], ids=str) 40 | def test_line_as_point(self, line): 41 | """Test LineCollection raises a ValueError for small line magnitudes.""" 42 | with pytest.raises(ValueError, match='same start and end'): 43 | LineCollection(line) 44 | 45 | def test_repr(self, line_collection): 46 | """Test that the __repr__() method is working.""" 47 | assert ( 48 | re.match( 49 | r"""\n lines=\n {8}array\(\[\[\d+""", 50 | repr(line_collection), 51 | ) 52 | is not None 53 | ) 54 | 55 | @pytest.mark.parametrize('existing_ax', [True, False]) 56 | def test_plot(self, line_collection, existing_ax): 57 | """Test the plot() method is working.""" 58 | input_ax = plt.subplots()[1] if existing_ax else None 59 | result_ax = line_collection.plot(input_ax) 60 | 61 | assert isinstance(result_ax, Axes) 62 | assert len(result_ax.lines) == len(line_collection.lines) 63 | 
assert pytest.approx(result_ax.get_aspect()) == 1.0 64 | -------------------------------------------------------------------------------- /tests/shapes/circles/test_bullseye.py: -------------------------------------------------------------------------------- 1 | """Test the bullseye module.""" 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from .bases import CIRCLE_REPR, CirclesModuleTestBase 7 | 8 | pytestmark = [pytest.mark.shapes, pytest.mark.circles] 9 | 10 | 11 | class TestBullseye(CirclesModuleTestBase): 12 | """Test the Bullseye class.""" 13 | 14 | shape_name = 'bullseye' 15 | center_x, center_y = (20, 65) 16 | inner_radius, outer_radius = (10.24519052838329, 20.49038105676658) 17 | mid_radius = (outer_radius + inner_radius) / 2 18 | distance_test_cases = ( 19 | ((center_x, center_y + outer_radius), 0), # north on outer ring 20 | ((center_x, center_y + inner_radius), 0), # north on inner ring 21 | ((center_x, center_y - outer_radius), 0), # south on outer ring 22 | ((center_x, center_y - inner_radius), 0), # south on inner ring 23 | ((center_x + outer_radius, center_y), 0), # east on outer ring 24 | ((center_x + inner_radius, center_y), 0), # east on inner ring 25 | ((center_x - outer_radius, center_y), 0), # west on outer ring 26 | ((center_x - inner_radius, center_y), 0), # west on inner ring 27 | ((center_x, center_y), inner_radius), # center of bullseye 28 | ( 29 | (center_x, center_y + mid_radius), 30 | inner_radius / 2, 31 | ), # between the circles (north) 32 | ( 33 | (center_x, center_y - mid_radius), 34 | inner_radius / 2, 35 | ), # between the circles (south) 36 | ( 37 | (center_x + mid_radius, center_y), 38 | inner_radius / 2, 39 | ), # between the circles (east) 40 | ( 41 | (center_x - mid_radius, center_y), 42 | inner_radius / 2, 43 | ), # between the circles (west) 44 | ( 45 | (center_x, center_y + outer_radius * 2), 46 | outer_radius, 47 | ), # north of both circles 48 | ( 49 | (center_x - outer_radius * 1.5, center_y), 50 | 
inner_radius, 51 | ), # west of both circles 52 | ) 53 | repr_regex = ( 54 | r'^\n' 55 | r' circles=\n' 56 | r' ' + CIRCLE_REPR + '\n' 57 | r' ' + CIRCLE_REPR + '$' 58 | ) 59 | 60 | def test_init(self, shape): 61 | """Test that the Bullseye contains two concentric circles.""" 62 | assert len(shape.circles) == 2 63 | 64 | a, b = shape.circles 65 | assert np.array_equal(a.center, b.center) 66 | assert a.radius != b.radius 67 | -------------------------------------------------------------------------------- /src/data_morph/plotting/diagnostics.py: -------------------------------------------------------------------------------- 1 | """Diagnostic plot to visualize a shape superimposed on the dataset.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from ..plotting.style import plot_with_custom_style 8 | 9 | if TYPE_CHECKING: 10 | from numbers import Number 11 | 12 | from matplotlib.axes import Axes 13 | 14 | from ..data.dataset import Dataset 15 | from ..shapes.bases.shape import Shape 16 | 17 | 18 | @plot_with_custom_style 19 | def plot_shape_on_dataset( 20 | dataset: Dataset, 21 | shape: Shape, 22 | show_bounds: bool = False, 23 | alpha: Number = 0.25, 24 | ) -> Axes: 25 | """ 26 | Plot a shape superimposed on a dataset to evaluate heuristics. 27 | 28 | Parameters 29 | ---------- 30 | dataset : Dataset 31 | The dataset that ``shape`` was instantiated with. 32 | shape : Shape 33 | The shape that was instantiated with ``dataset``. 34 | show_bounds : bool, default ``False`` 35 | Whether to include the dataset's bounds in the plot. 36 | alpha : Number, default ``0.25`` 37 | The transparency to use for the dataset's points. 38 | 39 | Returns 40 | ------- 41 | matplotlib.axes.Axes 42 | The :class:`~matplotlib.axes.Axes` object containing the plot. 43 | 44 | Examples 45 | -------- 46 | 47 | .. 
class FigureEight(PointCollection):
    """
    Class for the figure eight shape.

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import FigureEight

        dataset = DataLoader.load_dataset('panda')
        shape = FigureEight(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes. For datasets
        with larger *y* ranges than *x* ranges, the figure eight will be
        vertical; otherwise, it will be horizontal.

    Notes
    -----
    This shape uses the formula for the `Lemniscate of Bernoulli
    <https://en.wikipedia.org/wiki/Lemniscate_of_Bernoulli>`_. It will orient itself
    vertically or horizontally depending on which direction has a larger range in the
    input dataset. For example, the panda dataset used above resulted in a horizontal
    orientation, but the music dataset results in a vertical orientation:

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the music dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import FigureEight

        dataset = DataLoader.load_dataset('music')
        shape = FigureEight(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.1)
    """

    name = 'figure_eight'

    def __init__(self, dataset: Dataset) -> None:
        x_shift, y_shift = dataset.data_bounds.center
        x_range, y_range = dataset.data_bounds.range

        t = np.linspace(-3.1, 3.1, num=80)
        sin_t = np.sin(t)

        # size the curve relative to the larger of the two data ranges
        focal_distance = max(x_range, y_range) * 0.3
        half_width = focal_distance * np.sqrt(2)

        # coordinates along the long and short axes of the figure eight
        long_axis = (half_width * np.cos(t)) / (1 + np.square(sin_t))
        short_axis = long_axis * sin_t

        # lay the long axis along whichever direction has the larger range
        if x_range >= y_range:
            columns = [long_axis, short_axis]
        else:
            columns = [short_axis, long_axis]

        # move the curve onto the center of the data
        points = np.stack(columns, axis=1) + np.array([x_shift, y_shift])

        super().__init__(*points)
38.2051,49.1026 45 | 29.7436,36.0256 46 | 29.7436,32.1795 47 | 30,29.1026 48 | 32.0513,26.7949 49 | 35.8974,25.2564 50 | 41.0256,25.2564 51 | 44.1026,25.641 52 | 47.1795,28.718 53 | 49.4872,31.4103 54 | 51.5385,34.8718 55 | 53.5897,37.5641 56 | 55.1282,40.641 57 | 56.6667,42.1795 58 | 59.2308,44.4872 59 | 62.3077,46.0256 60 | 64.8718,46.7949 61 | 67.9487,47.9487 62 | 70.5128,53.718 63 | 71.5385,60.641 64 | 71.5385,64.4872 65 | 69.4872,69.4872 66 | 46.9231,79.8718 67 | 48.2051,84.1026 68 | 50,85.2564 69 | 53.0769,85.2564 70 | 55.3846,86.0256 71 | 56.6667,86.0256 72 | 56.1538,82.9487 73 | 53.8462,80.641 74 | 51.2821,78.718 75 | 50,78.718 76 | 47.9487,77.5641 77 | 29.7436,59.8718 78 | 29.7436,62.1795 79 | 31.2821,62.5641 80 | 57.9487,99.4872 81 | 61.7949,99.1026 82 | 64.8718,97.5641 83 | 68.4615,94.1026 84 | 70.7692,91.0256 85 | 72.0513,86.4103 86 | 73.8462,83.3333 87 | 75.1282,79.1026 88 | 76.6667,75.2564 89 | 77.6923,71.4103 90 | 79.7436,66.7949 91 | 81.7949,60.2564 92 | 83.3333,55.2564 93 | 85.1282,51.4103 94 | 86.4103,47.5641 95 | 87.9487,46.0256 96 | 89.4872,42.5641 97 | 93.3333,39.8718 98 | 95.3846,36.7949 99 | 98.2051,33.718 100 | 56.6667,40.641 101 | 59.2308,38.3333 102 | 60.7692,33.718 103 | 63.0769,29.1026 104 | 64.1026,25.2564 105 | 64.359,24.1026 106 | 74.359,22.9487 107 | 71.2821,22.9487 108 | 67.9487,22.1795 109 | 65.8974,20.2564 110 | 63.0769,19.1026 111 | 61.2821,19.1026 112 | 58.7179,18.3333 113 | 55.1282,18.3333 114 | 52.3077,18.3333 115 | 49.7436,17.5641 116 | 47.4359,16.0256 117 | 44.8718,13.718 118 | 48.7179,14.8718 119 | 51.2821,14.8718 120 | 54.1026,14.8718 121 | 56.1538,14.1026 122 | 52.0513,12.5641 123 | 48.7179,11.0256 124 | 47.1795,9.8718 125 | 46.1538,6.0256 126 | 50.5128,9.4872 127 | 53.8462,10.2564 128 | 57.4359,10.2564 129 | 60,10.641 130 | 64.1026,10.641 131 | 66.9231,10.641 132 | 71.2821,10.641 133 | 74.359,10.641 134 | 78.2051,10.641 135 | 67.9487,8.718 136 | 68.4615,5.2564 137 | 68.2051,2.9487 138 | 37.6923,25.7692 139 | 
class Circle(Shape):
    """
    Class representing a hollow circle.

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.circles import Circle

        dataset = DataLoader.load_dataset('panda')
        shape = Circle(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    radius : numbers.Number, optional
        The radius of the circle. When omitted (``None``), the radius is 1.5
        times the mean of the standard deviations of the dataset's ``x`` and
        ``y`` columns.
    """

    def __init__(self, dataset: Dataset, radius: Number | None = None) -> None:
        self.center: tuple[Number, Number] = dataset.data_bounds.center
        """The (x, y) coordinates of the circle's center."""

        # Test for None explicitly: a truthiness check (``radius or ...``)
        # would silently discard a caller-supplied radius of 0 and substitute
        # the data-derived default instead.
        if radius is None:
            radius = dataset.data[['x', 'y']].std().mean() * 1.5
        self.radius: Number = radius
        """The radius of the circle."""

    def __repr__(self) -> str:
        x, y = self.center
        return f'<{self.__class__.__name__} center={(float(x), float(y))} radius={self.radius}>'

    def distance(self, x: Number, y: Number) -> float:
        """
        Calculate the absolute distance between this circle's edge and a point (x, y).

        Parameters
        ----------
        x, y : numbers.Number
            Coordinates of a point in 2D space.

        Returns
        -------
        float
            The absolute distance between this circle's edge and the point (x, y).
        """
        # Distance to the edge is the distance to the center minus the radius;
        # abs() makes points inside and outside the circle comparable.
        return abs(
            self._euclidean_distance(self.center, np.array([x, y])) - self.radius
        )

    @plot_with_custom_style
    def plot(self, ax: Axes | None = None) -> Axes:
        """
        Plot the shape.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            An optional :class:`~matplotlib.axes.Axes` object to plot on.
            A new figure using the constrained layout is created when omitted.

        Returns
        -------
        matplotlib.axes.Axes
            The :class:`~matplotlib.axes.Axes` object containing the plot.
        """
        if ax is None:
            fig, ax = plt.subplots(layout='constrained')
            fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2)
        # Equal axis scaling keeps the patch circular instead of elliptical.
        _ = ax.axis('equal')
        _ = ax.add_patch(plt.Circle(self.center, self.radius, ec='k', fill=False))
        # Patches do not update the view limits on their own.
        _ = ax.autoscale()
        return ax
class PointCollection(Shape):
    """
    Class representing a shape formed by a collection of points.

    Parameters
    ----------
    *points : Iterable[numbers.Number]
        An iterable of (x, y) values representing an arrangement of points.
    """

    def __init__(self, *points: Iterable[Number]) -> None:
        self.points = np.array(points)
        """numpy.ndarray: An array of (x, y) values
        representing an arrangement of points."""

        # Opacity applied to the markers drawn by plot().
        self._alpha = 1

    def __repr__(self) -> str:
        return f'<{self.__class__.__name__} of {len(self.points)} points>'

    @staticmethod
    def _center(points: np.ndarray, bounds: BoundingBox) -> np.ndarray:
        """
        Center the points within the bounding box.

        Parameters
        ----------
        points : np.ndarray
            The points to center.
        bounds : BoundingBox
            The bounding box within which to center the points.

        Returns
        -------
        np.ndarray
            The centered points.
        """
        maxes = points.max(axis=0)
        span = maxes - points.min(axis=0)

        # Margin left on each side of the points, per axis, once centered.
        gap = (np.array(bounds.range) - span) / 2

        # Move the points so their maxima sit one margin below the upper bounds.
        (_, xmax), (_, ymax) = bounds
        shift = np.array([xmax, ymax]) - maxes - gap

        return points + shift

    def distance(self, x: Number, y: Number) -> float:
        """
        Calculate the minimum distance from the points of this shape
        to a point (x, y).

        Parameters
        ----------
        x, y : numbers.Number
            Coordinates of a point in 2D space.

        Returns
        -------
        float
            The minimum distance from the points of this shape
            to the point (x, y).
        """
        # np.asarray() returns the stored ndarray as-is, whereas np.array()
        # would copy every point on each call.
        offsets = np.asarray(self.points) - np.array((x, y))
        return np.min(np.linalg.norm(offsets, ord=2, axis=1))

    @plot_with_custom_style
    def plot(self, ax: Axes | None = None) -> Axes:
        """
        Plot the shape.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            An optional :class:`~matplotlib.axes.Axes` object to plot on.
            A new figure using the constrained layout is created when omitted.

        Returns
        -------
        matplotlib.axes.Axes
            The :class:`~matplotlib.axes.Axes` object containing the plot.
        """
        if ax is None:
            fig, ax = plt.subplots(layout='constrained')
            fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2)
        _ = ax.axis('equal')
        _ = ax.scatter(*self.points.T, s=5, color='k', alpha=self._alpha)
        return ax
radii[2], center_y), 0), # east on outer ring 28 | ((center_x - radii[0], center_y), 0), # west on inner ring 29 | ((center_x - radii[1], center_y), 0), # west on middle ring 30 | ((center_x - radii[2], center_y), 0), # west on outer ring 31 | ((center_x, center_y), radii[0]), # center of all rings 32 | ( 33 | (center_x, center_y + mid_radii[0]), 34 | radii[0] / 2, 35 | ), # between the inner circles (north) 36 | ( 37 | (center_x, center_y - mid_radii[0]), 38 | radii[0] / 2, 39 | ), # between the inner circles (south) 40 | ( 41 | (center_x + mid_radii[0], center_y), 42 | radii[0] / 2, 43 | ), # between the inner circles (east) 44 | ( 45 | (center_x - mid_radii[0], center_y), 46 | radii[0] / 2, 47 | ), # between the inner circles (west) 48 | ( 49 | (center_x, center_y + mid_radii[1]), 50 | radii[0] / 2, 51 | ), # between the outer circles (north) 52 | ( 53 | (center_x, center_y - mid_radii[1]), 54 | radii[0] / 2, 55 | ), # between the outer circles (south) 56 | ( 57 | (center_x + mid_radii[1], center_y), 58 | radii[0] / 2, 59 | ), # between the outer circles (east) 60 | ( 61 | (center_x - mid_radii[1], center_y), 62 | radii[0] / 2, 63 | ), # between the outer circles (west) 64 | ((center_x, center_y + radii[2] * 2), radii[2]), # north of all circles 65 | ((center_x - radii[2] * 1.5, center_y), radii[2] / 2), # west of all circles 66 | ) 67 | repr_regex = ( 68 | r'^\n' 69 | r' circles=\n' 70 | r' ' + CIRCLE_REPR + '\n' 71 | r' ' + CIRCLE_REPR + '\n' 72 | r' ' + CIRCLE_REPR + '$' 73 | ) 74 | 75 | def test_init(self, shape_factory): 76 | """Test that the Rings contains three concentric circles.""" 77 | shape = shape_factory.generate_shape(self.shape_name) 78 | 79 | num_rings = 3 80 | assert len(shape.circles) == num_rings 81 | assert all( 82 | np.array_equal(circle.center, shape.circles[0].center) 83 | for circle in shape.circles[1:] 84 | ) 85 | assert len({circle.radius for circle in shape.circles}) == num_rings 86 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Contributions are welcome. Please adhere to the following process. 4 | 5 | ## 1. Open an issue 6 | Open up an issue describing in detail the bug or feature request you are proposing. Be sure to fill out all the information requested in the template. Please wait for confirmation from a maintainer that this unit of work is in line with the project's roadmap *before* submitting a pull request. 7 | 8 | ## 2. Work on your changes 9 | Once you have been given the go ahead, you can start working on the code. Start by forking the project, cloning locally, and then creating a branch to work on. You will need to then install the main dependencies as well as the `dev` and `docs` dependencies, which can be done by running the following command: 10 | 11 | ```shell 12 | $ pip install -e . --group dev 13 | ``` 14 | 15 | Set up the pre-commit hooks to make sure you can pass the CI checks: 16 | 17 | ```shell 18 | $ pre-commit install 19 | ``` 20 | 21 | All commits will be squashed, so just make sure that the final commit passes all linting, documentation, and testing checks. These will run in GitHub Actions when you open a pull request, but you should also run them locally: 22 | 23 | ```shell 24 | $ pre-commit run --all-files # linting and documentation format checks 25 | $ pytest # run the test suite 26 | $ cd docs && make html # build the documentation locally 27 | ``` 28 | 29 | Some things to remember: 30 | 31 | - All code must be documented using docstrings in the [numpydoc style](https://numpydoc.readthedocs.io/en/latest/format.html) – the pre-commit hooks will check for this. 32 | - Any changes to the API must be accompanied by either an additional test case or a new test. Run `pytest` to make sure your changes are covered. 
33 | - Documentation for the project is built with Sphinx. Your changes must render correctly in the output. Run `make html` from the `docs` directory and inspect the result. 34 | 35 | ## 3. Open a pull request 36 | 37 | Once you have finished with the task you are working on and all the checks and tests pass, you can create a pull request. Please consult [GitHub's guide on how to do this](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork), if you need some guidance. 38 | 39 | In your description, please do the following: 40 | 1. Link to the corresponding issue for this unit of work. 41 | 2. Describe what you changed and why. 42 | 3. Include, if applicable, any testing instructions. 43 | 44 | When you create your pull request, first-time contributors will need to wait for a maintainer to approve running the GitHub Actions workflows. Please be patient until this happens. 45 | 46 | Once it does, the same checks described above (testing, documentation, linting) that you ran on your machine will run on Linux, macOS, and Windows with multiple versions of Python. Please note that it is possible that differences in operating systems and/or Python versions result in a failure, despite it working on your machine. 47 | 48 | If anything fails, please attempt to fix it as we're unlikely to review your code until everything passes. If stuck, please feel free to leave a note in the pull request enumerating what you have already tried and someone may be able to offer assistance. 49 | 50 | ## 4. Code review 51 | 52 | After all checks in your pull request pass, a maintainer will review your code. In many cases, there will be some feedback to address, and this may require a few iterations to get to the best implementation. Remember to be patient and polite during this process. 53 | 54 | ## 5. Congratulations!
class Rings(Shape):
    """
    Class representing rings comprising three concentric circles.

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.circles import Rings

        dataset = DataLoader.load_dataset('panda')
        shape = Rings(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.

    See Also
    --------
    Circle : The individual rings are represented as circles.
    """

    def __init__(self, dataset: Dataset) -> None:
        ring_radii = self._derive_radii(dataset)
        self.circles: list[Circle] = [Circle(dataset, r) for r in ring_radii]
        """The individual rings represented by :class:`Circle` objects."""

        # Cache centers and radii as arrays so distance() can evaluate all
        # rings in a single vectorized expression.
        self._centers = np.array([ring.center for ring in self.circles])
        self._radii = np.array([ring.radius for ring in self.circles])

    def __repr__(self) -> str:
        return self._recursive_repr('circles')

    @staticmethod
    def _derive_radii(dataset: Dataset) -> np.ndarray:
        """
        Derive the radii for the circles in the rings.

        Parameters
        ----------
        dataset : Dataset
            The starting dataset to morph into.

        Returns
        -------
        np.ndarray
            Three evenly spaced radii in decreasing order, starting at the
            outermost radius and stopping short of zero.
        """
        shortest_data_side = min(dataset.data_bounds.range)
        shortest_morph_side = min(dataset.morph_bounds.range)
        outer_radius = (shortest_data_side + shortest_morph_side) / 4

        # endpoint=False leaves out the degenerate zero-radius ring.
        return np.linspace(outer_radius, 0, 3, endpoint=False)

    def distance(self, x: Number, y: Number) -> float:
        """
        Calculate the minimum absolute distance between any of this shape's
        circles' edges and a point (x, y).

        Parameters
        ----------
        x, y : numbers.Number
            Coordinates of a point in 2D space.

        Returns
        -------
        float
            The minimum absolute distance between any of this shape's
            circles' edges and the point (x, y).
        """
        offsets = self._centers - np.array([x, y])
        center_distances = np.linalg.norm(offsets, axis=1)
        edge_distances = np.abs(center_distances - self._radii)
        return np.min(edge_distances)

    @plot_with_custom_style
    def plot(self, ax: Axes | None = None) -> Axes:
        """
        Plot the shape.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            An optional :class:`~matplotlib.axes.Axes` object to plot on.

        Returns
        -------
        matplotlib.axes.Axes
            The :class:`~matplotlib.axes.Axes` object containing the plot.
        """
        # Each ring draws onto the axes returned by the previous one, so all
        # rings end up on the same plot.
        for ring in self.circles:
            ax = ring.plot(ax)
        return ax
class PolygonsLineModuleTestBase:
    """Base for testing polygon shapes."""

    shape_name: str
    distance_test_cases: tuple[tuple[tuple[Number], float]]
    expected_line_count: int

    @pytest.fixture(scope='class')
    def shape(self, shape_factory):
        """Fixture providing the shape under test, built from ``shape_name``."""
        return shape_factory.generate_shape(self.shape_name)

    @pytest.fixture(scope='class')
    def slopes(self, shape):
        """Fixture providing the slope of each line (``inf`` for vertical lines)."""
        xs, ys = np.array(shape.lines).T
        dx = np.diff(xs, axis=0)
        dy = np.diff(ys, axis=0)

        # Mask zero runs so vertical lines do not trigger a division by zero;
        # the masked entries are reported as infinite slopes.
        masked_dx = np.ma.masked_array(dx, mask=dx == 0)
        return (dy / masked_dx).filled(np.inf)

    def test_init(self, shape):
        """Test that the shape consists of the correct number of distinct lines."""
        distinct_lines = np.unique(shape.lines, axis=0)
        assert len(distinct_lines) == self.expected_line_count

    def test_distance(self, shape, test_point, expected_distance):
        """
        Test the distance() method parametrized by distance_test_cases
        (see conftest.py).
        """
        assert pytest.approx(shape.distance(*test_point)) == expected_distance

    def test_lines_form_polygon(self, shape):
        """Test that the lines form a polygon."""
        # Flatten the lines into one list of endpoints; the test expects as
        # many distinct endpoints (vertices) as there are lines.
        vertices = np.unique(np.array(shape.lines).reshape(-1, 2), axis=0)
        assert len(vertices) == self.expected_line_count
46 | 47 | # docs for this action: https://github.com/tj-actions/changed-files 48 | - name: Get all dataset and shape files that have changed 49 | id: changed-files-yaml 50 | uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0 51 | with: 52 | files_yaml: | 53 | dataset: 54 | - src/data_morph/data/starter_shapes/* 55 | shape: 56 | - src/data_morph/shapes/** 57 | 58 | # If datasets were added or changed in this PR 59 | - name: Generate morphs from new or changed datasets 60 | if: steps.changed-files-yaml.outputs.dataset_any_changed == 'true' 61 | env: 62 | DATASET_ALL_CHANGED_FILES: ${{ steps.changed-files-yaml.outputs.dataset_all_changed_files }} 63 | run: | 64 | echo "Detected changes to dataset(s): $DATASET_ALL_CHANGED_FILES" 65 | DATASET_ARGS=$(python bin/ci.py $DATASET_ALL_CHANGED_FILES) 66 | echo "Generating morphs for the following datasets: $DATASET_ARGS" 67 | data-morph \ 68 | --start $DATASET_ARGS \ 69 | --target bullseye heart rectangle star slant_up \ 70 | --workers 0 71 | 72 | # If shapes are added or modified in this PR 73 | - name: Generate morphs from new or changed shapes 74 | if: steps.changed-files-yaml.outputs.shape_any_changed == 'true' 75 | env: 76 | SHAPE_ALL_CHANGED_FILES: ${{ steps.changed-files-yaml.outputs.shape_all_changed_files }} 77 | run: | 78 | echo "Detected changes to shape(s): $SHAPE_ALL_CHANGED_FILES" 79 | SHAPE_ARGS=$(python bin/ci.py $SHAPE_ALL_CHANGED_FILES) 80 | echo "Generating morphs for the following shapes: $SHAPE_ARGS" 81 | data-morph \ 82 | --start music \ 83 | --target $SHAPE_ARGS \ 84 | --workers 0 85 | 86 | # For core code changes, we want to do a couple morphs to see if they still look ok 87 | # Only need to run if neither of the previous two morphs ran 88 | - name: Morph shapes with core code changes 89 | if: steps.changed-files-yaml.outputs.dataset_any_changed != 'true' && steps.changed-files-yaml.outputs.shape_any_changed != 'true' 90 | run: | 91 | data-morph \ 92 | --start music \ 93 | 
class Spade(PointCollection):
    """
    Class for the spade shape.

    .. plot::
       :scale: 75
       :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import Spade

        dataset = DataLoader.load_dataset('panda')
        shape = Spade(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    """

    def __init__(self, dataset: Dataset) -> None:
        bounds = dataset.data_bounds
        _, xmax = bounds.x_bounds
        x_shift, y_shift = bounds.center

        # The body of the spade is a heart flipped upside down.
        inverted_heart = self._get_inverted_heart(dataset, y_shift)

        # The base is a flat line plus two curved wings.
        base_x, base_y = self._get_base(xmax, x_shift, y_shift)

        # Merge both parts into a single (n, 2) array of points.
        xs = np.concatenate([inverted_heart[:, 0], base_x])
        ys = np.concatenate([inverted_heart[:, 1], base_y])
        outline = np.column_stack((xs, ys))

        super().__init__(*self._center(outline, bounds))

    @staticmethod
    def _get_inverted_heart(dataset: Dataset, y_shift: Number) -> np.ndarray:
        """
        Get points for an inverted heart.

        Parameters
        ----------
        dataset : Dataset
            The starting dataset to morph into other shapes.
        y_shift : Number
            The constant value to shift the *y* up/down by.

        Returns
        -------
        numpy.ndarray
            The points for the upside-down heart.

        See Also
        --------
        Heart : This shape is reused to calculate the spade.
        """
        points = Heart(dataset).points

        # Reflect the y values across the horizontal line y = y_shift.
        points[:, 1] = 2 * y_shift - points[:, 1]
        return points

    @staticmethod
    def _get_base(
        xmax: Number, x_shift: Number, y_shift: Number
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Get the base of the spade.

        Parameters
        ----------
        xmax : Number
            The maximum *x* value for the shape.
        x_shift : Number
            The constant value to shift the *x* left/right by.
        y_shift : Number
            The constant value to shift the *y* up/down by.

        Returns
        -------
        tuple[numpy.ndarray, numpy.ndarray]
            The *x* and *y* coordinates for the base of the spade.
        """
        # Flat bottom edge of the base.
        bottom_x = np.linspace(-6, 6, num=12)
        bottom_y = np.full(12, -16)

        # Left wing: a parabola rising from the bottom-left corner.
        left_wing_x = np.linspace(-6, 0, num=12)
        left_wing_y = 0.278 * np.power(left_wing_x + 6, 2) - 16

        # Right wing: the left wing mirrored about the vertical axis.
        right_wing_x = np.linspace(0, 6, num=12)
        right_wing_y = 0.278 * np.power(right_wing_x - 6, 2) - 16

        unscaled_x = np.concatenate([bottom_x, left_wing_x, right_wing_x])
        unscaled_y = np.concatenate([bottom_y, left_wing_y, right_wing_y])

        # Scale by half the widest width of the spade, then shift into place.
        scale_factor = (xmax - x_shift) / 16
        return unscaled_x * scale_factor + x_shift, unscaled_y * scale_factor + y_shift
- build 38 | runs-on: ubuntu-latest 39 | environment: 40 | name: pypi 41 | url: https://pypi.org/p/data-morph-ai 42 | permissions: 43 | id-token: write # IMPORTANT: mandatory for trusted publishing 44 | 45 | steps: 46 | - name: Download all the dists 47 | uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 48 | with: 49 | name: python-package-distributions 50 | path: dist/ 51 | - name: Publish distribution 📦 to PyPI 52 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 53 | 54 | github-release: 55 | name: >- 56 | Sign the Python 🐍 distribution 📦 with Sigstore 57 | and upload them to GitHub Release 58 | needs: 59 | - publish-to-pypi 60 | runs-on: ubuntu-latest 61 | 62 | permissions: 63 | contents: write # IMPORTANT: mandatory for making GitHub Releases 64 | id-token: write # IMPORTANT: mandatory for sigstore 65 | 66 | steps: 67 | - name: Download all the dists 68 | uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 69 | with: 70 | name: python-package-distributions 71 | path: dist/ 72 | - name: Sign the dists with Sigstore 73 | uses: sigstore/gh-action-sigstore-python@f832326173235dcb00dd5d92cd3f353de3188e6c # v3.1.0 74 | with: 75 | inputs: >- 76 | ./dist/*.tar.gz 77 | ./dist/*.whl 78 | - name: Create GitHub Release 79 | env: 80 | GITHUB_TOKEN: ${{ github.token }} 81 | run: >- 82 | gh release create 83 | '${{ github.ref_name }}' 84 | --repo '${{ github.repository }}' 85 | --generate-notes 86 | - name: Upload artifact signatures to GitHub Release 87 | env: 88 | GITHUB_TOKEN: ${{ github.token }} 89 | # Upload to GitHub Release using the `gh` CLI. 90 | # `dist/` contains the built packages, and the 91 | # sigstore-produced signatures and certificates. 
92 | run: >- 93 | gh release upload 94 | '${{ github.ref_name }}' dist/** 95 | --repo '${{ github.repository }}' 96 | 97 | publish-to-testpypi: 98 | name: Publish Python 🐍 distribution 📦 to TestPyPI 99 | if: startsWith(github.ref, 'refs/tags/') && contains(github.ref, '-rc') # only publish release candidates to TestPyPI on tag pushes 100 | needs: 101 | - build 102 | runs-on: ubuntu-latest 103 | 104 | environment: 105 | name: testpypi 106 | url: https://test.pypi.org/p/data-morph-ai 107 | 108 | permissions: 109 | id-token: write # IMPORTANT: mandatory for trusted publishing 110 | 111 | steps: 112 | - name: Download all the dists 113 | uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 114 | with: 115 | name: python-package-distributions 116 | path: dist/ 117 | - name: Publish distribution 📦 to TestPyPI 118 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 119 | with: 120 | repository-url: https://test.pypi.org/legacy/ 121 | -------------------------------------------------------------------------------- /src/data_morph/shapes/bases/line_collection.py: -------------------------------------------------------------------------------- 1 | """Base class for shapes that are composed of lines.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | from ...plotting.style import plot_with_custom_style 11 | from .shape import Shape 12 | 13 | if TYPE_CHECKING: 14 | from collections.abc import Iterable 15 | from numbers import Number 16 | 17 | from matplotlib.axes import Axes 18 | 19 | 20 | class LineCollection(Shape): 21 | """ 22 | Class representing a shape consisting of one or more lines. 23 | 24 | Parameters 25 | ---------- 26 | *lines : Iterable[Iterable[numbers.Number]] 27 | An iterable of two (x, y) pairs representing the endpoints 28 | of a line. 
29 | """ 30 | 31 | def __init__(self, *lines: Iterable[Iterable[Number]]) -> None: 32 | # check that lines with the same starting and ending points raise an error 33 | for line in lines: 34 | if np.allclose(*line): 35 | raise ValueError(f'Line {line} has the same start and end point') 36 | 37 | self.lines = np.array(lines) 38 | """Iterable[Iterable[numbers.Number]]: An iterable 39 | of two (x, y) pairs representing the endpoints of a line.""" 40 | 41 | def __repr__(self) -> str: 42 | return self._recursive_repr('lines') 43 | 44 | def distance(self, x: Number, y: Number) -> float: 45 | """ 46 | Calculate the minimum distance from the lines of this shape 47 | to a point (x, y). 48 | 49 | Parameters 50 | ---------- 51 | x, y : numbers.Number 52 | Coordinates of a point in 2D space. 53 | 54 | Returns 55 | ------- 56 | float 57 | The minimum distance from the lines of this shape to the 58 | point (x, y). 59 | 60 | Notes 61 | ----- 62 | Implementation based on `this Stack Overflow answer`_. 63 | 64 | .. 
_this Stack Overflow answer: https://stackoverflow.com/a/58781995 65 | """ 66 | point = np.array([x, y]) 67 | 68 | start_points = self.lines[:, 0, :] 69 | end_points = self.lines[:, 1, :] 70 | 71 | tangent_vector = end_points - start_points 72 | normalized_tangent_vectors = np.divide( 73 | tangent_vector, 74 | np.hypot(tangent_vector[:, 0], tangent_vector[:, 1]).reshape(-1, 1), 75 | ) 76 | 77 | # row-wise dot products of 2D vectors 78 | signed_parallel_distance_start = np.multiply( 79 | start_points - point, normalized_tangent_vectors 80 | ).sum(axis=1) 81 | signed_parallel_distance_end = np.multiply( 82 | point - end_points, normalized_tangent_vectors 83 | ).sum(axis=1) 84 | 85 | clamped_parallel_distance = np.maximum.reduce( 86 | [ 87 | signed_parallel_distance_start, 88 | signed_parallel_distance_end, 89 | np.zeros(signed_parallel_distance_start.shape[0]), 90 | ] 91 | ) 92 | 93 | # row-wise cross products of 2D vectors 94 | diff = point - start_points 95 | perpendicular_distance_component = ( 96 | diff[..., 0] * normalized_tangent_vectors[..., 1] 97 | - diff[..., 1] * normalized_tangent_vectors[..., 0] 98 | ) 99 | 100 | return np.min( 101 | np.hypot(clamped_parallel_distance, perpendicular_distance_component) 102 | ) 103 | 104 | @plot_with_custom_style 105 | def plot(self, ax: Axes | None = None) -> Axes: 106 | """ 107 | Plot the shape. 108 | 109 | Parameters 110 | ---------- 111 | ax : matplotlib.axes.Axes, optional 112 | An optional :class:`~matplotlib.axes.Axes` object to plot on. 113 | 114 | Returns 115 | ------- 116 | matplotlib.axes.Axes 117 | The :class:`~matplotlib.axes.Axes` object containing the plot. 
118 | """ 119 | if not ax: 120 | fig, ax = plt.subplots(layout='constrained') 121 | fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2) 122 | _ = ax.axis('equal') 123 | for start, end in self.lines: 124 | ax.plot(*list(zip(start, end, strict=True)), 'k-') 125 | return ax 126 | -------------------------------------------------------------------------------- /src/data_morph/shapes/bases/shape.py: -------------------------------------------------------------------------------- 1 | """Abstract base class for shapes.""" 2 | 3 | from __future__ import annotations 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import TYPE_CHECKING 7 | 8 | import numpy as np 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Iterable 12 | from numbers import Number 13 | 14 | from matplotlib.axes import Axes 15 | 16 | 17 | class Shape(ABC): 18 | """Abstract base class for a shape.""" 19 | 20 | name: str | None = None 21 | """The display name for the shape, if the lowercased class name is not desired.""" 22 | 23 | @classmethod 24 | def get_name(cls) -> str: 25 | """ 26 | Get the name of the shape. 27 | 28 | Returns 29 | ------- 30 | str 31 | The name of the shape. 32 | """ 33 | return cls.name or cls.__name__.lower() 34 | 35 | def __repr__(self) -> str: 36 | """ 37 | Return string representation of the shape. 38 | 39 | Returns 40 | ------- 41 | str 42 | The unambiguous string representation of the shape. 43 | """ 44 | return self._recursive_repr() 45 | 46 | def __str__(self) -> str: 47 | """ 48 | Return string representation of the shape. 49 | 50 | Returns 51 | ------- 52 | str 53 | The human-readable string representation of the shape. 54 | 55 | See Also 56 | -------- 57 | get_name : This calls the :meth:`.get_name` class method. 58 | """ 59 | return self.get_name() 60 | 61 | @abstractmethod 62 | def distance(self, x: Number, y: Number) -> float: 63 | """ 64 | Calculate the distance between this shape and a point (x, y). 
65 | 66 | Parameters 67 | ---------- 68 | x, y : numbers.Number 69 | Coordinates of a point in 2D space. 70 | 71 | Returns 72 | ------- 73 | float 74 | The distance between this shape and the point (x, y). 75 | """ 76 | raise NotImplementedError 77 | 78 | @staticmethod 79 | def _euclidean_distance(a: Iterable[Number], b: Iterable[Number]) -> float: 80 | """ 81 | Calculate the Euclidean distance between points a and b. 82 | 83 | Parameters 84 | ---------- 85 | a, b : Iterable[numbers.Number] 86 | Coordinates of points in two-dimensional space. 87 | 88 | Returns 89 | ------- 90 | float 91 | The Euclidean distance between a and b. 92 | 93 | See Also 94 | -------- 95 | numpy.linalg.norm : Euclidean distance calculation. 96 | """ 97 | return np.linalg.norm(a - b) 98 | 99 | def _recursive_repr(self, attr: str | None = None) -> str: 100 | """ 101 | Return string representation of the shape incorporating 102 | any items inside a specific attribute. 103 | 104 | Parameters 105 | ---------- 106 | attr : str, optional 107 | The attribute to incorporate into the result; must 108 | be iterable. 109 | 110 | Returns 111 | ------- 112 | str 113 | The unambiguous string representation of the shape. 114 | """ 115 | value = f'<{self.__class__.__name__}>' 116 | if not attr: 117 | return value 118 | 119 | indented_line = '\n ' 120 | offset = len(attr) + 4 121 | hanging_indent = f'{indented_line:<{offset}}' 122 | return ( 123 | value 124 | + f'{indented_line}{attr}={hanging_indent}' 125 | + f'{hanging_indent}'.join(repr(item) for item in getattr(self, attr)) 126 | ) 127 | 128 | @abstractmethod 129 | def plot(self, ax: Axes | None = None) -> Axes: 130 | """ 131 | Plot the shape. 132 | 133 | Parameters 134 | ---------- 135 | ax : matplotlib.axes.Axes, optional 136 | An optional :class:`~matplotlib.axes.Axes` object to plot on. 137 | 138 | Returns 139 | ------- 140 | matplotlib.axes.Axes 141 | The :class:`~matplotlib.axes.Axes` object containing the plot. 
142 | 143 | Notes 144 | ----- 145 | When implementing this method for subclasses, make sure to apply the 146 | :func:`.plotting.style.plot_with_custom_style` decorator. 147 | """ 148 | raise NotImplementedError 149 | -------------------------------------------------------------------------------- /tests/plotting/test_animation.py: -------------------------------------------------------------------------------- 1 | """Test the animation module.""" 2 | 3 | from contextlib import suppress 4 | 5 | import numpy as np 6 | import pytest 7 | from PIL import Image 8 | 9 | from data_morph.plotting import animation 10 | from data_morph.plotting.animation import stitch_gif_animation 11 | from data_morph.plotting.static import plot 12 | 13 | pytestmark = pytest.mark.plotting 14 | 15 | 16 | @pytest.mark.parametrize('forward_only', [True, False]) 17 | def test_frame_stitching(sample_data, tmp_path, forward_only): 18 | """Test stitching frames into a GIF animation.""" 19 | start_shape = 'sample' 20 | target_shape = 'circle' 21 | bounds = [-5, 105] 22 | frame_numbers = list(range(10)) 23 | rng = np.random.default_rng() 24 | 25 | for frame in frame_numbers: 26 | plot( 27 | data=sample_data + rng.standard_normal(), 28 | x_bounds=bounds, 29 | y_bounds=bounds, 30 | save_to=(tmp_path / f'{start_shape}-to-{target_shape}-{frame}.png'), 31 | decimals=2, 32 | with_median=False, 33 | marginals=None, 34 | ) 35 | 36 | duration_multipliers = [0, 0, 0, 0, 1, 1, *frame_numbers[2:], frame_numbers[-1]] 37 | stitch_gif_animation( 38 | output_dir=tmp_path, 39 | start_shape=start_shape, 40 | target_shape=target_shape, 41 | frame_numbers=duration_multipliers, 42 | keep_frames=False, 43 | forward_only_animation=forward_only, 44 | ) 45 | 46 | animation_file = tmp_path / f'{start_shape}_to_{target_shape}.gif' 47 | assert animation_file.is_file() 48 | assert not (tmp_path / f'{start_shape}-to-{target_shape}-{frame}.png').is_file() 49 | 50 | with Image.open(animation_file) as img: 51 | # we subtract one 
@pytest.mark.parametrize(
    ('ease_function', 'step', 'expected'),
    [
        ('linear', 0.1, 0.1),
        ('linear', 0.5, 0.5),
        ('linear', 0.9, 0.9),
        ('ease_in_sine', 0.1, 0.012312),
        ('ease_in_sine', 0.5, 0.292893),
        ('ease_in_sine', 0.9, 0.843566),
        ('ease_out_sine', 0.1, 0.156434),
        ('ease_out_sine', 0.5, 0.707107),
        ('ease_out_sine', 0.9, 0.987688),
        ('ease_in_out_sine', 0.1, 0.024472),
        ('ease_in_out_sine', 0.5, 0.5),
        ('ease_in_out_sine', 0.9, 0.975528),
        ('ease_in_out_quadratic', 0.1, 0.02),
        ('ease_in_out_quadratic', 0.5, 0.5),
        ('ease_in_out_quadratic', 0.9, 0.98),
    ],
)
def test_easing_functions(ease_function, step, expected):
    """Check each easing function against tabulated values at three steps."""
    eased_value = getattr(animation, ease_function)(step)
    # the table holds values to six decimal places, so compare at that precision
    assert round(eased_value, ndigits=6) == expected
class ShapeFactory:
    """
    Factory for generating shape objects based on data.

    .. plot::
       :caption:
            Target shapes currently available.

        from data_morph.data.loader import DataLoader
        from data_morph.shapes.factory import ShapeFactory

        dataset = DataLoader.load_dataset('panda')
        _ = ShapeFactory(dataset).plot_available_shapes()

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    """

    _SHAPE_CLASSES: ClassVar[tuple[type[Shape], ...]] = (
        Bullseye,
        Circle,
        Club,
        Diamond,
        DotsGrid,
        DownParabola,
        FigureEight,
        Heart,
        HighLines,
        HorizontalLines,
        LeftParabola,
        Rectangle,
        RightParabola,
        Rings,
        Scatter,
        SlantDownLines,
        SlantUpLines,
        Spade,
        Spiral,
        Star,
        UpParabola,
        VerticalLines,
        WideLines,
        XLines,
    )
    """New shape classes must be registered here."""

    _SHAPE_MAPPING: ClassVar[dict[str, type[Shape]]] = {
        shape_cls.get_name(): shape_cls for shape_cls in _SHAPE_CLASSES
    }
    """Mapping of shape display names to classes."""

    AVAILABLE_SHAPES: ClassVar[list[str]] = sorted(_SHAPE_MAPPING)
    """The list of available shapes, which can be visualized with
    :meth:`.plot_available_shapes`."""

    def __init__(self, dataset: Dataset) -> None:
        self._dataset: Dataset = dataset

    def generate_shape(self, shape: str, **kwargs: Number) -> Shape:
        """
        Generate the shape object based on the dataset.

        Parameters
        ----------
        shape : str
            The desired shape. See :attr:`.AVAILABLE_SHAPES`.
        **kwargs
            Additional keyword arguments to pass down when creating
            the shape.

        Returns
        -------
        Shape
            A shape object of the requested type.

        Raises
        ------
        ValueError
            If ``shape`` is not one of the registered shape names.
        """
        # only the lookup is guarded: a KeyError raised while constructing the
        # shape must surface as-is rather than be reported as an unknown shape
        try:
            shape_cls = self._SHAPE_MAPPING[shape]
        except KeyError as err:
            raise ValueError(f'No such shape as {shape}.') from err
        return shape_cls(self._dataset, **kwargs)

    @plot_with_custom_style
    def plot_available_shapes(self) -> Axes:
        """
        Plot the available target shapes.

        Returns
        -------
        matplotlib.axes.Axes
            The :class:`~matplotlib.axes.Axes` objects created for the grid,
            exactly as returned by :func:`matplotlib.pyplot.subplots`.

        See Also
        --------
        AVAILABLE_SHAPES
            The list of available shapes.
        """
        num_cols = 6
        num_plots = len(self.AVAILABLE_SHAPES)
        num_rows = int(np.ceil(num_plots / num_cols))

        fig, axs = plt.subplots(
            num_rows,
            num_cols,
            layout='constrained',
            figsize=(2 * num_cols, 2 * num_rows),
        )
        fig.get_layout_engine().set(w_pad=0.2, h_pad=0.2)

        # the grid may have more cells than shapes; zip_longest pads the shape
        # names with None so the leftover cells can be removed
        for shape, ax in zip_longest(self.AVAILABLE_SHAPES, axs.flatten()):
            if shape is None:
                ax.remove()
                continue

            ax.tick_params(
                axis='both',
                which='both',
                bottom=False,
                left=False,
                right=False,
                labelbottom=False,
                labelleft=False,
            )
            shape_obj = self.generate_shape(shape)
            shape_obj.plot(ax=ax).set(xlabel='', ylabel='', title=str(shape_obj))
        return axs
Each method will 18 | yield some (x, y) points, which may be in web browser coordinates or Cartesian 19 | coordinates. Save these to a text file called ``points.txt`` for processing in the 20 | :ref:`next step `. 21 | 22 | .. note:: 23 | All tools included in this section are for reference only; 24 | this is not an endorsement. 25 | 26 | 27 | Drawing a shape 28 | ~~~~~~~~~~~~~~~ 29 | 30 | If you have a shape in mind or plan to trace one, you can use a tool like the 31 | following to create (x, y) points by free-hand drawing or tracing an image: 32 | 33 | * Trace an image with `Mobilefish.com`_ (web browser coordinates) 34 | * Draw an image with `DrawMyData`_ (Cartesian coordinates) 35 | 36 | .. _DrawMyData: http://robertgrantstats.co.uk/drawmydata.html 37 | .. _Mobilefish.com: https://www.mobilefish.com/services/record_mouse_coordinates/record_mouse_coordinates.php 38 | 39 | 40 | Using an SVG image 41 | ~~~~~~~~~~~~~~~~~~ 42 | 43 | If you are starting from an SVG image, you can use a tool like `PathToPoints`_ 44 | to generate points (in web browser coordinates) from the paths in the SVG file. 45 | Depending on the starting image, you may want to `crop`_ and/or `remove whitespace`_ 46 | from the SVG file before generating the points. Note that the linked tools are just 47 | examples; make sure to look for the tool that works for your use case. 48 | 49 | .. _crop: https://msurguy.github.io/svg-cropper-tool/ 50 | .. _remove whitespace: https://svgcrop.com/ 51 | .. _PathToPoints: https://shinao.github.io/PathToPoints/ 52 | 53 | 54 | Create a CSV file in Cartesian coordinates 55 | ------------------------------------------ 56 | 57 | Depending on the tool you use to generate your points in the previous step, 58 | your points may be in the web browser coordinate system, in which case they 59 | will appear upside-down unless we flip them. 
Use the following code to convert 60 | the points into Cartesian coordinates (if necessary), save to a CSV file for 61 | morphing, and plot it: 62 | 63 | .. code:: python 64 | 65 | import pandas as pd 66 | import matplotlib.pyplot as plt 67 | 68 | 69 | # whether the points are in web browser coordinates 70 | browser_coordinates = True 71 | 72 | with open('points.txt') as file: 73 | points = pd.DataFrame( 74 | [tuple(map(float, line.split(','))) for line in file.readlines()], 75 | columns=['x', 'y'], 76 | ) 77 | 78 | if browser_coordinates: 79 | # reflect points over the x-axis (web browser coordinates only) 80 | points = points.assign(y=lambda df: -df.y) 81 | 82 | points.to_csv('points.csv', index=False) 83 | 84 | points.plot(kind='scatter', x='x', y='y', color='black', s=1).axis('equal') 85 | plt.show() 86 | 87 | .. note:: 88 | While Data Morph provides a scaling option, consider scaling the data when 89 | creating your CSV file to save some typing later. For example, you can divide 90 | all values by 10 to scale down by a factor of 10. This makes morphing faster. 91 | 92 | Likewise, you can shift the data in the x/y direction at this step, although 93 | this is purely aesthetic. 94 | 95 | 96 | Morph the data 97 | -------------- 98 | Pass the path to the CSV file to use those points as the starting shape: 99 | 100 | .. code:: console 101 | 102 | $ data-morph --start path/to/points.csv --target wide_lines --classic 103 | 104 | Here is an example animation generated from a custom dataset: 105 | 106 | .. figure:: ../_static/tutorials/easter-egg-to-wide-lines.gif 107 | :alt: Congratulations, you've found the Easter egg! 108 | :align: center 109 | 110 | Congratulations, you've found the Easter egg! 
111 | 112 | 113 | (Optional) Contribute the dataset 114 | --------------------------------- 115 | 116 | If you have the rights to distribute the dataset and you think it would 117 | be a good built-in dataset, `create an issue `_ 118 | in the Data Morph repository proposing its inclusion. Be sure to consult the 119 | `contributing guidelines `_ 120 | before doing so. 121 | 122 | If and only if you are given the go ahead: 123 | 124 | 1. Add your CSV file to the ``src/data_morph/data/starter_shapes/`` directory. 125 | 2. Add an entry to the ``DataLoader._DATASETS`` dictionary in ``src/data_morph/data/loader.py``. 126 | 3. Submit your pull request. 127 | 128 | .. note:: 129 | For inclusion in Data Morph, the proposed dataset must work with more 130 | than one shape. You can pass ``all`` as the target shape to generate all 131 | options for inspection: 132 | 133 | .. code:: console 134 | 135 | $ data-morph --start path/to/points.csv --target all 136 | -------------------------------------------------------------------------------- /src/data_morph/data/starter_shapes/pi.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 13.0,-44.0 3 | 9.0,-48.0 4 | 6.0,-54.0 5 | 4.0,-62.0 6 | 5.0,-67.0 7 | 9.0,-68.0 8 | 17.0,-66.0 9 | 22.0,-62.0 10 | 29.0,-56.0 11 | 33.0,-51.0 12 | 37.0,-46.0 13 | 45.0,-45.0 14 | 51.0,-44.0 15 | 58.0,-44.0 16 | 59.0,-50.0 17 | 60.0,-58.0 18 | 60.0,-64.0 19 | 57.0,-74.0 20 | 57.0,-79.0 21 | 57.0,-91.0 22 | 56.0,-97.0 23 | 54.0,-111.0 24 | 52.0,-118.0 25 | 51.0,-125.0 26 | 50.0,-129.0 27 | 46.0,-142.0 28 | 43.0,-147.0 29 | 38.0,-156.0 30 | 32.0,-164.0 31 | 32.0,-164.0 32 | 25.0,-176.0 33 | 22.0,-184.0 34 | 27.0,-194.0 35 | 29.0,-198.0 36 | 37.0,-199.0 37 | 43.0,-203.0 38 | 53.0,-200.0 39 | 57.0,-198.0 40 | 63.0,-191.0 41 | 66.0,-184.0 42 | 68.0,-180.0 43 | 72.0,-169.0 44 | 75.0,-157.0 45 | 76.0,-147.0 46 | 78.0,-135.0 47 | 78.0,-121.0 48 | 81.0,-110.0 49 | 81.0,-98.0 50 | 82.0,-88.0 51 | 85.0,-75.0 52 | 
85.0,-68.0 53 | 87.0,-56.0 54 | 87.0,-50.0 55 | 88.0,-44.0 56 | 94.0,-44.0 57 | 101.0,-43.0 58 | 110.0,-43.0 59 | 115.0,-43.0 60 | 125.0,-42.0 61 | 132.0,-45.0 62 | 131.0,-51.0 63 | 129.0,-63.0 64 | 128.0,-70.0 65 | 126.0,-80.0 66 | 124.0,-94.0 67 | 124.0,-103.0 68 | 124.0,-118.0 69 | 122.0,-129.0 70 | 123.0,-143.0 71 | 125.0,-155.0 72 | 127.0,-169.0 73 | 129.0,-180.0 74 | 134.0,-191.0 75 | 139.0,-196.0 76 | 147.0,-203.0 77 | 156.0,-204.0 78 | 169.0,-204.0 79 | 183.0,-200.0 80 | 188.0,-194.0 81 | 195.0,-186.0 82 | 201.0,-178.0 83 | 204.0,-168.0 84 | 207.0,-156.0 85 | 209.0,-147.0 86 | 205.0,-138.0 87 | 199.0,-140.0 88 | 194.0,-147.0 89 | 189.0,-157.0 90 | 184.0,-159.0 91 | 170.0,-162.0 92 | 162.0,-153.0 93 | 155.0,-143.0 94 | 156.0,-130.0 95 | 153.0,-117.0 96 | 155.0,-106.0 97 | 155.0,-94.0 98 | 155.0,-80.0 99 | 159.0,-70.0 100 | 157.0,-57.0 101 | 161.0,-47.0 102 | 165.0,-42.0 103 | 173.0,-42.0 104 | 180.0,-41.0 105 | 188.0,-42.0 106 | 199.0,-40.0 107 | 206.0,-39.0 108 | 208.0,-35.0 109 | 209.0,-31.0 110 | 208.0,-21.0 111 | 208.0,-13.0 112 | 207.0,-7.0 113 | 200.0,-5.0 114 | 193.0,-5.0 115 | 182.0,-6.0 116 | 175.0,-6.0 117 | 162.0,-7.0 118 | 141.0,-8.0 119 | 124.0,-6.0 120 | 107.0,-8.0 121 | 87.0,-6.0 122 | 71.0,-7.0 123 | 57.0,-7.0 124 | 45.0,-9.0 125 | 36.0,-13.0 126 | 33.0,-15.0 127 | 27.0,-20.0 128 | 21.0,-25.0 129 | 16.0,-33.0 130 | 15.0,-36.0 131 | 14.0,-56.0 132 | 20.0,-50.0 133 | 23.0,-39.0 134 | 30.0,-46.0 135 | 33.0,-29.0 136 | 39.0,-40.0 137 | 46.0,-23.0 138 | 51.0,-33.0 139 | 60.0,-17.0 140 | 67.0,-35.0 141 | 68.0,-23.0 142 | 54.0,-29.0 143 | 39.0,-30.0 144 | 75.0,-18.0 145 | 75.0,-33.0 146 | 72.0,-39.0 147 | 81.0,-36.0 148 | 87.0,-22.0 149 | 91.0,-15.0 150 | 99.0,-18.0 151 | 95.0,-32.0 152 | 94.0,-34.0 153 | 101.0,-37.0 154 | 107.0,-33.0 155 | 113.0,-27.0 156 | 111.0,-21.0 157 | 118.0,-16.0 158 | 124.0,-23.0 159 | 124.0,-28.0 160 | 125.0,-33.0 161 | 134.0,-35.0 162 | 136.0,-29.0 163 | 139.0,-21.0 164 | 130.0,-20.0 165 | 118.0,-35.0 166 | 116.0,-23.0 
167 | 103.0,-27.0 168 | 97.0,-24.0 169 | 84.0,-29.0 170 | 125.0,-15.0 171 | 139.0,-16.0 172 | 150.0,-16.0 173 | 155.0,-17.0 174 | 147.0,-27.0 175 | 155.0,-30.0 176 | 147.0,-36.0 177 | 164.0,-23.0 178 | 164.0,-23.0 179 | 174.0,-19.0 180 | 189.0,-21.0 181 | 180.0,-27.0 182 | 194.0,-32.0 183 | 201.0,-12.0 184 | 175.0,-29.0 185 | 163.0,-33.0 186 | 157.0,-38.0 187 | 147.0,-47.0 188 | 139.0,-50.0 189 | 143.0,-60.0 190 | 135.0,-69.0 191 | 142.0,-74.0 192 | 140.0,-87.0 193 | 146.0,-91.0 194 | 136.0,-99.0 195 | 135.0,-110.0 196 | 139.0,-119.0 197 | 144.0,-117.0 198 | 146.0,-105.0 199 | 147.0,-97.0 200 | 130.0,-117.0 201 | 131.0,-129.0 202 | 132.0,-134.0 203 | 140.0,-135.0 204 | 142.0,-130.0 205 | 146.0,-124.0 206 | 149.0,-138.0 207 | 132.0,-147.0 208 | 130.0,-141.0 209 | 142.0,-147.0 210 | 151.0,-150.0 211 | 142.0,-160.0 212 | 138.0,-155.0 213 | 133.0,-160.0 214 | 135.0,-169.0 215 | 147.0,-165.0 216 | 151.0,-156.0 217 | 160.0,-160.0 218 | 160.0,-167.0 219 | 157.0,-173.0 220 | 148.0,-172.0 221 | 143.0,-180.0 222 | 136.0,-179.0 223 | 143.0,-188.0 224 | 152.0,-189.0 225 | 148.0,-198.0 226 | 160.0,-199.0 227 | 158.0,-190.0 228 | 152.0,-180.0 229 | 162.0,-181.0 230 | 170.0,-170.0 231 | 170.0,-179.0 232 | 183.0,-169.0 233 | 192.0,-165.0 234 | 198.0,-155.0 235 | 202.0,-145.0 236 | 199.0,-165.0 237 | 191.0,-176.0 238 | 190.0,-183.0 239 | 184.0,-186.0 240 | 179.0,-179.0 241 | 177.0,-171.0 242 | 177.0,-184.0 243 | 168.0,-188.0 244 | 166.0,-194.0 245 | 172.0,-196.0 246 | 178.0,-193.0 247 | 66.0,-46.0 248 | 75.0,-48.0 249 | 70.0,-59.0 250 | 66.0,-66.0 251 | 72.0,-73.0 252 | 75.0,-66.0 253 | 78.0,-54.0 254 | 69.0,-53.0 255 | 65.0,-72.0 256 | 67.0,-84.0 257 | 71.0,-87.0 258 | 77.0,-78.0 259 | 79.0,-66.0 260 | 76.0,-92.0 261 | 67.0,-99.0 262 | 63.0,-92.0 263 | 75.0,-102.0 264 | 66.0,-105.0 265 | 61.0,-102.0 266 | 61.0,-114.0 267 | 69.0,-116.0 268 | 73.0,-107.0 269 | 75.0,-117.0 270 | 68.0,-124.0 271 | 60.0,-119.0 272 | 60.0,-124.0 273 | 58.0,-130.0 274 | 64.0,-133.0 275 | 74.0,-133.0 276 
class DownParabola(PointCollection):
    """
    Point-based shape tracing a parabola that opens downward.

    .. plot::
        :scale: 75
        :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import DownParabola

        dataset = DataLoader.load_dataset('panda')
        shape = DownParabola(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    """

    name = 'down_parab'

    def __init__(self, dataset: Dataset) -> None:
        x_bounds, y_bounds = dataset.data_bounds
        left, right = x_bounds
        bottom, top = y_bounds

        # pull both feet of the parabola in by a tenth of the x-range
        inset = x_bounds.range / 10

        # feet sit at the bottom of the y-range, the vertex at the top
        anchor_xs = [left + inset, x_bounds.center, right - inset]
        anchor_ys = [bottom, top, bottom]
        curve = np.polynomial.Polynomial.fit(anchor_xs, anchor_ys, 2)

        # sample the fitted quadratic and hand each (x, y) pair to the base class
        xs, ys = curve.linspace()
        super().__init__(*np.stack([xs, ys], axis=1))
class UpParabola(PointCollection):
    """
    Point-based shape tracing a parabola that opens upward.

    .. plot::
        :scale: 75
        :caption:
            This shape is generated using the panda dataset.

        from data_morph.data.loader import DataLoader
        from data_morph.plotting.diagnostics import plot_shape_on_dataset
        from data_morph.shapes.points import UpParabola

        dataset = DataLoader.load_dataset('panda')
        shape = UpParabola(dataset)
        plot_shape_on_dataset(dataset, shape, show_bounds=False, alpha=0.25)

    Parameters
    ----------
    dataset : Dataset
        The starting dataset to morph into other shapes.
    """

    name = 'up_parab'

    def __init__(self, dataset: Dataset) -> None:
        x_bounds, y_bounds = dataset.data_bounds
        left, right = x_bounds
        bottom, top = y_bounds

        # pull both arms of the parabola in by a tenth of the x-range
        inset = x_bounds.range / 10

        # arms end at the top of the y-range, the vertex sits at the bottom
        anchor_xs = [left + inset, x_bounds.center, right - inset]
        anchor_ys = [top, bottom, top]
        curve = np.polynomial.Polynomial.fit(anchor_xs, anchor_ys, 2)

        # sample the fitted quadratic and hand each (x, y) pair to the base class
        xs, ys = curve.linspace()
        super().__init__(*np.stack([xs, ys], axis=1))
32 | """ 33 | 34 | def __init__(self, dataset: Dataset) -> None: 35 | scale_factor = min(*dataset.data_bounds.range) / 75 36 | 37 | x_lobes, y_lobes = self._get_lobes(scale_factor) 38 | x_stem, y_stem = self._get_stem(scale_factor) 39 | 40 | x_center, y_center = dataset.data_bounds.center 41 | xs = x_center + np.concatenate(x_lobes + x_stem) 42 | ys = y_center + np.concatenate(y_lobes + y_stem) 43 | 44 | super().__init__(*np.stack([xs, ys], axis=1)) 45 | 46 | @staticmethod 47 | def _get_arc( 48 | r: Number, 49 | t: np.ndarray, 50 | angle_offset: np.float64, 51 | x_offset: Number, 52 | y_offset: Number, 53 | ) -> tuple[np.ndarray, np.ndarray]: 54 | """ 55 | Get arc of a circle. 56 | 57 | Parameters 58 | ---------- 59 | r : Number 60 | The radius of the circle. 61 | t : numpy.ndarray 62 | The values to sample at in radians. 63 | angle_offset : numpy.float64 64 | Angle at which to start the arc in radians. 65 | x_offset : Number 66 | A constant value to shift the *x* coordinates by. 67 | y_offset : Number 68 | A constant value to shift the *y* coordinates by. 69 | 70 | Returns 71 | ------- 72 | tuple[numpy.ndarray, numpy.ndarray] 73 | The *x* and *y* coordinates for the arc. 74 | """ 75 | x = r * np.cos(t + angle_offset) + x_offset 76 | y = r * np.sin(t + angle_offset) + y_offset 77 | return x, y 78 | 79 | @classmethod 80 | def _get_lobes( 81 | cls, scale_factor: Number 82 | ) -> tuple[list[np.ndarray], list[np.ndarray]]: 83 | """ 84 | Get the lobes of the club. 85 | 86 | Parameters 87 | ---------- 88 | scale_factor : Number 89 | The factor to scale up/down the radius of the arcs used to calculate the lobes. 90 | 91 | Returns 92 | ------- 93 | tuple[list[numpy.ndarray], list[numpy.ndarray]] 94 | The *x* and *y* coordinates for the lobes. 
95 | """ 96 | radius = 15 * scale_factor 97 | top_lobe_y_offset = 18 * scale_factor 98 | bottom_lobes_x_offset = 15 * scale_factor 99 | bottom_lobes_y_offset = 9 * scale_factor 100 | 101 | t = np.linspace(0, (2 - 1 / 3) * np.pi, num=30) 102 | 103 | x_top, y_top = cls._get_arc(radius, t, -np.pi / 3, 0, top_lobe_y_offset) 104 | x_bottom_left, y_bottom_left = cls._get_arc( 105 | radius, t, np.pi / 3, -bottom_lobes_x_offset, -bottom_lobes_y_offset 106 | ) 107 | x_bottom_right, y_bottom_right = cls._get_arc( 108 | radius, t, np.pi, bottom_lobes_x_offset, -bottom_lobes_y_offset 109 | ) 110 | 111 | x_lobes = [x_top, x_bottom_left, x_bottom_right] 112 | y_lobes = [y_top, y_bottom_left, y_bottom_right] 113 | 114 | return x_lobes, y_lobes 115 | 116 | @classmethod 117 | def _get_stem( 118 | cls, scale_factor: Number 119 | ) -> tuple[list[np.ndarray], list[np.ndarray]]: 120 | """ 121 | Get the stem of the club. 122 | 123 | Parameters 124 | ---------- 125 | scale_factor : Number 126 | The factor to scale up/down the stem. 127 | 128 | Returns 129 | ------- 130 | tuple[list[numpy.ndarray], list[numpy.ndarray]] 131 | The *x* and *y* coordinates for the stem. 
class Interval:
    """
    Class representing a range of numeric values.

    Parameters
    ----------
    bounds : Iterable[numbers.Number]
        A 2-dimensional numeric iterable holding the left and right bounds.
        The right bound must be strictly greater than the left bound.
    inclusive : bool, default ``False``
        Whether the bounds include the endpoints. Default
        is exclusive.
    """

    def __init__(
        self,
        bounds: Iterable[Number],
        inclusive: bool = False,
    ) -> None:
        self._bounds = self._validate_bounds(bounds)
        self._inclusive = inclusive

    def __contains__(self, value: Number) -> bool:
        """
        Add support for using the ``in`` operator to check whether
        ``value`` is in the interval.

        Parameters
        ----------
        value : numbers.Number
            A numeric value.

        Returns
        -------
        bool
            Whether ``value`` is contained in the interval.

        Raises
        ------
        TypeError
            If ``value`` is not numeric (Booleans and ``None`` are rejected).
        """
        # bool is a Number subclass, so it has to be rejected explicitly;
        # None is not a Number, so the isinstance check already covers it
        if isinstance(value, bool) or not isinstance(value, Number):
            raise TypeError('This operation is only supported for numeric values.')

        if self._inclusive:
            return self._bounds[0] <= value <= self._bounds[1]
        return self._bounds[0] < value < self._bounds[1]

    def __eq__(self, other: 'Interval') -> bool:
        """
        Check whether two :class:`.Interval` objects are equivalent.

        Parameters
        ----------
        other : Interval
            A :class:`.Interval` object.

        Returns
        -------
        bool
            Whether the two :class:`.Interval` objects are equivalent.

        Raises
        ------
        TypeError
            If ``other`` is not an :class:`.Interval`.
        """
        if not isinstance(other, Interval):
            raise TypeError('Equality is only defined between Interval objects.')
        return self._bounds == other._bounds and self._inclusive == other._inclusive

    def __getitem__(self, index: int) -> Number:
        """
        Add support for indexing into the bounds.

        Parameters
        ----------
        index : int
            The index to access.

        Returns
        -------
        numbers.Number
            The value for the bounds at ``index``.
        """
        return self._bounds[index]

    def __iter__(self) -> Number:
        """
        Iterate over the bounds.

        Returns
        -------
        numbers.Number
            The next value of the bounds.
        """
        return iter(self._bounds)

    def __repr__(self) -> str:
        """
        Build a readable representation of the interval.

        Returns
        -------
        str
            The kind of interval followed by its bounds in interval notation,
            e.g., ``<Interval inclusive [0, 1]>`` or ``<Interval exclusive (0, 1)>``.
        """
        values = ', '.join(map(str, self._bounds))
        if self._inclusive:
            interval = f'[{values}]'
            kind = 'inclusive'
        else:
            interval = f'({values})'
            kind = 'exclusive'
        # previously this returned an empty string, discarding both values above
        return f'<Interval {kind} {interval}>'

    def _validate_bounds(self, bounds: Iterable[Number]) -> Iterable[Number]:
        """
        Validate the proposed bounds.

        Parameters
        ----------
        bounds : Iterable[numbers.Number]
            An iterable of min/max bounds.

        Returns
        -------
        Iterable[numbers.Number]
            An iterable of min/max bounds.

        Raises
        ------
        ValueError
            If the right bound is not strictly greater than the left bound.
        """
        # stored as a list so that adjust_bounds() can modify it in place
        bounds = list(_validate_2d(bounds, 'bounds'))

        if bounds[0] >= bounds[1]:
            raise ValueError('Right bound must be strictly greater than left bound.')
        return bounds

    def adjust_bounds(self, value: Number) -> None:
        """
        Adjust bound range.

        Parameters
        ----------
        value : numbers.Number
            The amount to change the range by (half will be applied to each end).

        Raises
        ------
        TypeError
            If ``value`` is not numeric (Booleans and ``None`` are rejected).
        ValueError
            If ``value`` is zero.
        """
        # None is not a Number, so the isinstance check already covers it
        if isinstance(value, bool) or not isinstance(value, Number):
            raise TypeError('value must be a numeric value')
        if not value:
            raise ValueError('value must be non-zero')

        offset = value / 2
        self._bounds[0] -= offset
        self._bounds[1] += offset

    def clone(self) -> 'Interval':
        """
        Clone this instance.

        Returns
        -------
        Interval
            A new :class:`.Interval` instance with the same bounds.
        """
        # copy the list so that adjusting the clone leaves this instance untouched
        return Interval(self._bounds[:], self._inclusive)

    @property
    def range(self) -> Number:
        """
        Calculate the range (width) of the interval.

        Returns
        -------
        numbers.Number
            The range covered by the interval.
        """
        return abs(self._bounds[1] - self._bounds[0])

    @property
    def center(self) -> Number:
        """
        Calculate the center of the interval.

        Returns
        -------
        numbers.Number
            The center of the interval.
        """
        return sum(self) / 2
for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | data-morph-coc@stefaniemolin.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. 
Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 
123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | 4 | requires = [ 5 | "setuptools==76.1.0", 6 | "setuptools-scm", 7 | ] 8 | 9 | [project] 10 | name = "data-morph-ai" 11 | description = "Teaching tool on the importance of data visualization." 12 | readme = "README.md" 13 | keywords = [ 14 | "data animation", 15 | "data visualization", 16 | "summary statistics", 17 | ] 18 | license = { file = "LICENSE" } 19 | maintainers = [ 20 | { name = "Stefanie Molin" }, 21 | ] 22 | authors = [ 23 | { name = "Stefanie Molin", email = "data-morph@stefaniemolin.com" }, 24 | { name = "Aaron Stevens", email = "bheklilr2@gmail.com" }, 25 | { name = "Justin Matejka", email = "Justin.Matejka@Autodesk.com" }, 26 | ] 27 | requires-python = ">=3.10" 28 | classifiers = [ 29 | "Development Status :: 4 - Beta", 30 | "Framework :: Matplotlib", 31 | "Intended Audience :: Education", 32 | "Operating System :: OS Independent", 33 | "Programming Language :: Python :: 3 :: Only", 34 | "Programming Language :: Python :: 3.10", 35 | "Programming Language :: Python :: 3.11", 36 | "Programming Language :: Python :: 3.12", 37 | "Programming Language :: Python :: 3.13", 38 | "Programming Language :: Python :: 3.14", 39 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 40 | "Topic :: Scientific/Engineering :: Visualization", 41 | ] 42 | dynamic = [ 43 | "version", 44 | ] 45 | 46 | dependencies = [ 47 | "matplotlib>=3.10", 48 | "numpy>=1.23.0", 49 | "pandas>=2.1", 50 | "rich>=13.9.4", 51 | ] 52 | 53 | urls."Bug 
Tracker" = "https://github.com/stefmolin/data-morph/issues" 54 | urls."Documentation" = "https://stefaniemolin.com/data-morph/stable/api.html" 55 | urls."Homepage" = "https://stefaniemolin.com/data-morph" 56 | urls."Source" = "https://github.com/stefmolin/data-morph" 57 | scripts.data-morph = "data_morph.cli:main" 58 | 59 | [dependency-groups] 60 | dev = [ "pre-commit", { include-group = "docs" }, { include-group = "test" } ] 61 | 62 | test = [ 63 | "pytest>=8.3.4", 64 | "pytest-cov>=4.1.0", 65 | "pytest-mock>=3.10.0", 66 | "pytest-randomly>=3.12.0", 67 | "pytest-xdist>=3.6.1", 68 | ] 69 | 70 | docs = [ 71 | "pydata-sphinx-theme>=0.15.3", 72 | "sphinx>=7.2.6", 73 | "sphinx-argparse-cli>=1.16.0", 74 | "sphinx-copybutton>=0.5.1", 75 | "sphinx-design>=0.6.1", 76 | ] 77 | 78 | [tool.setuptools.packages.find] 79 | where = [ 80 | "src", 81 | ] 82 | 83 | [tool.setuptools.package-data] 84 | "data_morph.data.starter_shapes" = [ 85 | "*", 86 | ] 87 | "data_morph.plotting.config" = [ 88 | "*", 89 | ] 90 | 91 | [tool.setuptools.dynamic] 92 | version = { attr = "data_morph.__version__" } 93 | 94 | [tool.ruff] 95 | line-length = 88 96 | format.indent-style = "space" 97 | format.quote-style = "single" 98 | format.docstring-code-format = true 99 | lint.select = [ 100 | "ANN", # flake8-annotations 101 | "B", # flake8-bugbear 102 | "C4", # flake8-comprehensions 103 | "E", # pycodestyle error 104 | "ERA", # eradicate (commented out code) 105 | "F", # pyflakes 106 | "FA", # flake8-future-annotations 107 | "I", # isort 108 | "ICN", # flake8-import-conventions 109 | "N", # pep8-naming 110 | "NPY", # numpy 111 | "PD", # pandas-vet 112 | "PERF", # perflint 113 | "PT", # flake8-pytest-style 114 | "PTH", # flake8-use-pathlib 115 | "RET", # flake8-return 116 | "RSE", # flake8-raise 117 | "RUF", # ruff-specific rules 118 | "SIM", # flake8-simplify 119 | "TC", # flake8-type-checking (performance improvements) 120 | "TRY", # tryceratops 121 | "UP", # pyupgrade 122 | "W", # pycodestyle warning 
123 | ] 124 | lint.ignore = [ 125 | "E501", # line-too-long 126 | "TRY003", # avoid specifying long messages outside the exception class (revisit later and consider making custom exceptions) 127 | ] 128 | 129 | lint.extend-per-file-ignores."docs/*" = [ 130 | "ANN", 131 | "ERA", 132 | ] # docs build (allow commented code and don't require annotations) 133 | 134 | lint.extend-per-file-ignores."tests/*" = [ "ANN" ] # don't require annotations for tests 135 | lint.isort.known-first-party = [ 136 | "data_morph", 137 | ] 138 | 139 | [tool.codespell] 140 | ignore-words-list = "recuse" 141 | ignore-regex = 'https://([\w/\.])+' 142 | 143 | [tool.pytest.ini_options] 144 | addopts = [ 145 | "-ra", 146 | "-l", 147 | "-v", 148 | "-n=auto", # use as many workers as possible with pytest-xdist 149 | "--tb=short", 150 | "--import-mode=importlib", 151 | "--strict-markers", 152 | "--strict-config", 153 | "--cov=data_morph", 154 | "--cov=tests", 155 | "--no-cov-on-fail", 156 | "--cov-report=term-missing", 157 | ] 158 | xfail_strict = true 159 | testpaths = [ 160 | "tests", 161 | ] 162 | markers = [ 163 | "bounds: Run tests on bounds logic.", 164 | "circles: Run tests on circular shapes.", 165 | "cli: Run tests involving the CLI.", 166 | "dataset: Run tests on Dataset logic.", 167 | "input_validation: Run tests on input validation.", 168 | "lines: Run tests on LineCollection and related shapes.", 169 | "morpher: Run tests on the morphing process.", 170 | "plotting: Run tests on plotting logic.", 171 | "points: Run tests on PointCollection and related shapes.", 172 | "polygons: Run tests on polygon shapes.", 173 | "shapes: Run tests related to shapes.", 174 | ] 175 | 176 | [tool.numpydoc_validation] 177 | checks = [ 178 | "all", # report on all checks 179 | "ES01", # but don't require an extended summary 180 | "EX01", # or examples 181 | "SA01", # or a see also section 182 | "SS06", # and don't require the summary to fit on one line 183 | ] 184 | exclude = [ # don't report on checks for
these 185 | '\.__init__$', 186 | '\.__iter__$', 187 | '\.__repr__$', 188 | '\.__str__$', 189 | ] 190 | override_SS05 = [ # allow docstrings to start with these words 191 | '^Unambiguous ', 192 | ] 193 | --------------------------------------------------------------------------------