├── tests
├── __init__.py
├── unit
│ ├── __init__.py
│ ├── plugins
│ │ ├── __init__.py
│ │ ├── framework_specific
│ │ │ └── __init__.py
│ │ ├── expected
│ │ │ ├── script_complex_h
│ │ │ │ └── sample_output
│ │ │ │ │ ├── f.pkl
│ │ │ │ │ └── h.pkl
│ │ │ ├── script_pipeline_a0_b0_dependencies
│ │ │ │ └── sample_output
│ │ │ │ │ ├── a0.pkl
│ │ │ │ │ └── b0.pkl
│ │ │ └── script_pipeline_housing_w_dependencies
│ │ │ │ └── sample_output
│ │ │ │ │ ├── y.pkl
│ │ │ │ │ └── p_value.pkl
│ │ ├── test_task.py
│ │ └── test_utils.py
│ ├── graph_reader
│ │ ├── inputs
│ │ │ ├── simple_twovar
│ │ │ ├── simple
│ │ │ ├── mutate_after_save
│ │ │ ├── extract_common
│ │ │ ├── module_import
│ │ │ ├── module_import_alias
│ │ │ ├── linear
│ │ │ ├── complex
│ │ │ ├── module_import_from
│ │ │ └── housing
│ │ └── test_artifact_get_code.py
│ ├── system_tracing
│ │ └── test_op_stack.py
│ ├── transformer
│ │ ├── test_transform_code.py
│ │ └── test_source_giver.py
│ ├── cli
│ │ └── test_cli.py
│ ├── utils
│ │ └── test_config.py
│ ├── db
│ │ ├── test_db_utils.py
│ │ └── test_literal_node.py
│ └── migration
│ │ └── test_migrations.py
├── integration
│ ├── __init__.py
│ ├── .gitignore
│ ├── sources
│ │ └── matplotlib-tutorial
│ │ │ ├── figures
│ │ │ │ └── .gitkeep
│ │ │ └── scripts
│ │ │ │ ├── alpha.py
│ │ │ │ ├── aliased.py
│ │ │ │ ├── dash_joinstyle.py
│ │ │ │ └── exercice_3.py
│ ├── __snapshots__
│ │ └── test_slice
│ │ │ ├── test_slice[pandas_timeseries].py
│ │ │ ├── test_slice[pandas_deleting].py
│ │ │ ├── test_slice[matplotlib_aliased].py
│ │ │ ├── test_slice[pandas_apply].py
│ │ │ ├── test_slice[sklearn_semi_supervised_plot_label_propagation_structure].py
│ │ │ ├── test_slice[matplotlib_exercise_3].py
│ │ │ ├── test_slice[sklearn_multioutput_plot_classifier_chain_yeast].py
│ │ │ ├── test_slice[pandas_stats].py
│ │ │ ├── test_slice[sklearn_model_selection_plot_randomized_search].py
│ │ │ ├── test_slice[pandas_merge].py
│ │ │ ├── test_slice[sklearn_tree_plot_cost_complexity_pruning].py
│ │ │ ├── test_slice[sklearn_preprocessing_plot_scaling_importance].py
│ │ │ ├── test_slice[matplotlib_dash_joinstyle].py
│ │ │ ├── test_slice[sklearn_compose_plot_feature_union].py
│ │ │ └── test_slice[pytorch_intro_torchscript].py
│ └── slices
│ │ ├── pandas_timeseries.py
│ │ ├── matplotlib_alpha.py
│ │ ├── matplotlib_aliased.py
│ │ ├── pandas_apply.py
│ │ ├── pandas_deleting.py
│ │ ├── matplotlib_exercise_3.py
│ │ ├── sklearn_semi_supervised_plot_label_propagation_structure.py
│ │ ├── pandas_stats.py
│ │ ├── sklearn_multioutput_plot_classifier_chain_yeast.py
│ │ ├── sklearn_model_selection_plot_randomized_search.py
│ │ ├── xgboost_sklearn_examples.py
│ │ ├── pandas_merge.py
│ │ ├── sklearn_tree_plot_cost_complexity_pruning.py
│ │ ├── sklearn_preprocessing_plot_scaling_importance.py
│ │ ├── matplotlib_dash_joinstyle.py
│ │ ├── sklearn_compose_plot_feature_union.py
│ │ └── pytorch_vision_tensor_transform.py
├── notebook
│ ├── .gitignore
│ ├── pyproject.toml
│ └── test_is_executing.ipynb
├── outputs
│ ├── generated
│ │ └── .keep
│ └── expected
│ │ ├── sliced_housing_simple_requirements.txt
│ │ ├── sliced_housing_multiple_requirements.txt
│ │ ├── sliced_housing_multiple_w_dependencies_requirements.txt
│ │ ├── sliced_housing_simple_script_dag.py
│ │ ├── sliced_housing_multiple_script_dag.py
│ │ ├── sliced_housing_multiple_w_dependencies_script_dag.py
│ │ ├── sliced_housing_simple_Dockerfile
│ │ ├── sliced_housing_multiple_Dockerfile
│ │ ├── sliced_housing_multiple_w_dependencies_Dockerfile
│ │ ├── sliced_housing_simple.py
│ │ ├── sliced_housing_simple_dag.py
│ │ ├── sliced_housing_multiple_dag.py
│ │ ├── sliced_housing_multiple_w_dependencies_dag.py
│ │ ├── sliced_housing_multiple.py
│ │ └── sliced_housing_multiple_w_dependencies.py
├── end_to_end
│ ├── import_data
│ │ ├── __init__.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── __will_not_import.py
│ │ │ ├── __error_on_load.py
│ │ │ ├── __no_imported_submodule_prime.py
│ │ │ └── __no_imported_submodule.py
│ ├── import_with_name_conflict
│ │ ├── data.py
│ │ └── __init__.py
│ ├── __snapshots__
│ │ ├── test_misc
│ │ │ ├── TestEndToEnd.test_messy_nodes.1.py
│ │ │ ├── TestEndToEnd.test_messy_nodes_slice.py
│ │ │ ├── TestEndToEnd.test_housing.py
│ │ │ └── TestEndToEnd.test_simple.py
│ │ ├── test_literal
│ │ │ └── test_ellipsis.py
│ │ ├── test_list_comp
│ │ │ └── test_returns_value.py
│ │ ├── test_op
│ │ │ ├── test_sub.py
│ │ │ ├── test_invert.py
│ │ │ └── test_not.py
│ │ ├── test_var_aliasing
│ │ │ ├── test_variable_alias.py
│ │ │ └── test_alias_by_value.py
│ │ ├── test_lambda
│ │ │ └── test_lambda_with_primitives.py
│ │ └── test_assign_destruc
│ │ │ └── test_variable_alias_nested.py
│ ├── test_literal.py
│ ├── test_list_comp.py
│ ├── test_decorator.py
│ ├── test_dictionary.py
│ ├── test_lists.py
│ ├── test_delete.py
│ ├── test_dask.py
│ ├── test_classdef.py
│ ├── test_blackbox_tracing.py
│ ├── test_list_slice.py
│ ├── test_set.py
│ └── test_stack_trace.py
├── simple_data.csv
├── README.md
├── simple.py
├── pyproject.toml
├── tools
│ └── print_ast.py
├── housing.py
├── __snapshots__
│ └── test_ipython
│ │ ├── test_to_airflow[no_config-module].py
│ │ └── test_to_airflow[with_config-module].py
├── test_globals_dict.py
└── test_api.py
├── lineapy
├── api
│ ├── __init__.py
│ └── models
│ │ └── __init__.py
├── cli
│ └── __init__.py
├── db
│ └── __init__.py
├── _alembic
│ ├── __init__.py
│ ├── versions
│ │ ├── __init__.py
│ │ └── 41a413504720_add_named_var.py
│ ├── README
│ └── script.py.mako
├── data
│ └── __init__.py
├── editors
│ └── __init__.py
├── plugins
│ ├── __init__.py
│ ├── serializers
│ │ └── __init__.py
│ ├── jinja_templates
│ │ ├── task
│ │ │ ├── cwdpickle
│ │ │ │ ├── task_ser.jinja
│ │ │ │ └── task_deser.jinja
│ │ │ ├── parameterizedpickle
│ │ │ │ ├── task_ser.jinja
│ │ │ │ └── task_deser.jinja
│ │ │ ├── tmpdirpickle
│ │ │ │ ├── task_deser.jinja
│ │ │ │ ├── task_teardown.jinja
│ │ │ │ ├── task_setup.jinja
│ │ │ │ └── task_ser.jinja
│ │ │ └── task_function.jinja
│ │ ├── dvc
│ │ │ ├── dvc_dag_SingleStageAllSessions.jinja
│ │ │ ├── dvc_dag_params.jinja
│ │ │ ├── dvc_dag_PythonOperator.jinja
│ │ │ ├── dvc_dockerfile.jinja
│ │ │ └── dvc_dag_StagePerArtifact.jinja
│ │ ├── module
│ │ │ ├── session_function.jinja
│ │ │ └── module.jinja
│ │ ├── script_dockerfile.jinja
│ │ ├── ray
│ │ │ ├── ray_dag_remote.jinja
│ │ │ ├── ray_dag_workflow.jinja
│ │ │ ├── ray_dockerfile.jinja
│ │ │ └── ray_dag_base.jinja
│ │ ├── airflow
│ │ │ ├── airflow_dockerfile.jinja
│ │ │ └── airflow_dag_PythonOperator.jinja
│ │ ├── argo
│ │ │ └── argo_dockerfile.jinja
│ │ └── kubeflow
│ │ │ └── kubeflow_dockerfile.jinja
│ ├── loader.py
│ └── pipeline_writer_factory.py
├── utils
│ ├── __init__.py
│ ├── analytics
│ │ ├── __init__.py
│ │ └── utils.py
│ ├── __error_on_load.py
│ ├── __no_imported_submodule_prime.py
│ ├── version.py
│ ├── __no_imported_submodule.py
│ ├── validate_annotation_spec.py
│ └── migration.py
├── exceptions
│ ├── __init__.py
│ ├── l_import_error.py
│ ├── db_exceptions.py
│ ├── flag.py
│ └── create_frame.py
├── execution
│ └── __init__.py
├── graph_reader
│ ├── __init__.py
│ └── types.py
├── instrumentation
│ └── __init__.py
├── transformer
│ ├── __init__.py
│ ├── transformer_util.py
│ ├── py38_transformer.py
│ ├── source_giver.py
│ └── py37_transformer.py
├── __main__.py
├── annotations
│ ├── external
│ │ ├── joblib.annotations.yaml
│ │ ├── numpy.annotations.yaml
│ │ ├── tensorflow.annotations.yaml
│ │ ├── opencv.annotations.yaml
│ │ ├── prophet.annotations.yaml
│ │ ├── statsforecast.annotations.yaml
│ │ ├── pillow.annotations.yaml
│ │ ├── torch.annotations.yaml
│ │ ├── boto3.annotations.yaml
│ │ ├── keras.annotations.yaml
│ │ ├── sklearn.annotations.yaml
│ │ └── gym.annotations.yaml
│ └── internal
│ │ ├── pickle.annotations.yaml
│ │ ├── tempfile.annotations.yaml
│ │ ├── io.annotations.yaml
│ │ └── operator.annotations.yaml
├── system_tracing
│ ├── function_call.py
│ ├── _object_side_effect.py
│ ├── __init__.py
│ └── exec_and_record_function_calls.py
└── visualizer
│ ├── README.md
│ └── optimize_svg.py
├── docs
├── .gitignore
├── mkdocs
│ ├── images
│ │ ├── .gitkeep
│ │ ├── sample_graph.png
│ │ ├── example_graph.png
│ │ ├── function_components.png
│ │ ├── icon-lineapy-white.png
│ │ ├── graph_reader_classes.png
│ │ ├── lineapy-square-light.png
│ │ └── pipeline-example-diagram.png
│ ├── tutorials
│ │ ├── .gitkeep
│ │ └── README.md
│ ├── reference
│ │ └── lineapy
│ │ │ ├── index.md
│ │ │ ├── api
│ │ │ ├── index.md
│ │ │ ├── api.md
│ │ │ ├── api_utils.md
│ │ │ ├── models
│ │ │ │ ├── index.md
│ │ │ │ ├── pipeline.md
│ │ │ │ ├── linea_artifact.md
│ │ │ │ └── linea_artifact_store.md
│ │ │ └── artifact_serializer.md
│ │ │ ├── cli
│ │ │ ├── index.md
│ │ │ └── cli.md
│ │ │ ├── db
│ │ │ ├── db.md
│ │ │ ├── index.md
│ │ │ ├── utils.md
│ │ │ └── relational.md
│ │ │ ├── data
│ │ │ ├── index.md
│ │ │ ├── graph.md
│ │ │ └── types.md
│ │ │ ├── utils
│ │ │ ├── index.md
│ │ │ ├── utils.md
│ │ │ ├── config.md
│ │ │ ├── version.md
│ │ │ ├── benchmarks.md
│ │ │ ├── constants.md
│ │ │ ├── migration.md
│ │ │ ├── analytics
│ │ │ │ ├── index.md
│ │ │ │ ├── utils.md
│ │ │ │ ├── event_schemas.md
│ │ │ │ └── usage_tracking.md
│ │ │ ├── lineabuiltins.md
│ │ │ ├── tree_logger.md
│ │ │ ├── __error_on_load.md
│ │ │ ├── logging_config.md
│ │ │ ├── deprecation_utils.md
│ │ │ ├── __no_imported_submodule.md
│ │ │ ├── validate_annotation_spec.md
│ │ │ └── __no_imported_submodule_prime.md
│ │ │ ├── editors
│ │ │ ├── index.md
│ │ │ ├── ipython.md
│ │ │ └── ipython_cell_storage.md
│ │ │ ├── plugins
│ │ │ ├── index.md
│ │ │ ├── loader.md
│ │ │ ├── task.md
│ │ │ ├── utils.md
│ │ │ ├── taskgen.md
│ │ │ ├── serializers
│ │ │ │ ├── index.md
│ │ │ │ └── mlflow_io.md
│ │ │ ├── session_writers.md
│ │ │ ├── argo_pipeline_writer.md
│ │ │ ├── base_pipeline_writer.md
│ │ │ ├── dvc_pipeline_writer.md
│ │ │ ├── ray_pipeline_writer.md
│ │ │ ├── airflow_pipeline_writer.md
│ │ │ ├── pipeline_writer_factory.md
│ │ │ └── kubeflow_pipeline_writer.md
│ │ │ ├── exceptions
│ │ │ ├── index.md
│ │ │ ├── flag.md
│ │ │ ├── excepthook.md
│ │ │ ├── create_frame.md
│ │ │ ├── db_exceptions.md
│ │ │ ├── l_import_error.md
│ │ │ └── user_exception.md
│ │ │ ├── execution
│ │ │ ├── index.md
│ │ │ ├── context.md
│ │ │ ├── executor.md
│ │ │ ├── globals_dict.md
│ │ │ ├── side_effects.md
│ │ │ └── inspect_function.md
│ │ │ ├── transformer
│ │ │ ├── index.md
│ │ │ ├── source_giver.md
│ │ │ ├── transform_code.md
│ │ │ ├── base_transformer.md
│ │ │ ├── node_transformer.md
│ │ │ ├── py37_transformer.md
│ │ │ ├── py38_transformer.md
│ │ │ ├── transformer_util.md
│ │ │ └── conditional_transformer.md
│ │ │ ├── visualizer
│ │ │ ├── index.md
│ │ │ ├── graphviz.md
│ │ │ ├── optimize_svg.md
│ │ │ └── visual_graph.md
│ │ │ ├── graph_reader
│ │ │ ├── index.md
│ │ │ ├── types.md
│ │ │ ├── utils.md
│ │ │ ├── graph_printer.md
│ │ │ ├── program_slice.md
│ │ │ ├── node_collection.md
│ │ │ ├── session_artifacts.md
│ │ │ └── artifact_collection.md
│ │ │ ├── instrumentation
│ │ │ ├── index.md
│ │ │ ├── tracer.md
│ │ │ ├── annotation_spec.md
│ │ │ ├── tracer_context.md
│ │ │ ├── mutation_tracker.md
│ │ │ └── control_flow_tracker.md
│ │ │ └── system_tracing
│ │ │ ├── index.md
│ │ │ ├── _op_stack.md
│ │ │ ├── _trace_func.md
│ │ │ ├── function_call.md
│ │ │ ├── _object_side_effect.md
│ │ │ ├── exec_and_record_function_calls.md
│ │ │ ├── function_calls_to_side_effects.md
│ │ │ ├── _object_side_effects_to_side_effects.md
│ │ │ └── _function_calls_to_object_side_effects.md
│ ├── guides
│ │ ├── contributing
│ │ │ ├── areas
│ │ │ │ └── add-test.md
│ │ │ └── tips.md
│ │ └── support.md
│ └── concepts
│ │ ├── artifact.md
│ │ ├── pipeline.md
│ │ └── artifact-store.md
├── requirements.txt
├── overrides
│ └── main.html
└── gen_ref_pages.py
├── examples
├── .gitignore
├── self-hosting-lineapy
│ ├── airflow
│ │ ├── requirements.txt
│ │ ├── webserver_config.py
│ │ ├── airflow-start.sh
│ │ ├── airflow.cfg
│ │ └── Dockerfile
│ ├── lineapy-notebook
│ │ ├── requirements.txt
│ │ ├── verify_environment.py
│ │ ├── lineapy_config.json
│ │ ├── notebook-start.sh
│ │ └── Dockerfile
│ ├── .env
│ └── .gitignore
├── use_cases
│ ├── creating_reusable_components
│ │ └── .gitignore
│ ├── discover_and_trace_past_work
│ │ ├── README.md
│ │ └── artifact_store.zip
│ └── README.md
├── .gitattributes
├── tutorials
│ └── README.md
└── README.md
├── test_pipeline_dvc_req.txt
├── test_pipeline_airflow_req.txt
├── .colab
├── creating_reusable_components
│ └── .gitignore
├── discover_and_trace_past_work
│ ├── README.md
│ └── artifact_store.zip
└── README.md
├── jupyterlab-workspaces
├── .gitignore
├── README.md
└── default-37a8.jupyterlab-workspace
├── test_pipeline_ray_req.txt
├── .dockerignore
├── ports.png
├── MANIFEST.in
├── .gitattributes
├── Dockerfile-airflow
├── .github
├── ISSUE_TEMPLATE
│ ├── eng_item.md
│ ├── feature_request.md
│ └── bug_report.md
├── pull_request_template.md
└── workflows
│ └── publish.yml
├── HISTORY.md
├── airflow_webserver_config.py
├── CONTRIBUTING.md
├── .devcontainer
└── start.sh
├── .cspell
└── custom-dictionary-workspace.txt
├── conftest.py
├── .flake8
├── requirements.txt
├── Dockerfile
├── PERFORMANCE.md
├── docker-compose.yml
├── pyproject.toml
└── .pre-commit-config.yaml
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/cli/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/db/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | site/
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/images/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/_alembic/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/editors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/plugins/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/mkdocs/tutorials/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/api/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/execution/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/graph_reader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/integration/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/notebook/.gitignore:
--------------------------------------------------------------------------------
1 | dag.py
--------------------------------------------------------------------------------
/tests/outputs/generated/.keep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/unit/plugins/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | outputs
2 | deem
--------------------------------------------------------------------------------
/lineapy/_alembic/versions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/instrumentation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lineapy/utils/analytics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test_pipeline_dvc_req.txt:
--------------------------------------------------------------------------------
1 | dvc==2.38.1
--------------------------------------------------------------------------------
/tests/integration/.gitignore:
--------------------------------------------------------------------------------
1 | envs
2 |
--------------------------------------------------------------------------------
/lineapy/plugins/serializers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test_pipeline_airflow_req.txt:
--------------------------------------------------------------------------------
1 | apache-airflow==2.2.4
--------------------------------------------------------------------------------
/tests/unit/plugins/framework_specific/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.colab/creating_reusable_components/.gitignore:
--------------------------------------------------------------------------------
1 | !data/*.csv
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy
2 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/airflow/requirements.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/jupyterlab-workspaces/.gitignore:
--------------------------------------------------------------------------------
1 | *.jupyterlab-workspace
--------------------------------------------------------------------------------
/test_pipeline_ray_req.txt:
--------------------------------------------------------------------------------
1 | ray==2.2.0
2 | ray[data]
3 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/cli/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.cli
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/db/db.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.db.db
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/db/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.db
2 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/lineapy-notebook/requirements.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/integration/sources/matplotlib-tutorial/figures/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/api.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.api
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/cli/cli.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.cli.cli
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/data/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.data
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/db/utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.db.utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils
2 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | tests/integration
3 | .git
4 | *housing.py
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/data/graph.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.data.graph
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/data/types.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.data.types
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/editors/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.editors
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.utils
2 |
--------------------------------------------------------------------------------
/examples/use_cases/creating_reusable_components/.gitignore:
--------------------------------------------------------------------------------
1 | !data/*.csv
--------------------------------------------------------------------------------
/ports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/ports.png
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/utils/__will_not_import.py:
--------------------------------------------------------------------------------
1 | some_var = 1
2 |
--------------------------------------------------------------------------------
/tests/simple_data.csv:
--------------------------------------------------------------------------------
1 | a,b
2 | 1,2
3 | 3,4
4 | 5,6
5 | 7,8
6 | 9,10
7 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/api_utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.api_utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/models/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.models
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/db/relational.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.db.relational
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/loader.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.loader
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/task.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.task
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/config.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.config
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/version.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.version
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/visualizer/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.visualizer
2 |
--------------------------------------------------------------------------------
/lineapy/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | # TODO: copy from `lineapy_experimental`
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/editors/ipython.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.editors.ipython
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/flag.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.flag
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/taskgen.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.taskgen
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/benchmarks.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.benchmarks
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/constants.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.constants
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/migration.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.migration
2 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/airflow/webserver_config.py:
--------------------------------------------------------------------------------
1 | AUTH_ROLE_PUBLIC = "Admin"
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/models/pipeline.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.models.pipeline
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/context.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution.context
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/executor.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution.executor
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/types.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.types
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/analytics/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.analytics
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/lineabuiltins.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.lineabuiltins
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/tree_logger.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.tree_logger
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/visualizer/graphviz.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.visualizer.graphviz
2 |
--------------------------------------------------------------------------------
/lineapy/exceptions/l_import_error.py:
--------------------------------------------------------------------------------
1 | class LImportError(Exception):
2 |     pass
3 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_with_name_conflict/data.py:
--------------------------------------------------------------------------------
1 | import_with_name_conflict = 1
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/excepthook.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.excepthook
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/__error_on_load.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.__error_on_load
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/analytics/utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.analytics.utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/logging_config.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.logging_config
2 |
--------------------------------------------------------------------------------
/examples/.gitattributes:
--------------------------------------------------------------------------------
1 | examples/data/diabetes.csv filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/lineapy/exceptions/db_exceptions.py:
--------------------------------------------------------------------------------
1 | class ArtifactSaveException(Exception):
2 |     pass
3 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/artifact_serializer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.artifact_serializer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/create_frame.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.create_frame
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/db_exceptions.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.db_exceptions
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/globals_dict.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution.globals_dict
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/side_effects.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution.side_effects
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/tracer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation.tracer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/serializers/index.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.serializers
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/session_writers.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.session_writers
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/_op_stack.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing._op_stack
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/source_giver.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.source_giver
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/deprecation_utils.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.deprecation_utils
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/visualizer/optimize_svg.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.visualizer.optimize_svg
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/visualizer/visual_graph.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.visualizer.visual_graph
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/models/linea_artifact.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.models.linea_artifact
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/l_import_error.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.l_import_error
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/exceptions/user_exception.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.exceptions.user_exception
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/execution/inspect_function.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.execution.inspect_function
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/graph_printer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.graph_printer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/program_slice.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.program_slice
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/_trace_func.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing._trace_func
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/transform_code.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.transform_code
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/editors/ipython_cell_storage.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.editors.ipython_cell_storage
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/node_collection.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.node_collection
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/argo_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.argo_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/base_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.base_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/dvc_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.dvc_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/ray_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.ray_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/serializers/mlflow_io.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.serializers.mlflow_io
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/function_call.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing.function_call
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/base_transformer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.base_transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/node_transformer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.node_transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/py37_transformer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.py37_transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/py38_transformer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.py38_transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/transformer_util.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.transformer_util
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/__no_imported_submodule.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.__no_imported_submodule
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/analytics/event_schemas.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.analytics.event_schemas
2 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/.env:
--------------------------------------------------------------------------------
1 | AIRFLOW_PORT=8080
2 | MINIO_CONSOLE_PORT=9001
3 | JUPYTER_PORT=8888
4 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/.gitignore:
--------------------------------------------------------------------------------
1 | # Jupyter Notebook
2 | .ipynb_checkpoints
3 | !examples/data/*.csv
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/api/models/linea_artifact_store.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.api.models.linea_artifact_store
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/session_artifacts.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.session_artifacts
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/annotation_spec.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation.annotation_spec
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/tracer_context.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation.tracer_context
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/airflow_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.airflow_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/pipeline_writer_factory.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.pipeline_writer_factory
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/analytics/usage_tracking.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.analytics.usage_tracking
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/validate_annotation_spec.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.validate_annotation_spec
2 |
--------------------------------------------------------------------------------
/lineapy/__main__.py:
--------------------------------------------------------------------------------
1 | from lineapy.cli.cli import python
2 |
3 | if __name__ == "__main__":
4 |     python()
5 |
--------------------------------------------------------------------------------
/lineapy/exceptions/flag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | REWRITE_EXCEPTIONS = "LINEA_NO_EXCEPTIONS" not in os.environ
4 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # lineapy Tests
2 |
3 | Please review the test section in [Contributing](/CONTRIBUTING.md).
4 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/graph_reader/artifact_collection.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.graph_reader.artifact_collection
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/mutation_tracker.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation.mutation_tracker
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/plugins/kubeflow_pipeline_writer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.plugins.kubeflow_pipeline_writer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/_object_side_effect.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing._object_side_effect
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/images/sample_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/sample_graph.png
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/instrumentation/control_flow_tracker.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.instrumentation.control_flow_tracker
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/transformer/conditional_transformer.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.transformer.conditional_transformer
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/utils/__no_imported_submodule_prime.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.utils.__no_imported_submodule_prime
2 |
--------------------------------------------------------------------------------
/lineapy/utils/__error_on_load.py:
--------------------------------------------------------------------------------
1 | """
2 | Module that raises an error on loading, for testing
3 | """
4 |
5 | 1 / 0
6 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include lineapy *.jinja
2 | recursive-include lineapy *.annotations.yaml
3 | include lineapy/alembic.ini
--------------------------------------------------------------------------------
/docs/mkdocs/images/example_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/example_graph.png
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/airflow/airflow-start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pip install -r /requirements.txt
3 | airflow standalone
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_messy_nodes.1.py:
--------------------------------------------------------------------------------
1 | a = 1
2 | b = a + 2
3 | c = 2
4 | f = a * b * c
5 |
--------------------------------------------------------------------------------
/docs/mkdocs/guides/contributing/areas/add-test.md:
--------------------------------------------------------------------------------
1 | # Adding tests
2 |
3 | [COMING SOON]
4 | [//]: # (TODO: LIN-742)
5 |
6 |
7 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_messy_nodes_slice.py:
--------------------------------------------------------------------------------
1 | a = 1
2 | b = a + 2
3 | c = 2
4 | f = a * b * c
5 |
--------------------------------------------------------------------------------
/docs/mkdocs/images/function_components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/function_components.png
--------------------------------------------------------------------------------
/docs/mkdocs/images/icon-lineapy-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/icon-lineapy-white.png
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/cwdpickle/task_ser.jinja:
--------------------------------------------------------------------------------
1 | pickle.dump({{return_variable}}, open('{{return_variable}}.pickle','wb'))
--------------------------------------------------------------------------------
/docs/mkdocs/images/graph_reader_classes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/graph_reader_classes.png
--------------------------------------------------------------------------------
/docs/mkdocs/images/lineapy-square-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/lineapy-square-light.png
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/exec_and_record_function_calls.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing.exec_and_record_function_calls
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/function_calls_to_side_effects.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing.function_calls_to_side_effects
2 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/dvc/dvc_dag_SingleStageAllSessions.jinja:
--------------------------------------------------------------------------------
1 | stages:
2 |   run_all_sessions:
3 |     cmd: {{MODULE_COMMAND}}
4 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/utils/__error_on_load.py:
--------------------------------------------------------------------------------
1 | """
2 | Module that raises an error on loading, for testing
3 | """
4 |
5 | 1 / 0
6 |
--------------------------------------------------------------------------------
/docs/mkdocs/images/pipeline-example-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/pipeline-example-diagram.png
--------------------------------------------------------------------------------
/tests/simple.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | assets = pd.read_csv("ames_train_cleaned.csv")
4 | assets["is_new"] = assets["Year_Built"] > 1970
5 |
--------------------------------------------------------------------------------
/.colab/discover_and_trace_past_work/README.md:
--------------------------------------------------------------------------------
1 | When you need to recreate the artifact store (both db and pickle files), use the `demo_setup.ipynb`.
2 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/_object_side_effects_to_side_effects.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing._object_side_effects_to_side_effects
2 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/cwdpickle/task_deser.jinja:
--------------------------------------------------------------------------------
1 | {{loaded_input_variable}} = pickle.load(open('{{loaded_input_variable}}.pickle','rb'))
--------------------------------------------------------------------------------
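This template and the matching cwdpickle `task_ser.jinja` shown earlier expand to a plain
pickle round trip in the current working directory. A minimal sketch rendering both with the
standard `jinja2.Template` API (not LineaPy's own template machinery), assuming a hypothetical
variable named `df`:

    from jinja2 import Template

    ser = Template(
        "pickle.dump({{return_variable}}, open('{{return_variable}}.pickle','wb'))"
    )
    deser = Template(
        "{{loaded_input_variable}} = pickle.load(open('{{loaded_input_variable}}.pickle','rb'))"
    )

    # Rendered task code for a hypothetical return variable named "df"
    print(ser.render(return_variable="df"))
    # pickle.dump(df, open('df.pickle','wb'))
    print(deser.render(loaded_input_variable="df"))
    # df = pickle.load(open('df.pickle','rb'))

--------------------------------------------------------------------------------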
/lineapy/plugins/jinja_templates/task/parameterizedpickle/task_ser.jinja:
--------------------------------------------------------------------------------
1 | pickle.dump({{return_variable}}, open(variable_{{return_variable}}_path,'wb'))
--------------------------------------------------------------------------------
/tests/end_to_end/import_with_name_conflict/__init__.py:
--------------------------------------------------------------------------------
1 | from .data import import_with_name_conflict
2 |
3 | __all__ = ["import_with_name_conflict"]
4 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_simple_requirements.txt:
--------------------------------------------------------------------------------
1 | seaborn==0.11.2
2 | pandas==1.3.5
3 | altair==4.2.0
4 | lineapy
5 | scikit-learn==1.0.2
6 |
--------------------------------------------------------------------------------
/docs/mkdocs/reference/lineapy/system_tracing/_function_calls_to_object_side_effects.md:
--------------------------------------------------------------------------------
1 | ::: lineapy.system_tracing._function_calls_to_object_side_effects
2 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_requirements.txt:
--------------------------------------------------------------------------------
1 | seaborn==0.11.2
2 | lineapy
3 | pandas==1.3.5
4 | scikit-learn==1.0.2
5 | altair==4.2.0
6 |
--------------------------------------------------------------------------------
/.colab/discover_and_trace_past_work/artifact_store.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/.colab/discover_and_trace_past_work/artifact_store.zip
--------------------------------------------------------------------------------
/examples/use_cases/discover_and_trace_past_work/README.md:
--------------------------------------------------------------------------------
1 | When you need to recreate the artifact store (both db and pickle files), use the `demo_setup.ipynb`.
2 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/simple_twovar:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | art = dict()
4 | p = "p"
5 | n = 5
6 | pn = p * n
7 | art["pn"] = lineapy.save(pn, "pn")
8 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/parameterizedpickle/task_deser.jinja:
--------------------------------------------------------------------------------
1 | {{loaded_input_variable}} = pickle.load(open(variable_{{loaded_input_variable}}_path,'rb'))
--------------------------------------------------------------------------------
/tests/end_to_end/test_literal.py:
--------------------------------------------------------------------------------
1 | def test_ellipsis(execute):
2 | code = """x = ...
3 | """
4 | res = execute(code)
5 | assert res.values["x"] == ...
6 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_w_dependencies_requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn==1.0.2
2 | pandas==1.3.5
3 | lineapy
4 | altair==4.2.0
5 | seaborn==0.11.2
6 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/airflow/airflow.cfg:
--------------------------------------------------------------------------------
1 | [webserver]
2 | expose_config: True
3 |
4 | [scheduler]
5 | min_file_process_interval: 10
6 | dag_dir_list_interval: 10
7 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/simple:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | art = {}
4 | b0 = 0
5 | art["b0"] = lineapy.save(b0, "b0")
6 | a = b0 + 1
7 | art["a"] = lineapy.save(a, "a")
8 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/dvc/dvc_dag_params.jinja:
--------------------------------------------------------------------------------
1 | {% for var_name, var_value in input_parameters_dict.items() -%}
2 | {{var_name}}: {{var_value}}
3 | {% endfor -%}
4 |
5 |
--------------------------------------------------------------------------------
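This loop emits one `name: value` line per entry of `input_parameters_dict`, i.e. the shape DVC
expects in a `params.yaml`. A minimal sketch with made-up parameter values, rendered via the
standard `jinja2.Template` API:

    from jinja2 import Template

    params_template = Template(
        "{% for var_name, var_value in input_parameters_dict.items() -%}\n"
        "{{var_name}}: {{var_value}}\n"
        "{% endfor -%}\n"
    )

    # Hypothetical pipeline parameters; each becomes one "name: value" line.
    print(params_template.render(
        input_parameters_dict={"n_estimators": 100, "test_size": 0.2}
    ))

--------------------------------------------------------------------------------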
/tests/unit/plugins/expected/script_complex_h/sample_output/f.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_complex_h/sample_output/f.pkl
--------------------------------------------------------------------------------
/tests/unit/plugins/expected/script_complex_h/sample_output/h.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_complex_h/sample_output/h.pkl
--------------------------------------------------------------------------------
/examples/use_cases/discover_and_trace_past_work/artifact_store.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/examples/use_cases/discover_and_trace_past_work/artifact_store.zip
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/tmpdirpickle/task_deser.jinja:
--------------------------------------------------------------------------------
1 | {{loaded_input_variable}} = pickle.load(open('/tmp/{{pipeline_name}}/variable_{{loaded_input_variable}}.pickle','rb'))
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_simple_script_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_simple
4 |
5 | if __name__ == "__main__":
6 |
7 | sliced_housing_simple.p_value()
8 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.sqlite filter=lfs diff=lfs merge=lfs -text
2 | examples/data/mushroom.csv filter=lfs diff=lfs merge=lfs -text
3 | examples/data/diabetes.csv filter=lfs diff=lfs merge=lfs -text
4 |
--------------------------------------------------------------------------------
/lineapy/_alembic/README:
--------------------------------------------------------------------------------
1 | This directory contains database migration scripts, which can be found in the `versions` directory.
2 | For more information, see `https://alembic.sqlalchemy.org/en/latest/`.
--------------------------------------------------------------------------------
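The migrations can be run from the `alembic` CLI or programmatically. A minimal sketch using
alembic's Python API, assuming an `alembic.ini` in the working directory that points at the
target database:

    from alembic import command
    from alembic.config import Config

    # Load migration settings from alembic.ini and upgrade the database to the
    # newest revision found in the `versions` directory.
    alembic_cfg = Config("alembic.ini")
    command.upgrade(alembic_cfg, "head")

--------------------------------------------------------------------------------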
/lineapy/plugins/jinja_templates/task/tmpdirpickle/task_teardown.jinja:
--------------------------------------------------------------------------------
1 | pickle_files = pathlib.Path('/tmp').joinpath('{{pipeline_name}}').glob('*.pickle')
2 | for f in pickle_files:
3 | f.unlink()
--------------------------------------------------------------------------------
/lineapy/utils/__no_imported_submodule_prime.py:
--------------------------------------------------------------------------------
1 | """
2 | This file exists for testing to make sure we can differentiate between imports of different submodules
3 | """
4 |
5 | is_prime = True
6 |
--------------------------------------------------------------------------------
/lineapy/utils/version.py:
--------------------------------------------------------------------------------
1 | # This file contains the package version for LineaPy.
2 | # All other references to the package version should read
3 | # from this file.
4 |
5 | __version__ = "0.2.3"
6 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/tmpdirpickle/task_setup.jinja:
--------------------------------------------------------------------------------
1 | pickle_folder = pathlib.Path('/tmp').joinpath('{{pipeline_name}}')
2 | if not pickle_folder.exists():
3 | pickle_folder.mkdir()
4 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/mutate_after_save:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | art = {}
4 | a = [1]
5 | art["a"] = lineapy.save(a, "a")
6 | a.append(2)
7 | b = a[-1] + 1
8 | art["b"] = lineapy.save(b, "b")
9 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/extract_common:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | art = {}
4 | a = 1
5 | a += 1
6 | b = a + 1
7 | art["b"] = lineapy.save(b, "b")
8 | c = a + 2
9 | art["c"] = lineapy.save(c, "c")
10 |
--------------------------------------------------------------------------------
/Dockerfile-airflow:
--------------------------------------------------------------------------------
1 | ARG IMAGE_NAME=ghcr.io/linealabs/lineapy:main
2 | FROM $IMAGE_NAME
3 |
4 | RUN pip install apache-airflow==2.2.0
5 | RUN airflow db init
6 |
7 | COPY . .
8 |
9 | CMD [ "airflow"]
10 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/utils/__no_imported_submodule_prime.py:
--------------------------------------------------------------------------------
1 | """
2 | This file exists for testing to make sure we can differentiate between imports of different submodules
3 | """
4 |
5 | is_prime = True
6 |
--------------------------------------------------------------------------------
/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/a0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/a0.pkl
--------------------------------------------------------------------------------
/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/b0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/b0.pkl
--------------------------------------------------------------------------------
/lineapy/utils/__no_imported_submodule.py:
--------------------------------------------------------------------------------
1 | """
2 | This file exists to represent a module that was not loaded in the parent module, utils, so that we can
3 | test importing it with Linea.
4 | """
5 |
6 | is_prime = False
7 |
--------------------------------------------------------------------------------
/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/y.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/y.pkl
--------------------------------------------------------------------------------
/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/p_value.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/p_value.pkl
--------------------------------------------------------------------------------
/lineapy/annotations/external/joblib.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: joblib
2 | annotations:
3 | - criteria:
4 | function_name: dump
5 | side_effects:
6 | - mutated_value:
7 | external_state: file_system
8 |
--------------------------------------------------------------------------------
/tests/end_to_end/import_data/utils/__no_imported_submodule.py:
--------------------------------------------------------------------------------
1 | """
2 | This file exists to represent a module that was not loaded in the parent module, utils, so that we can
3 | test importing it with Linea.
4 | """
5 |
6 | is_prime = False
7 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_script_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_multiple
4 |
5 | if __name__ == "__main__":
6 |
7 | sliced_housing_multiple.y()
8 |
9 | sliced_housing_multiple.p_value()
10 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/eng_item.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Eng item
3 | about: new engineering TODO for Linea devs
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What**:
11 |
12 | **Why**:
13 |
14 | **Possible Approaches**:
15 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs==1.4.2
2 | mkdocs-material==8.5.11
3 | mkdocstrings==0.19.1
4 | mkdocstrings-python==0.8.2
5 | mkdocs-jupyter==0.22.0
6 | mkdocs-gen-files==0.4.0
7 | mkdocs-literate-nav==0.6.0
8 | mkdocs-section-index==0.3.4
9 | mike==1.1.2
10 |
--------------------------------------------------------------------------------
/tests/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Add a coverage config file to this directory, so that tests run in a subprocess
2 | # in this directory will find it and also use branch data.
3 | # Otherwise, coverage will break when it tries to combine files.
4 | [tool.coverage.run]
5 | branch = true
--------------------------------------------------------------------------------
/HISTORY.md:
--------------------------------------------------------------------------------
1 | ## 0.1.5
2 |
3 | * The DB schema has been updated as per https://github.com/LineaLabs/lineapy/pull/702. For compatibility, users are asked to delete and recreate the `.lineapy` folder. This crude resolution will be replaced by a more systematic DB migration process.
4 |
--------------------------------------------------------------------------------
/airflow_webserver_config.py:
--------------------------------------------------------------------------------
1 | # turn off auth https://airflow.apache.org/docs/apache-airflow/stable/security/webserver.html#web-authentication
2 | AUTH_ROLE_PUBLIC = "Admin"
3 | # Turn off CSRF so we can submit forms from another URL on Codespaces
4 | WTF_CSRF_ENABLED = False
5 |
--------------------------------------------------------------------------------
/tests/notebook/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Add a coverage config file to this directory, so that tests run in a subprocess
2 | # in this directory will find it and also use branch data.
3 | # Otherwise, coverage will break when it tries to combine files.
4 | [tool.coverage.run]
5 | branch = true
--------------------------------------------------------------------------------
/lineapy/annotations/internal/pickle.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: pickle
2 | annotations:
3 | - criteria:
4 | function_name: dump # Note: `load` doesn't need annotation
5 | side_effects:
6 | - mutated_value:
7 | positional_argument_index: 1
8 |
--------------------------------------------------------------------------------
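The criteria above say that a call matching `pickle.dump` mutates its second positional
argument (index 1), i.e. the file object being written to, while `pickle.load` only reads and
therefore needs no annotation. A small sketch of the calls this annotation describes, using
only the standard library:

    import pickle

    data = {"a": 1, "b": 2}

    # pickle.dump(obj, file): the file object at positional index 1 is mutated
    # (bytes are written to it), which is what the annotation records.
    with open("data.pkl", "wb") as f:
        pickle.dump(data, f)

    # pickle.load(file) only reads, so it needs no mutation annotation.
    with open("data.pkl", "rb") as f:
        restored = pickle.load(f)

    assert restored == data

--------------------------------------------------------------------------------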
/tests/unit/graph_reader/inputs/module_import:
--------------------------------------------------------------------------------
1 | import pandas
2 |
3 | import lineapy
4 |
5 | art = {}
6 |
7 | df = pandas.DataFrame({"a": [1, 2]})
8 | art["df"] = lineapy.save(df, "df")
9 |
10 | df2 = pandas.concat([df, df])
11 | art["df2"] = lineapy.save(df2, "df2")
12 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/module_import_alias:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | import lineapy
4 |
5 | art = {}
6 |
7 | df = pd.DataFrame({"a": [1, 2]})
8 | art["df"] = lineapy.save(df, "df")
9 |
10 | df2 = pd.concat([df, df])
11 | art["df2"] = lineapy.save(df2, "df2")
12 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/numpy.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: numpy
2 | annotations:
3 | - criteria:
4 | function_names:
5 | - savetxt
6 | - savez
7 | side_effects:
8 | - mutated_value:
9 | external_state: file_system
10 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/tmpdirpickle/task_ser.jinja:
--------------------------------------------------------------------------------
1 | if not pathlib.Path('/tmp').joinpath('{{pipeline_name}}').exists(): pathlib.Path('/tmp').joinpath('{{pipeline_name}}').mkdir()
2 | pickle.dump({{return_variable}}, open('/tmp/{{pipeline_name}}/variable_{{return_variable}}.pickle','wb'))
--------------------------------------------------------------------------------
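This serialization template and its tmpdirpickle setup, deserialization, and teardown
counterparts shown earlier expand to code along these lines for a hypothetical pipeline named
`housing` with a single variable `y` (a sketch of the rendered output, not the generator
itself):

    import pathlib
    import pickle

    y = [1, 2, 3]  # hypothetical artifact value

    # task_setup: ensure the per-pipeline temp directory exists
    pickle_folder = pathlib.Path('/tmp').joinpath('housing')
    if not pickle_folder.exists():
        pickle_folder.mkdir()

    # task_ser: write the return variable so downstream tasks can read it
    pickle.dump(y, open('/tmp/housing/variable_y.pickle', 'wb'))

    # task_deser: a downstream task loads the same variable back
    y = pickle.load(open('/tmp/housing/variable_y.pickle', 'rb'))

    # task_teardown: remove all intermediate pickles for this pipeline
    for f in pathlib.Path('/tmp').joinpath('housing').glob('*.pickle'):
        f.unlink()

--------------------------------------------------------------------------------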
/tests/outputs/expected/sliced_housing_multiple_w_dependencies_script_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_multiple_w_dependencies
4 |
5 | if __name__ == "__main__":
6 |
7 | sliced_housing_multiple_w_dependencies.p_value()
8 |
9 | sliced_housing_multiple_w_dependencies.y()
10 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/linear:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | linear_first = 1
4 | linear_second = linear_first + 1
5 | linear_third = linear_second + linear_first
6 | lineapy.save(linear_first, "linear_first")
7 | lineapy.save(linear_second, "linear_second")
8 | lineapy.save(linear_third, "linear_third")
9 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/tensorflow.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: tensorflow.keras.utils
2 | annotations:
3 | - criteria:
4 | function_name: get_file
5 | side_effects:
6 | - dependency:
7 | external_state: file_system
8 | - mutated_value:
9 | external_state: file_system
10 |
--------------------------------------------------------------------------------
/tests/unit/system_tracing/test_op_stack.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | import pytest
4 |
5 | from lineapy.system_tracing._op_stack import OpStack
6 |
7 |
8 | def test_stack_access():
9 | f = inspect.currentframe()
10 | assert f
11 | op_stack = OpStack(f)
12 | with pytest.raises(IndexError):
13 | op_stack[-1000]
14 |
--------------------------------------------------------------------------------
/jupyterlab-workspaces/README.md:
--------------------------------------------------------------------------------
1 | Create a directory for jupyterlab workspaces, so we can use a default one in the repo.
2 |
3 | I am not sure what's up with the magic string `37a8` in the name. If I change it,
4 | JupyterLab won't recognize the default.
5 |
6 | Anytime you run jupyterlab, it will re-save this file. Before committing, reformat
7 | it.
8 |
--------------------------------------------------------------------------------
/lineapy/annotations/internal/tempfile.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: tempfile
2 | annotations:
3 | - criteria:
4 | function_name: TemporaryFile
5 | side_effects:
6 | - mutated_value:
7 | external_state: file_system
8 | - views:
9 | - result: RESULT
10 | - external_state: file_system
11 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Thank you for your interest in contributing to LineaPy! We believe that it is the community that makes an open source project truly great and successful, so we welcome contributions from new members.
2 |
3 | Please check out the project [documentation](https://docs.lineapy.org/latest/guides/contributing/process/) to learn how you can contribute!
4 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/opencv.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: cv2
2 | annotations:
3 | - criteria:
4 | class_instance: Algorithm
5 | class_method_name: train
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF
9 | - views:
10 | - self_ref: SELF_REF
11 | - result: RESULT
--------------------------------------------------------------------------------
/lineapy/annotations/external/prophet.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: prophet
2 | annotations:
3 | - criteria:
4 | class_instance: Prophet
5 | class_method_name: fit
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF
9 | - views:
10 | - self_ref: SELF_REF
11 | - result: RESULT
--------------------------------------------------------------------------------
/lineapy/transformer/transformer_util.py:
--------------------------------------------------------------------------------
1 | import ast
2 | from typing import Dict, List
3 |
4 | """
5 | AST synthesizers used by node_transformers
6 | """
7 |
8 |
9 | def create_lib_attributes(names: List[ast.alias]) -> Dict[str, str]:
10 | return {
11 | alias.asname if alias.asname else alias.name: alias.name
12 | for alias in names
13 | }
14 |
--------------------------------------------------------------------------------
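For example, feeding `create_lib_attributes` the aliases from a parsed import statement maps
each bound name to the module it refers to. A small usage sketch with the standard `ast`
module:

    import ast

    from lineapy.transformer.transformer_util import create_lib_attributes

    # The names on an ast.Import node are ast.alias objects, which is exactly
    # what create_lib_attributes expects.
    import_node = ast.parse("import pandas as pd, numpy").body[0]
    print(create_lib_attributes(import_node.names))
    # {'pd': 'pandas', 'numpy': 'numpy'}

--------------------------------------------------------------------------------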
/lineapy/annotations/external/statsforecast.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: statsforecast.models
2 | annotations:
3 | - criteria:
4 | class_instance: _TS
5 | class_method_name: fit
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF
9 | - views:
10 | - self_ref: SELF_REF
11 | - result: RESULT
--------------------------------------------------------------------------------
/docs/mkdocs/guides/support.md:
--------------------------------------------------------------------------------
1 | # More Help
2 |
3 | ## Community
4 |
5 | The quickest way to get support for your unresolved issue is to join our [community on Slack](https://join.slack.com/t/lineacommunity/shared_invite/zt-18kizfn3b-1Qu_HDT3ahGudnAwoFAw9Q).
6 | You can post your issue on the ``#support`` channel, and it will be answered promptly. We are always happy and ready to help you!
7 |
--------------------------------------------------------------------------------
/tests/unit/plugins/test_task.py:
--------------------------------------------------------------------------------
1 | from lineapy.plugins.task import TaskGraph
2 |
3 |
4 | def test_task_graph():
5 | g = TaskGraph(
6 | ["a", "b", "c"],
7 | {"c": {"a", "b"}},
8 | )
9 | g = g.remap_nodes({"a": "a_p", "b": "b_p", "c": "c_p"})
10 | expected_orders = [["a_p", "b_p", "c_p"], ["b_p", "a_p", "c_p"]]
11 | assert g.get_taskorder() in expected_orders
12 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/lineapy-notebook/verify_environment.py:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | assert lineapy.options.get('database_url') == 'postgresql://lineapy:lineapypassword@postgres-lineapy:5432/lineapy_artifact_store'
4 | assert lineapy.options.get('artifact_storage_dir') == 's3://lineapy-artifact-store'
5 | assert lineapy.options.get('storage_options') is not None
6 |
7 | print("Lineapy configuration verified.")
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pandas_timeseries].py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv"
4 | apple = pd.read_csv(url)
5 | apple.Date = pd.to_datetime(apple.Date)
6 | apple = apple.set_index("Date")
7 | apple_months = apple.resample("BM").mean()
8 | linea_artifact_value = apple_months
9 |
--------------------------------------------------------------------------------
/.devcontainer/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Modified from https://docs.docker.com/config/containers/multi-service_container/
3 |
4 | # Install lineapy in develop mode
5 | python setup.py develop
6 |
7 | # turn on bash's job control
8 | set -m
9 |
10 | # Start the first process
11 | make jupyterlab_start &> /tmp/jupyterlab_log &
12 |
13 | # Start the second process
14 | make airflow_home airflow_start &> /tmp/airflow_log &
15 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/pillow.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: PIL.Image
2 | annotations:
3 | - criteria:
4 | class_instance: Image
5 | class_method_name: save
6 | side_effects:
7 | - mutated_value:
8 | external_state: file_system
9 | - criteria:
10 | function_name: open
11 | side_effects:
12 | - dependency:
13 | external_state: file_system
14 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pandas_deleting].py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
4 | iris = pd.read_csv(url)
5 | iris.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
6 | del iris["class"]
7 | iris = iris.dropna(how="any")
8 | iris = iris.reset_index(drop=True)
9 | linea_artifact_value = iris
10 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/module/session_function.jinja:
--------------------------------------------------------------------------------
1 | def {{session_function_name}}({{session_input_parameters_body}}):
2 | # Given multiple artifacts, we need to save each right after
3 | # its calculation to protect from any irrelevant downstream
4 | # mutations (e.g., inside other artifact calculations)
5 | import copy
6 | {{return_dict_name}} = dict()
7 | {{session_function_body | indent(4, True) }}
8 | return {{return_dict_name}}
--------------------------------------------------------------------------------
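For orientation, a hand-rendered sketch of what this template could produce for a hypothetical
session with one input parameter `n` and one artifact `b0`; the real generator fills in the
function name, parameters, and body from the traced session, so every name below is
illustrative only:

    def run_session_including_b0(n):
        # Given multiple artifacts, we need to save each right after
        # its calculation to protect from any irrelevant downstream
        # mutations (e.g., inside other artifact calculations)
        import copy

        artifacts = dict()
        b0 = n + 0
        artifacts["b0"] = copy.deepcopy(b0)
        return artifacts

--------------------------------------------------------------------------------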
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest a new feature for Linea
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[matplotlib_aliased].py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | size = 128, 16
4 | dpi = 72.0
5 | figsize = size[0] / float(dpi), size[1] / float(dpi)
6 | fig = plt.figure(figsize=figsize, dpi=dpi)
7 | plt.axes([0, 0, 1, 1], frameon=False)
8 | plt.text(0.5, 0.5, "Aliased", ha="center", va="center")
9 | plt.xlim(0, 1), plt.ylim(0, 1)
10 | plt.xticks([]), plt.yticks([])
11 | plt.savefig("../figures/aliased.png", dpi=dpi)
12 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/torch.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: torch
2 | annotations:
3 | - criteria:
4 | function_name: manual_seed
5 | side_effects:
6 | - mutated_value:
7 | self_ref: SELF_REF
8 | - module: torch.jit._script
9 | annotations:
10 | - criteria:
11 | class_method_name: save
12 | class_instance: ScriptModule
13 | side_effects:
14 | - mutated_value:
15 | external_state: file_system
16 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/dvc/dvc_dag_PythonOperator.jinja:
--------------------------------------------------------------------------------
1 | {% if task_parameters|length>0 -%}
2 | import dvc.api
3 | {% endif %}
4 | import {{ MODULE_NAME }}
5 | import pickle
6 |
7 | {{ TASK_CODE }}
8 |
9 | if __name__ == "__main__":
10 | {% for param in task_parameters -%}
11 | {{param}} = dvc.api.params_show()["{{param}}"]
12 | {% endfor -%}
13 | task_{{ task_name }}({% for param in task_parameters -%}{{param}}{{ ", " if not loop.last else "" }}{% endfor %})
14 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_simple_Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:latest-python{python_version}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # copy all the requirements to run the current dag
7 | COPY ./sliced_housing_simple_requirements.txt ./
8 | # install the required libs
9 | RUN pip install -r ./sliced_housing_simple_requirements.txt
10 |
11 | WORKDIR /opt/airflow/dags
12 | COPY . .
13 |
14 | WORKDIR /opt/airflow
15 |
16 | CMD [ "standalone" ]
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:latest-python{python_version}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # copy all the requirements to run the current dag
7 | COPY ./sliced_housing_multiple_requirements.txt ./
8 | # install the required libs
9 | RUN pip install -r ./sliced_housing_multiple_requirements.txt
10 |
11 | WORKDIR /opt/airflow/dags
12 | COPY . .
13 |
14 | WORKDIR /opt/airflow
15 |
16 | CMD [ "standalone" ]
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/script_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | FROM python:{{ python_version }}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # Copy all the requirements to run current DAG
7 | COPY ./{{ pipeline_name }}_requirements.txt ./
8 |
9 | # Install required libs
10 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
11 |
12 | WORKDIR /home
13 | COPY ./{{ pipeline_name }}_module.py ./
14 |
15 | ENTRYPOINT [ "python", "/home/{{ pipeline_name }}_module.py" ]
16 |
--------------------------------------------------------------------------------
/lineapy/system_tracing/function_call.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass, field
4 | from typing import Any, Callable, Dict, List
5 |
6 |
7 | @dataclass
8 | class FunctionCall:
9 | """
10 | A record of a function call that happened in the tracer.
11 | """
12 |
13 | fn: Callable
14 | args: List[Any] = field(default_factory=list)
15 | kwargs: Dict[str, Any] = field(default_factory=dict)
16 | res: Any = field(default=None)
17 |
--------------------------------------------------------------------------------
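A trivial sketch of constructing one of these records by hand; in practice the tracer builds
them while executing user code:

    from lineapy.system_tracing.function_call import FunctionCall

    # Record that len([1, 2, 3]) was called and returned 3.
    call = FunctionCall(fn=len, args=[[1, 2, 3]], res=3)
    assert call.fn(*call.args, **call.kwargs) == call.res

--------------------------------------------------------------------------------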
/lineapy/plugins/jinja_templates/ray/ray_dag_remote.jinja:
--------------------------------------------------------------------------------
1 | {% extends "ray/ray_dag_base.jinja" %}
2 |
3 | {% block bind_or_remote %}remote{% endblock %}
4 |
5 | {%- block ray_dag_execution %}
6 | # Execute actors to get remote objects
7 | # Make changes here to access any additional objects needed.
8 | {%- for task_name in sink_tasks %}
9 | ray.get([{%- for var in tasks[task_name].return_vars %}{{ var }}{{ ',' if not loop.last else '' }}{%- endfor %}])
10 | {%- endfor %}
11 | {% endblock %}
12 |
13 |
--------------------------------------------------------------------------------
/tests/integration/sources/matplotlib-tutorial/scripts/alpha.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | size = 256,16
4 | dpi = 72.0
5 | figsize= size[0]/float(dpi),size[1]/float(dpi)
6 | fig = plt.figure(figsize=figsize, dpi=dpi)
7 | fig.patch.set_alpha(0)
8 | plt.axes([0,0.1,1,.8], frameon=False)
9 |
10 | for i in range(1,11):
11 | plt.axvline(i, linewidth=1, color='blue',alpha=.25+.75*i/10.)
12 |
13 | plt.xlim(0,11)
14 | plt.xticks([]), plt.yticks([])
15 | plt.savefig('../figures/alpha.png', dpi=dpi)
16 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_housing.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.ensemble import RandomForestClassifier
3 |
4 | assets = pd.read_csv("ames_train_cleaned.csv")
5 |
6 |
7 | def is_new(col):
8 | return col > 1970
9 |
10 |
11 | assets["is_new"] = is_new(assets["Year_Built"])
12 | clf = RandomForestClassifier(random_state=0)
13 | y = assets["is_new"]
14 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
15 | clf.fit(x, y)
16 | p = clf.predict([[100 * 1000, 10, 4]])
17 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/ray/ray_dag_workflow.jinja:
--------------------------------------------------------------------------------
1 | {% extends "ray/ray_dag_base.jinja" %}
2 |
3 | {% block bind_or_remote %}bind{% endblock %}
4 |
5 | {% block ray_dag_execution %}
6 | from packaging import version
7 | if version.parse(ray.__version__) < version.parse('2.0'):
8 |     raise RuntimeError(f"Ray Workflows requires ray>=2.0 but {ray.__version__} was found")
9 | {%- for task_name in sink_tasks %}
10 | ray.workflow.run({{tasks[task_name].return_vars[0]}})
11 | {%- endfor %}
12 | {% endblock %}
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.cspell/custom-dictionary-workspace.txt:
--------------------------------------------------------------------------------
1 | __getitem__
2 | # Custom Dictionary Words
3 | astpretty
4 | asttokens
5 | builtins
6 | chdir
7 | dataclass
8 | dataframe
9 | getattr
10 | graphviz
11 | ipython
12 | isinstance
13 | jupyterlab
14 | kwargs
15 | linea
16 | lineabuiltins
17 | lineapy
18 | listify
19 | maxdepth
20 | nbconvert
21 | nbformat
22 | nbsphinx
23 | nbval
24 | NBVAL_IGNORE_OUTPUT
25 | networkx
26 | orms
27 | psycopg2
28 | Pydantic
29 | scipy
30 | sklearn
31 | sqlalchemy
32 | templating
33 | toctree
34 | traceback
35 | xdist
36 |
--------------------------------------------------------------------------------
/tests/integration/sources/matplotlib-tutorial/scripts/aliased.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | size = 128,16
4 | dpi = 72.0
5 | figsize= size[0]/float(dpi),size[1]/float(dpi)
6 | fig = plt.figure(figsize=figsize, dpi=dpi)
7 | fig.patch.set_alpha(0)
8 | plt.axes([0,0,1,1], frameon=False)
9 |
10 | plt.rcParams['text.antialiased'] = False
11 | plt.text(0.5,0.5,"Aliased",ha='center',va='center')
12 |
13 | plt.xlim(0,1),plt.ylim(0,1),
14 | plt.xticks([]),plt.yticks([])
15 |
16 | plt.savefig('../figures/aliased.png', dpi=dpi)
17 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_w_dependencies_Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:latest-python{python_version}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # copy all the requirements to run the current dag
7 | COPY ./sliced_housing_multiple_w_dependencies_requirements.txt ./
8 | # install the required libs
9 | RUN pip install -r ./sliced_housing_multiple_w_dependencies_requirements.txt
10 |
11 | WORKDIR /opt/airflow/dags
12 | COPY . .
13 |
14 | WORKDIR /opt/airflow
15 |
16 | CMD [ "standalone" ]
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/ray/ray_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | FROM python:{{ python_version }}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # Copy all the requirements to run current DAG
7 | COPY ./{{ pipeline_name }}_requirements.txt ./
8 |
9 | # Install ray
10 | RUN apt update
11 | RUN pip install ray
12 |
13 | # Install required libs
14 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
15 |
16 | WORKDIR /home
17 | COPY ./{{ pipeline_name }}_module.py ./
18 | COPY ./{{ pipeline_name }}_dag.py ./
19 |
20 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/boto3.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: boto3
2 | annotations:
3 | - criteria:
4 | function_names:
5 | - upload_file
6 | - upload_fileobj
7 | side_effects:
8 | - mutated_value:
9 | external_state: file_system
10 | - module: boto.s3.inject
11 | annotations:
12 | - criteria:
13 | function_names:
14 | - upload_file
15 | - upload_fileobj
16 | side_effects:
17 | - mutated_value:
18 | external_state: file_system
19 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pandas_apply].py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv"
4 | crime = pd.read_csv(url)
5 | crime.Year = pd.to_datetime(crime.Year, format="%Y")
6 | crime = crime.set_index("Year", drop=True)
7 | del crime["Total"]
8 | crimes = crime.resample("10AS").sum()
9 | population = crime["Population"].resample("10AS").max()
10 | crimes["Population"] = population
11 | linea_artifact_value = crimes
12 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_semi_supervised_plot_label_propagation_structure].py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.datasets import make_circles
3 | from sklearn.semi_supervised import LabelSpreading
4 |
5 | n_samples = 200
6 | X, y = make_circles(n_samples=n_samples, shuffle=False)
7 | outer, inner = 0, 1
8 | labels = np.full(n_samples, -1.0)
9 | labels[0] = outer
10 | labels[-1] = inner
11 | label_spread = LabelSpreading(kernel="knn", alpha=0.8)
12 | label_spread.fit(X, labels)
13 | linea_artifact_value = label_spread
14 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_literal/test_ellipsis.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""x = ...
8 | """,
9 | location=PosixPath("[source file path]"),
10 | )
11 | literal_1 = LiteralNode(
12 | source_location=SourceLocation(
13 | lineno=1,
14 | col_offset=4,
15 | end_lineno=1,
16 | end_col_offset=7,
17 | source_code=source_1.id,
18 | ),
19 | value=Ellipsis,
20 | )
21 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/airflow/airflow_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:latest-python{{ python_version }}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # copy all the requirements to run the current dag
7 | COPY ./{{ pipeline_name }}_requirements.txt ./
8 | # install the required libs
9 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
10 |
11 | WORKDIR /opt/airflow/dags
12 | COPY ./{{ pipeline_name }}_module.py ./
13 | COPY ./{{ pipeline_name }}_dag.py ./
14 |
15 | WORKDIR /opt/airflow
16 |
17 | CMD [ "standalone" ]
18 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/complex:
--------------------------------------------------------------------------------
1 | import lineapy
2 |
3 | art = {}
4 | a0 = 0
5 | a0 += 1
6 | art["a0"] = lineapy.save(a0, "a0")
7 | a = 1
8 | art["a"] = lineapy.save(a, "a")
9 |
10 | a += 1
11 | b = a * 2 + a0
12 | c = b + 3
13 | d = a * 4
14 | e = d + 5
15 | e += 6
16 | art["c"] = lineapy.save(c, "c")
17 | art["e"] = lineapy.save(e, "e")
18 |
19 | f = c + 7
20 | art["f"] = lineapy.save(f, "f")
21 | a += 1
22 | g = c + e * 2
23 | art["g2"] = lineapy.save(g, "g2")
24 | h = a + g
25 | art["h"] = lineapy.save(h, "h")
26 | z = [1]
27 | z.append(h)
28 | art["z"] = lineapy.save(z, "z")
29 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[matplotlib_exercise_3].py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | plt.figure(figsize=(8, 5), dpi=80)
5 | plt.subplot(111)
6 | X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
7 | C, S = np.cos(X), np.sin(X)
8 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
9 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
10 | plt.xlim(-4.0, 4.0)
11 | plt.xticks(np.linspace(-4, 4, 9, endpoint=True))
12 | plt.ylim(-1.0, 1.0)
13 | plt.yticks(np.linspace(-1, 1, 5, endpoint=True))
14 | linea_artifact_value = plt.gcf()
15 |
--------------------------------------------------------------------------------
/docs/mkdocs/guides/contributing/tips.md:
--------------------------------------------------------------------------------
1 | # Recommended Practices
2 |
3 | ## Organize each PR with relevant changes
4 |
5 | To maintain a linear/cleaner project history, the project was set up to apply “squashing” when merging a PR.
6 | That is, if a PR contains more than one commit, GitHub will combine them into a single commit where the summary
7 | equals the PR title (followed by the PR number) and the description consists of commit messages for all squashed
8 | commits (in date order). Hence, we ask you to organize each PR with related changes only so that it can represent
9 | a single unit of meaningful change.
10 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from lineapy.cli.cli import setup_ipython_dir
4 |
5 |
6 | # Set the IPYTHONDIR globally when running any tests
7 | # This needs to be in the root directory, so that even notebooks
8 | # tested in `./examples` use this plugin
9 | def pytest_configure(config):
10 | setup_ipython_dir()
11 | os.environ["LINEAPY_DO_NOT_TRACK"] = "true"
12 | os.environ["AIRFLOW_HOME"] = "/tmp/airflow_home"
13 |
14 |
15 | def pytest_collectstart(collector):
16 | if collector.fspath and collector.fspath.ext == ".ipynb":
17 |
18 | collector.skip_compare += ("image/svg+xml", "text/html")
19 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_simple.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
4 | def p_value():
5 | import pandas as pd
6 | from sklearn.ensemble import RandomForestClassifier
7 |
8 | assets = pd.read_csv("ames_train_cleaned.csv")
9 |
10 | def is_new(col):
11 | return col > 1970
12 |
13 | assets["is_new"] = is_new(assets["Year_Built"])
14 | clf = RandomForestClassifier(random_state=0)
15 | y = assets["is_new"]
16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
17 | clf.fit(x, y)
18 | p = clf.predict([[100 * 1000, 10, 4]])
19 | pickle.dump(p, open("pickle-sample.pkl", "wb"))
20 |
--------------------------------------------------------------------------------
/tests/tools/print_ast.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Pretty prints the AST of some Python code you pass in from the CLI
4 | """
5 |
6 | import ast
7 | import dis
8 |
9 | import click
10 | from astpretty import pprint
11 |
12 |
13 | @click.command()
14 | @click.argument("code")
15 | def linea_cli(code):
16 |
17 | ast_ = ast.parse(code)
18 | print("*** AST ***")
19 | pprint(ast_)
20 | print("\n*** TRACER ***")
21 | # print(astor.to_source(NodeTransformer("dummy").visit(ast_)))
22 |     print("\n*** Bytecode ***")
23 | dis.dis(code)
24 |
25 |
26 | if __name__ == "__main__":
27 | linea_cli()
28 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/dvc/dvc_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | FROM python:{{ python_version }}
2 |
3 | RUN mkdir /tmp/installers
4 | WORKDIR /tmp/installers
5 |
6 | # Copy all the requirements to run current DAG
7 | COPY ./{{ pipeline_name }}_requirements.txt ./
8 |
9 | # Install git and dvc
10 | RUN apt update
11 | RUN apt install -y git
12 | RUN pip install dvc
13 |
14 | # Install required libs
15 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
16 |
17 | WORKDIR /home
18 | COPY . .
19 |
20 | # Initialize workdir as a dvc repo
21 | RUN git init
22 | RUN dvc init
23 |
24 | ENTRYPOINT [ "dvc", "repro", "run_all_sessions"]
25 |
--------------------------------------------------------------------------------
/lineapy/visualizer/README.md:
--------------------------------------------------------------------------------
1 | # Visualizer
2 |
3 | We use `graphviz` to show the internal state of lineapy; the graphs support
4 | demos, debugging, and tests.
5 | 
6 | The graph can be created in two ways: (1) with the tracer, which contains richer
7 | run-time information, such as variable names and mutation nodes, and
8 | (2) without run-time information, such as when we load the artifact from the database.
9 |
10 | There are four different ways to access the visualizer currently, with slightly
11 | different configurations (you can find the full list in `__init__.py`):
12 |
13 | - ipython
14 | - snapshots
15 | - cli
--------------------------------------------------------------------------------
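As a minimal, self-contained sketch of the kind of `graphviz` usage this relies on (not
LineaPy's actual drawing code), the Python `graphviz` package builds a DOT graph from nodes and
edges and renders it to an image:

    import graphviz

    # A toy graph in the spirit of a lineage visualization: a call node that
    # consumes a literal and produces a variable.
    dot = graphviz.Digraph("toy_lineage")
    dot.node("literal_1", "1")
    dot.node("call_1", "add")
    dot.node("var_x", "x")
    dot.edge("literal_1", "call_1")
    dot.edge("call_1", "var_x")

    dot.render("toy_lineage", format="png", cleanup=True)

--------------------------------------------------------------------------------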
/lineapy/_alembic/script.py.mako:
--------------------------------------------------------------------------------
1 | """${message}
2 |
3 | Revision ID: ${up_revision}
4 | Revises: ${down_revision | comma,n}
5 | Create Date: ${create_date}
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 | ${imports if imports else ""}
11 |
12 | # revision identifiers, used by Alembic.
13 | revision = ${repr(up_revision)}
14 | down_revision = ${repr(down_revision)}
15 | branch_labels = ${repr(branch_labels)}
16 | depends_on = ${repr(depends_on)}
17 |
18 |
19 | def upgrade() -> None:
20 | ${upgrades if upgrades else "pass"}
21 |
22 |
23 | def downgrade() -> None:
24 | ${downgrades if downgrades else "pass"}
25 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_list_comp.py:
--------------------------------------------------------------------------------
1 | def test_returns_value(execute):
2 | res = execute("x = [i + 1 for i in range(3)]")
3 | assert res.values["x"] == [1, 2, 3]
4 |
5 |
6 | def test_depends_on_prev_value(execute):
7 | res = execute(
8 | "y = range(3)\nx = [i + 1 for i in y]",
9 | snapshot=False,
10 | artifacts=["x"],
11 | )
12 | # Verify that i isn't set in the local scope
13 | assert res.values["x"] == [1, 2, 3]
14 | assert res.values["y"] == range(3)
15 | assert "i" not in res.values
16 | sliced_code = res.slice("x")
17 | assert execute(sliced_code).values["x"] == [1, 2, 3]
18 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block announce %}
4 |
5 | Have questions? Join our
6 |
7 | {% include ".icons/fontawesome/brands/slack.svg" %}
8 |
9 | Slack community and ask away!
10 |
11 | {% endblock %}
12 |
13 | {% block outdated %}
14 | You are viewing an old version of the documentation.
15 |
16 | Click here to go to the latest version.
17 |
18 | {% endblock %}
--------------------------------------------------------------------------------
/lineapy/annotations/external/keras.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: keras.engine.training
2 | annotations:
3 | - criteria:
4 | class_method_name: compile
5 | class_instance: Model
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF
9 | - criteria:
10 | class_method_name: fit
11 | class_instance: Model
12 | side_effects:
13 | - mutated_value:
14 | self_ref: SELF_REF
15 | - criteria:
16 | class_method_name: save
17 | class_instance: Model
18 | side_effects:
19 | - mutated_value:
20 | external_state: file_system
21 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Description
2 |
3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
4 |
5 | Fixes # (issue)
6 |
7 | ## Type of change
8 |
9 | Please delete options that are not relevant.
10 |
11 | - [ ] Bug fix (non-breaking change which fixes an issue)
12 | - [ ] New feature (non-breaking change which adds functionality)
13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
14 | - [ ] This change requires a documentation update
15 |
16 | # How Has This Been Tested?
17 |
--------------------------------------------------------------------------------
/tests/unit/transformer/test_transform_code.py:
--------------------------------------------------------------------------------
1 | from mock import MagicMock, patch
2 |
3 | from lineapy.transformer.transform_code import transform
4 |
5 |
6 | @patch(
7 | "lineapy.transformer.transform_code.NodeTransformer",
8 | )
9 | def test_transform_fn(nt_mock: MagicMock):
10 | """
11 | Test that the transform function calls the NodeTransformer
12 | """
13 | mocked_tracer = MagicMock()
14 | source_location = MagicMock()
15 | transform("x = 1", source_location, mocked_tracer)
16 | nt_mock.assert_called_once()
17 | mocked_tracer.db.commit.assert_called_once()
18 | # TODO - test that source giver is called only for 3.7 and below
19 |
--------------------------------------------------------------------------------
/.colab/README.md:
--------------------------------------------------------------------------------
1 | # LineaPy Tutorials
2 |
3 | ## `00_lineapy_quickstart`
4 |
5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here!
6 |
7 | ## `01_using_artifacts`
8 |
9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts.
10 |
11 | ## `02_pipeline_building`
12 |
13 | Data science workflows revolve around building and refining pipelines, but it is often manual and time-consuming work. Having the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating transition from development to production. This tutorial demonstrates how this can be done.
14 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/sklearn.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: sklearn.base
2 | annotations:
3 | - criteria:
4 | class_instance: BaseEstimator
5 | class_method_name: fit
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF # self is a keyword...
9 | - views:
10 | - self_ref: SELF_REF
11 | - result: RESULT
12 | - criteria:
13 | class_instance: BaseEstimator
14 | class_method_name: fit_transform
15 | side_effects:
16 | - mutated_value:
17 | self_ref: SELF_REF
18 | - views:
19 | - self_ref: SELF_REF
20 | - result: RESULT
21 |
--------------------------------------------------------------------------------
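The `views` entries above capture the fact that `fit` returns the estimator itself, so the
result and `self` refer to the same (mutated) object. A quick sketch of the behavior the
annotation models, using a real scikit-learn estimator:

    from sklearn.linear_model import LinearRegression

    X = [[0.0], [1.0], [2.0]]
    y = [0.0, 1.0, 2.0]

    model = LinearRegression()
    fitted = model.fit(X, y)  # fit mutates `self` (it learns coef_)...
    assert fitted is model    # ...and returns it, so the result views self
    assert hasattr(model, "coef_")

--------------------------------------------------------------------------------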
/tests/outputs/expected/sliced_housing_simple_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_simple
4 | from airflow import DAG
5 | from airflow.operators.python_operator import PythonOperator
6 | from airflow.utils.dates import days_ago
7 |
8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)}
9 |
10 | dag = DAG(
11 | dag_id="sliced_housing_simple_dag",
12 | schedule_interval="*/15 * * * *",
13 | max_active_runs=1,
14 | catchup=False,
15 | default_args=default_dag_args,
16 | )
17 |
18 |
19 | p_value = PythonOperator(
20 | dag=dag,
21 | task_id="p_value_task",
22 | python_callable=sliced_housing_simple.p_value,
23 | )
24 |
--------------------------------------------------------------------------------
/lineapy/annotations/external/gym.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: gym.wrappers.monitor
2 | annotations:
3 | - criteria:
4 | class_instance: Monitor
5 | class_method_name: seed
6 | side_effects:
7 | - mutated_value:
8 | self_ref: SELF_REF # self is a keyword...
9 | - views:
10 | - self_ref: SELF_REF
11 | - result: RESULT
12 | - criteria:
13 | class_instance: Monitor
14 | class_method_name: reset
15 | side_effects:
16 | - mutated_value:
17 | self_ref: SELF_REF # self is a keyword...
18 | - views:
19 | - self_ref: SELF_REF
20 | - result: RESULT
21 |
--------------------------------------------------------------------------------
/tests/unit/cli/test_cli.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from lineapy.cli import cli
4 |
5 |
6 | @pytest.mark.parametrize(
7 | "test_input,expected",
8 | [
9 | ("xyz", "xyz"),
10 | ("test.yml", "test.yml"),
11 | ("test.yaml", "test"),
12 | ("test.annotations.yaml", "test"),
13 | ("tet.annotation.yaml", "tet.annotation"),
14 | ("tet.annotation.yml", "tet.annotation.yml"),
15 | ("explicit .yaml", "explicit"),
16 | ("implicit . annotations . yaml", "implicit"),
17 | ],
18 | )
19 | def test_remove_annotations_file_extension(test_input, expected):
20 |
21 | assert cli.remove_annotations_file_extension(test_input) == expected
22 |
--------------------------------------------------------------------------------
/docs/mkdocs/tutorials/README.md:
--------------------------------------------------------------------------------
1 | # LineaPy Tutorials
2 |
3 | ## `00_lineapy_quickstart`
4 |
5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here!
6 |
7 | ## `01_using_artifacts`
8 |
9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts.
10 |
11 | ## `02_pipeline_building`
12 |
13 | Data science workflows revolve around building and refining pipelines, but this is often manual, time-consuming work. With the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating the transition from development to production. This tutorial demonstrates how this can be done.
14 |
--------------------------------------------------------------------------------
/examples/tutorials/README.md:
--------------------------------------------------------------------------------
1 | # LineaPy Tutorials
2 |
3 | ## `00_lineapy_quickstart`
4 |
5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here!
6 |
7 | ## `01_using_artifacts`
8 |
9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts.
10 |
11 | ## `02_pipeline_building`
12 |
13 | Data science workflows revolve around building and refining pipelines, but this is often manual, time-consuming work. With the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating the transition from development to production. This tutorial demonstrates how this can be done.
14 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_multioutput_plot_classifier_chain_yeast].py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_openml
2 | from sklearn.linear_model import LogisticRegression
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.multioutput import ClassifierChain
5 |
6 | X, Y = fetch_openml("yeast", version=4, return_X_y=True)
7 | Y = Y == "TRUE"
8 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
9 | base_lr = LogisticRegression()
10 | chains = [ClassifierChain(base_lr, order="random", random_state=i) for i in range(10)]
11 | for chain in chains:
12 | chain.fit(X_train, Y_train)
13 | linea_artifact_value = chains
14 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/lineapy-notebook/lineapy_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "storage_options": {
3 | "key": "lineapy",
4 | "secret": "lineapypassword",
5 | "client_kwargs": {
6 | "endpoint_url": "http://minio:9000"
7 | }
8 | },
9 | "artifact_storage_dir": "s3://lineapy-artifact-store",
10 | "customized_annotation_folder": "/home/jovyan/work/.lineapy/custom-annotations",
11 | "database_url": "postgresql://lineapy:lineapypassword@postgres-lineapy:5432/lineapy_artifact_store",
12 | "do_not_track": "True",
13 | "home_dir": "/home/jovyan/work/.lineapy",
14 | "logging_file": "/home/jovyan/work/.lineapy/lineapy.log",
15 | "logging_level": "INFO"
16 | }
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pandas_stats].py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import pandas as pd
4 |
5 | data_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data"
6 | data = pd.read_csv(data_url, sep="\\s+", parse_dates=[[0, 1, 2]])
7 |
8 |
9 | def fix_century(x):
10 | year = x.year - 100 if x.year > 1989 else x.year
11 | return datetime.date(year, x.month, x.day)
12 |
13 |
14 | data["Yr_Mo_Dy"] = data["Yr_Mo_Dy"].apply(fix_century)
15 | data["Yr_Mo_Dy"] = pd.to_datetime(data["Yr_Mo_Dy"])
16 | data = data.set_index("Yr_Mo_Dy")
17 | weekly = data.resample("W").agg(["min", "max", "mean", "std"])
18 | linea_artifact_value = weekly
19 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_model_selection_plot_randomized_search].py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.datasets import load_digits
3 | from sklearn.linear_model import SGDClassifier
4 | from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
5 |
6 | X, y = load_digits(return_X_y=True, n_class=3)
7 | clf = SGDClassifier(loss="hinge", penalty="elasticnet", fit_intercept=True)
8 | param_grid = {
9 | "average": [True, False],
10 | "l1_ratio": np.linspace(0, 1, num=10),
11 | "alpha": np.power(10, np.arange(-2, 1, dtype=float)),
12 | }
13 | grid_search = GridSearchCV(clf, param_grid=param_grid)
14 | grid_search.fit(X, y)
15 | linea_artifact_value = grid_search
16 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/task/task_function.jinja:
--------------------------------------------------------------------------------
1 | {{ function_decorator }}
2 | def task_{{function_name}}({{user_input_variables}}):
3 | {%- if include_imports_locally %}
4 | import {{ MODULE_NAME }}
5 | import pickle, pathlib
6 | {%- endif %}
7 | {% for typing_block in typing_blocks %}
8 | {{typing_block | indent(4, True) }}
9 | {% endfor %}
10 | {% for loading_block in loading_blocks %}
11 | {{loading_block | indent(4, True) }}
12 | {% endfor %}
13 | {{ pre_call_block | indent(4, True) }}
14 | {{ call_block | indent(4, True) }}
15 | {{ post_call_block | indent(4, True) }}
16 | {% for dumping_block in dumping_blocks %}
17 | {{dumping_block | indent(4, True) }}
18 | {% endfor %}
19 | {{return_block | indent(4, True) }}
--------------------------------------------------------------------------------
/lineapy/system_tracing/_object_side_effect.py:
--------------------------------------------------------------------------------
1 | """
2 | These classes represent side effects, where the values are actual
3 | Python objects, in comparison to the other two representations,
4 | where the values are either references to a certain argument (i.e. the first arg)
5 | or to a node.
6 | """
7 | from dataclasses import dataclass
8 | from typing import List, Union
9 |
10 |
11 | @dataclass
12 | class ViewOfObjects:
13 | objects: List[object]
14 |
15 |
16 | @dataclass
17 | class MutatedObject:
18 | object: object
19 |
20 |
21 | @dataclass
22 | class ImplicitDependencyObject:
23 | object: object
24 |
25 |
26 | ObjectSideEffect = Union[
27 | ViewOfObjects, MutatedObject, ImplicitDependencyObject
28 | ]
29 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/dvc/dvc_dag_StagePerArtifact.jinja:
--------------------------------------------------------------------------------
1 | stages:
2 | {% for task_name, task_def in TASK_DEFS.items() %}
3 | {{ task_name }}:
4 | cmd: python task_{{ task_name }}.py
5 | deps:
6 | - {{ MODULE_NAME }}.py
7 | - task_{{ task_name }}.py
8 | {%- if task_def.loaded_input_variables|length > 0 %}
9 | {%- for dep in task_def.loaded_input_variables %}
10 | - {{ dep }}.pickle
11 | {%- endfor %}
12 | {%- endif %}
13 | {%- if task_def.return_vars|length > 0 %}
14 | outs:
15 | {%- for out in task_def.return_vars %}
16 | - {{ out }}.pickle
17 | {%- endfor %}
18 | {%- endif %}
19 | {% endfor %}
20 |
21 |
--------------------------------------------------------------------------------
/tests/integration/slices/pandas_timeseries.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # apple_months
3 | # from file:
4 | # sources/pandas_exercises/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_timeseries]'
8 |
9 | import pandas as pd
10 |
11 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv"
12 | apple = pd.read_csv(url)
13 | apple.Date = pd.to_datetime(apple.Date)
14 | apple = apple.set_index("Date")
15 | apple_months = apple.resample("BM").mean()
16 | linea_artifact_value = apple_months
17 |
--------------------------------------------------------------------------------
/lineapy/transformer/py38_transformer.py:
--------------------------------------------------------------------------------
1 | import ast
2 |
3 | from lineapy.transformer.base_transformer import BaseTransformer
4 |
5 |
6 | class Py38Transformer(BaseTransformer):
7 | def visit_Index(self, node: ast.Index) -> ast.AST:
8 | # ignoring types because these classes were entirely removed without backward support in 3.9
9 | return self.visit(node.value) # type: ignore
10 |
11 | def visit_ExtSlice(self, node: ast.ExtSlice) -> ast.Tuple:
12 | # ignoring types because these classes were entirely removed without backward support in 3.9
13 | elem_nodes = [self.visit(elem) for elem in node.dims] # type: ignore
14 | return ast.Tuple(
15 | elts=list(elem_nodes),
16 | )
17 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_list_comp/test_returns_value.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="x = [i + 1 for i in range(3)]",
8 | location=PosixPath("[source file path]"),
9 | )
10 | call_1 = CallNode(
11 | source_location=SourceLocation(
12 | lineno=1,
13 | col_offset=4,
14 | end_lineno=1,
15 | end_col_offset=29,
16 | source_code=source_1.id,
17 | ),
18 | function_id=LookupNode(
19 | name="l_exec_expr",
20 | ).id,
21 | positional_args=[
22 | LiteralNode(
23 | value="[i + 1 for i in range(3)]",
24 | ).id
25 | ],
26 | )
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Report a bug in Linea
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **python version**:
11 | *what python version are you using?*
12 |
13 | **lineapy version**:
14 | *what version of lineapy are you using or which commit if installed from source?*
15 |
16 |
17 | **Your code**:
18 | *What code did you try to run with lineapy?*
19 |
20 | ```python
21 | ...
22 | ```
23 |
24 | **Issue**:
25 | *What went wrong when trying to run this code?*
26 |
27 | **Notebook(s) or script(s) to reproduce the issue**
28 | Alternatively, please upload a Linea notebook or Python script (or a set of notebooks/scripts if the bug is caused by cross-session interactions) that can reproduce the bug.
29 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pandas_merge].py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | raw_data_1 = {
4 | "subject_id": ["1", "2", "3", "4", "5"],
5 | "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
6 | "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"],
7 | }
8 | raw_data_2 = {
9 | "subject_id": ["4", "5", "6", "7", "8"],
10 | "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
11 | "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"],
12 | }
13 | data1 = pd.DataFrame(raw_data_1, columns=["subject_id", "first_name", "last_name"])
14 | data2 = pd.DataFrame(raw_data_2, columns=["subject_id", "first_name", "last_name"])
15 | all_data_col = pd.concat([data1, data2], axis=1)
16 | linea_artifact_value = all_data_col
17 |
--------------------------------------------------------------------------------
/tests/integration/sources/matplotlib-tutorial/scripts/dash_joinstyle.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | size = 256,16
5 | dpi = 72.0
6 | figsize= size[0]/float(dpi),size[1]/float(dpi)
7 | fig = plt.figure(figsize=figsize, dpi=dpi)
8 | fig.patch.set_alpha(0)
9 | plt.axes([0,0,1,1], frameon=False)
10 |
11 | plt.plot(np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'miter')
12 | plt.plot(4+np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'bevel')
13 | plt.plot(8+np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'round')
14 |
15 | plt.xlim(0,12), plt.ylim(-1,2)
16 | plt.xticks([]), plt.yticks([])
17 |
18 | plt.savefig('../figures/dash_joinstyle.png', dpi=dpi)
19 | #show()
20 |
--------------------------------------------------------------------------------
/tests/housing.py:
--------------------------------------------------------------------------------
1 | import altair as alt
2 | import pandas as pd
3 | import seaborn as sns
4 | from sklearn.ensemble import RandomForestClassifier
5 |
6 | import lineapy
7 |
8 | alt.data_transformers.enable("json")
9 | alt.renderers.enable("mimetype")
10 |
11 | assets = pd.read_csv("ames_train_cleaned.csv")
12 |
13 | sns.relplot(data=assets, x="Year_Built", y="SalePrice", size="Lot_Area")
14 |
15 |
16 | def is_new(col):
17 | return col > 1970
18 |
19 |
20 | assets["is_new"] = is_new(assets["Year_Built"])
21 |
22 | clf = RandomForestClassifier(random_state=0)
23 | y = assets["is_new"]
24 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
25 |
26 | clf.fit(x, y)
27 | p = clf.predict([[100 * 1000, 10, 4]])
28 | lineapy.save(x, "x")
29 | lineapy.save(y, "y")
30 | lineapy.save(p, "p value")
31 |
--------------------------------------------------------------------------------
/tests/integration/slices/matplotlib_alpha.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # lineapy.file_system
3 | # from file:
4 | # sources/matplotlib-tutorial/scripts/alpha.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_alpha]'
8 |
9 | import matplotlib.pyplot as plt
10 |
11 | size = 256, 16
12 | dpi = 72.0
13 | figsize = size[0] / float(dpi), size[1] / float(dpi)
14 | fig = plt.figure(figsize=figsize, dpi=dpi)
15 | fig.patch.set_alpha(0)
16 | plt.axes([0, 0.1, 1, 0.8], frameon=False)
17 | for i in range(1, 11):
18 | plt.axvline(i, linewidth=1, color="blue", alpha=0.25 + 0.75 * i / 10.0)
19 | plt.xlim(0, 11)
20 | plt.xticks([]), plt.yticks([])
21 | plt.savefig("../figures/alpha.png", dpi=dpi)
22 |
--------------------------------------------------------------------------------
/tests/__snapshots__/test_ipython/test_to_airflow[no_config-module].py:
--------------------------------------------------------------------------------
1 | def get_a():
2 | a = [1, 2, 3]
3 | return a
4 |
5 |
6 | def run_session_including_a():
7 | # Given multiple artifacts, we need to save each right after
8 | # its calculation to protect from any irrelevant downstream
9 | # mutations (e.g., inside other artifact calculations)
10 | import copy
11 |
12 | artifacts = dict()
13 | a = get_a()
14 | artifacts["a"] = copy.deepcopy(a)
15 | return artifacts
16 |
17 |
18 | def run_all_sessions():
19 | artifacts = dict()
20 | artifacts.update(run_session_including_a())
21 | return artifacts
22 |
23 |
24 | if __name__ == "__main__":
25 | # Edit this section to customize the behavior of artifacts
26 | artifacts = run_all_sessions()
27 | print(artifacts)
28 |
--------------------------------------------------------------------------------
/tests/__snapshots__/test_ipython/test_to_airflow[with_config-module].py:
--------------------------------------------------------------------------------
1 | def get_a():
2 | a = [1, 2, 3]
3 | return a
4 |
5 |
6 | def run_session_including_a():
7 | # Given multiple artifacts, we need to save each right after
8 | # its calculation to protect from any irrelevant downstream
9 | # mutations (e.g., inside other artifact calculations)
10 | import copy
11 |
12 | artifacts = dict()
13 | a = get_a()
14 | artifacts["a"] = copy.deepcopy(a)
15 | return artifacts
16 |
17 |
18 | def run_all_sessions():
19 | artifacts = dict()
20 | artifacts.update(run_session_including_a())
21 | return artifacts
22 |
23 |
24 | if __name__ == "__main__":
25 | # Edit this section to customize the behavior of artifacts
26 | artifacts = run_all_sessions()
27 | print(artifacts)
28 |
--------------------------------------------------------------------------------
/tests/integration/slices/matplotlib_aliased.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # lineapy.file_system
3 | # from file:
4 | # sources/matplotlib-tutorial/scripts/aliased.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_aliased]'
8 |
9 | import matplotlib.pyplot as plt
10 |
11 | size = 128, 16
12 | dpi = 72.0
13 | figsize = size[0] / float(dpi), size[1] / float(dpi)
14 | fig = plt.figure(figsize=figsize, dpi=dpi)
15 | fig.patch.set_alpha(0)
16 | plt.axes([0, 0, 1, 1], frameon=False)
17 | plt.rcParams["text.antialiased"] = False
18 | plt.text(0.5, 0.5, "Aliased", ha="center", va="center")
19 | plt.xlim(0, 1), plt.ylim(0, 1)
20 | plt.xticks([]), plt.yticks([])
21 | plt.savefig("../figures/aliased.png", dpi=dpi)
22 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_multiple
4 | from airflow import DAG
5 | from airflow.operators.python_operator import PythonOperator
6 | from airflow.utils.dates import days_ago
7 |
8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)}
9 |
10 | dag = DAG(
11 | dag_id="sliced_housing_multiple_dag",
12 | schedule_interval="*/15 * * * *",
13 | max_active_runs=1,
14 | catchup=False,
15 | default_args=default_dag_args,
16 | )
17 |
18 |
19 | p_value = PythonOperator(
20 | dag=dag,
21 | task_id="p_value_task",
22 | python_callable=sliced_housing_multiple.p_value,
23 | )
24 |
25 | y = PythonOperator(
26 | dag=dag,
27 | task_id="y_task",
28 | python_callable=sliced_housing_multiple.y,
29 | )
30 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # LineaPy Examples
2 |
3 | We believe examples are the best way to learn something, so we have created hands-on notebooks that illustrate different uses of LineaPy.
4 |
5 | - `self-hosting-lineapy`: This demo folder provides an easy-to-run, local data science development environment that showcases the capabilities of LineaPy. It gives users an easy way to run end-to-end tutorials and prototype solutions to their problems using LineaPy.
6 |
7 | - `tutorials`: These tutorial notebooks focus on walking you through key functionalities of LineaPy with simple examples. If you are new to LineaPy and want to learn the basics, start here!
8 |
9 | - `use-cases`: These use-case notebooks contain more realistic examples that show how LineaPy can help data science work in various domains such as real estate, finance, and medicine.
10 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/argo/argo_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | # Be sure to build this docker file with the following command
2 | # docker build -t {{ pipeline_name }}:lineapy -f {{ pipeline_name }}_Dockerfile .
3 |
4 | FROM python:{{ python_version }}
5 |
6 | RUN mkdir /tmp/installers
7 | WORKDIR /tmp/installers
8 |
9 | # Copy all the requirements to run current DAG
10 | COPY ./{{ pipeline_name }}_requirements.txt ./
11 |
12 | # Install required libs
13 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
14 |
15 | WORKDIR /opt/argo/dags
16 |
17 | # Install git and argo
18 | RUN apt update
19 | RUN apt install -y git
20 | RUN pip install argo-workflows
21 | RUN pip install hera-workflows
22 |
23 | COPY ./{{ pipeline_name }}_module.py ./
24 | COPY ./{{ pipeline_name }}_dag.py ./
25 |
26 | ENTRYPOINT [ "argo", "repro", "run_all_sessions"]
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_tree_plot_cost_complexity_pruning].py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import load_breast_cancer
2 | from sklearn.model_selection import train_test_split
3 | from sklearn.tree import DecisionTreeClassifier
4 |
5 | X, y = load_breast_cancer(return_X_y=True)
6 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
7 | clf = DecisionTreeClassifier(random_state=0)
8 | path = clf.cost_complexity_pruning_path(X_train, y_train)
9 | ccp_alphas, impurities = path.ccp_alphas, path.impurities
10 | clfs = []
11 | for ccp_alpha in ccp_alphas:
12 | clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
13 | clf.fit(X_train, y_train)
14 | clfs.append(clf)
15 | clfs = clfs[:-1]
16 | depth = [clf.tree_.max_depth for clf in clfs]
17 | linea_artifact_value = depth
18 |
--------------------------------------------------------------------------------
/tests/integration/slices/pandas_apply.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # crimes
3 | # from file:
4 | # sources/pandas_exercises/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_apply]'
8 |
9 | import pandas as pd
10 |
11 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv"
12 | crime = pd.read_csv(url)
13 | crime.Year = pd.to_datetime(crime.Year, format="%Y")
14 | crime = crime.set_index("Year", drop=True)
15 | del crime["Total"]
16 | crimes = crime.resample("10AS").sum()
17 | population = crime["Population"].resample("10AS").max()
18 | crimes["Population"] = population
19 | linea_artifact_value = crimes
20 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_preprocessing_plot_scaling_importance].py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import load_wine
2 | from sklearn.decomposition import PCA
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.naive_bayes import GaussianNB
5 | from sklearn.pipeline import make_pipeline
6 | from sklearn.preprocessing import StandardScaler
7 |
8 | RANDOM_STATE = 42
9 | features, target = load_wine(return_X_y=True)
10 | X_train, X_test, y_train, y_test = train_test_split(
11 | features, target, test_size=0.3, random_state=RANDOM_STATE
12 | )
13 | unscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB())
14 | unscaled_clf.fit(X_train, y_train)
15 | std_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB())
16 | std_clf.fit(X_train, y_train)
17 | linea_artifact_value = unscaled_clf, std_clf
18 |
--------------------------------------------------------------------------------
/tests/integration/slices/pandas_deleting.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # iris
3 | # from file:
4 | # sources/pandas_exercises/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_deleting]'
8 |
9 | import numpy as np
10 | import pandas as pd
11 |
12 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
13 | iris = pd.read_csv(url)
14 | iris.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
15 | iris.iloc[10:30, 2:3] = np.nan
16 | iris.petal_length.fillna(1, inplace=True)
17 | del iris["class"]
18 | iris.iloc[0:3, :] = np.nan
19 | iris = iris.dropna(how="any")
20 | iris = iris.reset_index(drop=True)
21 | linea_artifact_value = iris
22 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/airflow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM apache/airflow:slim-latest-python3.10
2 |
3 | USER $AIRFLOW_UID
4 |
5 | # prevent pip timing out on slow internet connections
6 | RUN export PIP_DEFAULT_TIMEOUT=1000
7 |
8 | # deps for lineapy
9 | RUN pip install -U pip
10 | RUN pip install fsspec s3fs psycopg2-binary
11 |
12 | # install lineapy
13 | RUN pip install lineapy==0.2.1
14 |
15 | # other nice to have libraries
16 | RUN pip install pandas==1.4.3 scikit-learn==1.1.2
17 |
18 | COPY airflow.cfg /opt/airflow/airflow.cfg
19 | COPY airflow-start.sh /airflow-start.sh
20 |
21 | USER root
22 | RUN chown $AIRFLOW_UID /opt/airflow/airflow.cfg
23 | RUN chown $AIRFLOW_UID /airflow-start.sh
24 | RUN chmod +x /airflow-start.sh
25 | USER $AIRFLOW_UID
26 |
27 | RUN mkdir /opt/airflow/plugins
28 |
29 | ENV AIRFLOW_HOME=/opt/airflow
30 |
31 | ENTRYPOINT ["/airflow-start.sh"]
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/module_import_from:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.linear_model import LinearRegression
3 |
4 | import lineapy
5 |
6 | art = {}
7 | # Load train data
8 | url1 = "https://raw.githubusercontent.com/LineaLabs/lineapy/main/examples/tutorials/data/iris.csv"
9 | train_df = pd.read_csv(url1)
10 | # Initiate the model
11 | mod = LinearRegression()
12 | # Fit the model
13 | mod.fit(
14 | X=train_df[["petal.width"]],
15 | y=train_df["petal.length"],
16 | )
17 | # Save the fitted model as an artifact
18 | art["model"] = lineapy.save(mod, "iris_model")
19 | # Load data to predict (assume it comes from a different source)
20 | pred_df = pd.read_csv(url1)
21 | # Make predictions
22 | petal_length_pred = mod.predict(X=pred_df[["petal.width"]])
23 | # Save the predictions
24 | art["pred"] = lineapy.save(petal_length_pred, "iris_petal_length_pred")
25 |
--------------------------------------------------------------------------------
/tests/integration/slices/matplotlib_exercise_3.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # plt.gcf()
3 | # from file:
4 | # sources/matplotlib-tutorial/scripts/exercice_3.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_exercise_3]'
8 |
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 |
12 | plt.figure(figsize=(8, 5), dpi=80)
13 | plt.subplot(111)
14 | X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
15 | C, S = np.cos(X), np.sin(X)
16 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
17 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
18 | plt.xlim(-4.0, 4.0)
19 | plt.xticks(np.linspace(-4, 4, 9, endpoint=True))
20 | plt.ylim(-1.0, 1.0)
21 | plt.yticks(np.linspace(-1, 1, 5, endpoint=True))
22 | linea_artifact_value = plt.gcf()
23 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | extend-exclude = __snapshots__,examples,sliced_housing_dag*.py,./lineapy/editors/ipython.py,.ipython,tutorials,slices,sources,envs,*housing.py,tests/integration/sources,tests/integration/slices,outputs,env
3 | per-file-ignores =
4 | # ignore get_ipython missing
5 | lineapy/ipython.py: F821
6 |
7 | # E203 is for compat with black
8 | # (https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#configuration)
9 |
10 | # E501 is line length, which is already covered by black, and was raising
11 | # errors on string/comment lines that were too long
12 |
13 | # W291 is for trailing whitespace, which is also already covered by black
14 | # besides in this case we want to ignore trailing whitespace in docstrings
15 |
16 | # F841 is raised on unused local variables, which sometimes we don't mind
17 | extend-ignore = E203,E501,W291,F841
18 |
--------------------------------------------------------------------------------
/lineapy/system_tracing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module adds support for using sys.settrace to understand what happens
3 | during a subset of code execution that's passed in. In the context of how
4 | it's currently used, it's limited to the "blackbox" execs (`l_exec_statement`).
5 | It can be used and tested independently.
6 |
7 | At a high level, users could:
8 |
9 | 1. Use `exec_and_record_function_calls.py` as an entry point; it uses
10 | `sys.settrace` to trace every bytecode execution and `_op_stack.py` to look at
11 | the bytecode stack during tracing. It translates different bytecode
12 | instructions into the corresponding Python function calls.
13 | 2. Use `function_calls_to_side_effects.py` to translate the sequence of calls
14 | that were recorded into the side effects produced on nodes (mapping Python
15 | changes to graph changes).
16 | """
17 |
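As a rough, self-contained sketch of the mechanism this docstring describes (not LineaPy's actual tracer, and independent of `l_exec_statement`; the traced function and printed format are hypothetical), opcode-level tracing with `sys.settrace` and `frame.f_trace_opcodes` might look like this:

```python
import dis
import sys


def opcode_tracer(frame, event, arg):
    # Request per-opcode callbacks for this frame (Python 3.7+),
    # instead of only the default per-line "line" events.
    frame.f_trace_opcodes = True
    if event == "opcode":
        # f_lasti is the byte offset of the instruction about to execute.
        opname = dis.opname[frame.f_code.co_code[frame.f_lasti]]
        print(f"{frame.f_code.co_name}:{frame.f_lineno} {opname}")
    return opcode_tracer


def blackbox():
    xs = []
    xs.append(1)  # the append shows up as call-related opcodes in the trace
    return xs


sys.settrace(opcode_tracer)
try:
    blackbox()
finally:
    sys.settrace(None)
```

Translating the stream of observed instructions into recorded function calls, and then into node-level side effects, is what the two modules listed above handle.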
--------------------------------------------------------------------------------
/tests/end_to_end/test_decorator.py:
--------------------------------------------------------------------------------
1 | from lineapy.utils.utils import prettify
2 |
3 |
4 | def test_user_defined_decorator(execute):
5 | code = """x=[]
6 | def append1(func):
7 | def wrapper():
8 | func()
9 | x.append(1)
10 |
11 | return wrapper
12 |
13 |
14 | @append1
15 | def append2():
16 | x.append(2)
17 |
18 | append2()
19 | """
20 | res = execute(code, artifacts=["x"])
21 | assert len(res.values["x"]) == 2
22 | assert res.values["x"][0] == 2 and res.values["x"][1] == 1
23 | assert res.artifacts["x"] == prettify(code)
24 |
25 |
26 | def test_functools_decorator(execute):
27 | code = """from functools import lru_cache
28 | @lru_cache(maxsize=1)
29 | def f():
30 | return 1
31 |
32 | x = f()
33 | """
34 | res = execute(code, artifacts=["x"])
35 | assert res.values["x"] == 1
36 | assert res.artifacts["x"] == prettify(code)
37 |
--------------------------------------------------------------------------------
/tests/integration/sources/matplotlib-tutorial/scripts/exercice_3.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2015, Nicolas P. Rougier. All Rights Reserved.
3 | # Distributed under the (new) BSD License. See LICENSE.txt for more info.
4 | # -----------------------------------------------------------------------------
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 |
8 | plt.figure(figsize=(8,5), dpi=80)
9 | plt.subplot(111)
10 |
11 | X = np.linspace(-np.pi, np.pi, 256,endpoint=True)
12 | C,S = np.cos(X), np.sin(X)
13 |
14 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
15 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
16 |
17 | plt.xlim(-4.0,4.0)
18 | plt.xticks(np.linspace(-4,4,9,endpoint=True))
19 |
20 | plt.ylim(-1.0,1.0)
21 | plt.yticks(np.linspace(-1,1,5,endpoint=True))
22 |
23 | plt.show()
24 |
--------------------------------------------------------------------------------
/docs/mkdocs/concepts/artifact.md:
--------------------------------------------------------------------------------
1 | # Artifact
2 |
3 | In LineaPy, an artifact refers to any intermediate result from the development process. Most often, an artifact
4 | manifests as a variable that stores data in a specific state (e.g., `my_num = your_num + 10`). In the data science
5 | workflow, an artifact can be a model, a chart, a statistic, a dataframe, or a feature function.
6 |
7 | What makes LineaPy special is that it treats an artifact as both code and value. That is, when storing an artifact,
8 | LineaPy not only records the state (i.e., value) of the variable but also traces and saves all relevant operations
9 | leading to this state — as code. Such a complete development history or *lineage* then allows LineaPy to fully reproduce
10 | the given artifact. Furthermore, it provides the ground to automate data engineering work to bring data science from development to production.
11 |
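For instance (a minimal sketch; the CSV path and variable names are hypothetical), saving an artifact and then retrieving both its value and the code that produced it could look like:

```python
import pandas as pd

import lineapy

df = pd.read_csv("data/sales.csv")  # hypothetical input file
total = df["amount"].sum()          # the state we want to capture

# Store both the value and the lineage (code) behind it.
artifact = lineapy.save(total, "total_sales")

print(artifact.get_value())  # the stored value
print(artifact.get_code())   # the cleaned-up code slice that recreates it
```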
--------------------------------------------------------------------------------
/tests/end_to_end/test_dictionary.py:
--------------------------------------------------------------------------------
1 | from lineapy.utils.utils import prettify
2 |
3 |
4 | def test_basic_dict(execute):
5 | res = execute("x = {'a': 1, 'b': 2}")
6 | assert res.values["x"] == {"a": 1, "b": 2}
7 |
8 |
9 | def test_splatting(execute):
10 | res = execute("x = {1: 2, 2:2, **{1: 3, 2: 3}, 1: 4}")
11 | assert res.values["x"] == {1: 4, 2: 3}
12 |
13 |
14 | def test_dictionary_support(execute):
15 | DICTIONARY_SUPPORT = """import pandas as pd
16 | df = pd.DataFrame({"id": [1,2]})
17 | x = df["id"].sum()
18 | """
19 | res = execute(DICTIONARY_SUPPORT)
20 | assert res.values["x"] == 3
21 |
22 |
23 | def test_dict_update_mutates(execute):
24 | code = """x = {'a': 1, 'b': 2}
25 | x.update({'a': 3})
26 | """
27 |
28 | res = execute(code, artifacts=["x"])
29 | assert res.values["x"] == {"a": 3, "b": 2}
30 | assert res.slice("x") == prettify(code)
31 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/inputs/housing:
--------------------------------------------------------------------------------
1 | import altair as alt
2 | import pandas as pd
3 | import seaborn as sns
4 | from sklearn.ensemble import RandomForestClassifier
5 |
6 | import lineapy
7 |
8 | alt.data_transformers.enable("json")
9 | alt.renderers.enable("mimetype")
10 |
11 | assets = pd.read_csv(
12 | "https://raw.githubusercontent.com/LineaLabs/lineapy/main/tests/ames_train_cleaned.csv"
13 | )
14 |
15 | sns.relplot(data=assets, x="Year_Built", y="SalePrice", size="Lot_Area")
16 |
17 |
18 | def is_new(col):
19 | return col > 1970
20 |
21 |
22 | assets["is_new"] = is_new(assets["Year_Built"])
23 |
24 | clf = RandomForestClassifier(random_state=0)
25 | y = assets["is_new"]
26 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
27 |
28 | clf.fit(x, y)
29 | p = clf.predict([[100 * 1000, 10, 4]])
30 | lineapy.save(x, "x")
31 | lineapy.save(y, "y")
32 | lineapy.save(p, "p value")
33 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_lists.py:
--------------------------------------------------------------------------------
1 | def test_list_setitem_mutates(execute):
2 | code = """x = [1]
3 | x[0] = 10
4 | """
5 | res = execute(code, artifacts=["x"])
6 | assert res.values["x"] == [10]
7 | assert res.slice("x") == code
8 |
9 |
10 | def test_list_getitem_view(execute):
11 | code = """y = []
12 | x = [y]
13 | y.append(10)
14 | """
15 | res = execute(code, artifacts=["x"])
16 | assert res.slice("x") == code
17 |
18 |
19 | def test_list_append_mutates(execute):
20 | code = """x = []
21 | x.append(10)
22 | """
23 | res = execute(code, artifacts=["x"])
24 | assert res.slice("x") == code
25 |
26 |
27 | def test_list_append_mutates_inner(execute):
28 | code = """x = []
29 | y = [x]
30 | x.append(10)
31 | y[0].append(11)
32 | """
33 | res = execute(code, artifacts=["x", "y"])
34 | assert res.slice("x") == code
35 | assert res.slice("y") == code
36 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_semi_supervised_plot_label_propagation_structure.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # label_spread
3 | # from file:
4 | # sources/scikit-learn/examples/semi_supervised/plot_label_propagation_structure.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_semi_supervised_plot_label_propagation_structure]'
8 |
9 | import numpy as np
10 | from sklearn.datasets import make_circles
11 | from sklearn.semi_supervised import LabelSpreading
12 |
13 | n_samples = 200
14 | X, y = make_circles(n_samples=n_samples, shuffle=False)
15 | outer, inner = 0, 1
16 | labels = np.full(n_samples, -1.0)
17 | labels[0] = outer
18 | labels[-1] = inner
19 | label_spread = LabelSpreading(kernel="knn", alpha=0.8)
20 | label_spread.fit(X, labels)
21 | linea_artifact_value = label_spread
22 |
--------------------------------------------------------------------------------
/examples/use_cases/README.md:
--------------------------------------------------------------------------------
1 | # LineaPy Use Cases
2 |
3 | ## `predict_house_price`
4 |
5 | This use case illustrates how LineaPy can facilitate an end-to-end data science workflow for housing price prediction.
6 | The notebook comes in 3 main sections:
7 |
8 | 1. ***Exploratory Data Analysis and Feature Engineering.*** Using various statistics and visualizations, we explore the given data
9 | to create useful features. We use LineaPy to store the transformed data as an artifact, which allows us to automatically refactor and clean up the code.
10 |
11 | 2. ***Training a Model.*** Using the transformed data, we train a model that can predict housing prices. We then store
12 | the trained model as an artifact.
13 |
14 | 3. ***Building an End-to-End Pipeline.*** Using artifacts saved in this session, we quickly build an end-to-end
15 | pipeline that combines data preprocessing and model training, moving closer to production.
16 |
--------------------------------------------------------------------------------
/lineapy/annotations/internal/io.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: io
2 | annotations:
3 | - criteria:
4 | function_name: open
5 | side_effects:
6 | - dependency:
7 | external_state: file_system
8 | - views:
9 | - result: RESULT
10 | - external_state: file_system
11 | - criteria:
12 | class_method_names:
13 | - close
14 | - flush
15 | - readline
16 | - readlines
17 | - seek
18 | - truncate
19 | - writelines
20 | - write
21 | class_instance: IOBase
22 | side_effects:
23 | - mutated_value:
24 | self_ref: SELF_REF
25 | - criteria:
26 | class_method_names:
27 | - __enter__
28 | class_instance: IOBase
29 | side_effects:
30 | - views:
31 | - result: RESULT
32 | - self_ref: SELF_REF
33 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/kubeflow/kubeflow_dockerfile.jinja:
--------------------------------------------------------------------------------
1 | # Be sure to build this docker file with the following command
2 | # docker build -t {{ pipeline_name }}:lineapy -f {{ pipeline_name }}_Dockerfile .
3 |
4 | FROM python:{{ python_version }}
5 |
6 | RUN mkdir /tmp/installers
7 | WORKDIR /tmp/installers
8 |
9 | # Copy all the requirements to run current DAG
10 | COPY ./{{ pipeline_name }}_requirements.txt ./
11 |
12 | # Install kubeflow python sdk
13 | RUN apt update
14 | RUN pip install kfp
15 |
16 | # Install required libs
17 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt
18 |
19 | WORKDIR /home
20 | COPY ./{{ pipeline_name }}_module.py ./
21 | COPY ./{{ pipeline_name }}_dag.py ./
22 |
23 | # Set environment variable so module file can be
24 | # found by kubeflow components
25 | ENV PYTHONPATH=/home:${PYTHONPATH}
26 |
27 | ENTRYPOINT ["python", "{{ pipeline_name }}_module.py"]
28 |
29 |
30 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_w_dependencies_dag.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import sliced_housing_multiple_w_dependencies
4 | from airflow import DAG
5 | from airflow.operators.python_operator import PythonOperator
6 | from airflow.utils.dates import days_ago
7 |
8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)}
9 |
10 | dag = DAG(
11 | dag_id="sliced_housing_multiple_w_dependencies_dag",
12 | schedule_interval="*/15 * * * *",
13 | max_active_runs=1,
14 | catchup=False,
15 | default_args=default_dag_args,
16 | )
17 |
18 |
19 | p_value = PythonOperator(
20 | dag=dag,
21 | task_id="p_value_task",
22 | python_callable=sliced_housing_multiple_w_dependencies.p_value,
23 | )
24 |
25 | y = PythonOperator(
26 | dag=dag,
27 | task_id="y_task",
28 | python_callable=sliced_housing_multiple_w_dependencies.y,
29 | )
30 |
31 |
32 | p_value >> y
33 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_op/test_sub.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""x = 1
8 | y=-x""",
9 | location=PosixPath("[source file path]"),
10 | )
11 | call_1 = CallNode(
12 | source_location=SourceLocation(
13 | lineno=2,
14 | col_offset=2,
15 | end_lineno=2,
16 | end_col_offset=4,
17 | source_code=source_1.id,
18 | ),
19 | function_id=LookupNode(
20 | name="neg",
21 | ).id,
22 | positional_args=[
23 | LiteralNode(
24 | source_location=SourceLocation(
25 | lineno=1,
26 | col_offset=4,
27 | end_lineno=1,
28 | end_col_offset=5,
29 | source_code=source_1.id,
30 | ),
31 | value=1,
32 | ).id
33 | ],
34 | )
35 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[matplotlib_dash_joinstyle].py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | size = 256, 16
5 | dpi = 72.0
6 | figsize = size[0] / float(dpi), size[1] / float(dpi)
7 | fig = plt.figure(figsize=figsize, dpi=dpi)
8 | plt.axes([0, 0, 1, 1], frameon=False)
9 | plt.plot(
10 | np.arange(3),
11 | [0, 1, 0],
12 | color="blue",
13 | dashes=[12, 5],
14 | linewidth=8,
15 | dash_joinstyle="miter",
16 | )
17 | plt.plot(
18 | 4 + np.arange(3),
19 | [0, 1, 0],
20 | color="blue",
21 | dashes=[12, 5],
22 | linewidth=8,
23 | dash_joinstyle="bevel",
24 | )
25 | plt.plot(
26 | 8 + np.arange(3),
27 | [0, 1, 0],
28 | color="blue",
29 | dashes=[12, 5],
30 | linewidth=8,
31 | dash_joinstyle="round",
32 | )
33 | plt.xlim(0, 12), plt.ylim(-1, 2)
34 | plt.xticks([]), plt.yticks([])
35 | plt.savefig("../figures/dash_joinstyle.png", dpi=dpi)
36 |
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
4 | def p_value():
5 | import pandas as pd
6 | from sklearn.ensemble import RandomForestClassifier
7 |
8 | assets = pd.read_csv("ames_train_cleaned.csv")
9 |
10 | def is_new(col):
11 | return col > 1970
12 |
13 | assets["is_new"] = is_new(assets["Year_Built"])
14 | clf = RandomForestClassifier(random_state=0)
15 | y = assets["is_new"]
16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
17 | clf.fit(x, y)
18 | p = clf.predict([[100 * 1000, 10, 4]])
19 | pickle.dump(p, open("pickle-sample.pkl", "wb"))
20 |
21 |
22 | def y():
23 | import pandas as pd
24 |
25 | assets = pd.read_csv("ames_train_cleaned.csv")
26 |
27 | def is_new(col):
28 | return col > 1970
29 |
30 | assets["is_new"] = is_new(assets["Year_Built"])
31 | y = assets["is_new"]
32 | pickle.dump(y, open("pickle-sample.pkl", "wb"))
33 |
--------------------------------------------------------------------------------
/jupyterlab-workspaces/default-37a8.jupyterlab-workspace:
--------------------------------------------------------------------------------
1 | {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:examples/Demo_1_Preprocessing.ipynb","notebook:examples/Demo_2_Modeling.ipynb"]},"current":"notebook:examples/Demo_1_Preprocessing.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":true,"widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0,1,0]},"file-browser-filebrowser:cwd":{"path":"examples"},"cloned-outputs:examples/Preprocessing.ipynb:0":{"data":{"path":"examples/Preprocessing.ipynb","index":0}},"notebook:examples/Demo_1_Preprocessing.ipynb":{"data":{"path":"examples/Demo_1_Preprocessing.ipynb","factory":"Notebook"}},"notebook:examples/Demo_2_Modeling.ipynb":{"data":{"path":"examples/Demo_2_Modeling.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | alembic==1.8.0
2 | altair==4.2.0
3 | astor==0.8.1
4 | astpretty==2.1.0
5 | asttokens==2.1.0
6 | black==22.3.0
7 | click==8.1.2
8 | cloudpickle==2.2.0
9 | coveralls==3.3.1
10 | fastparquet==0.8.0
11 | flake8==4.0.1
12 | graphviz==0.19.2
13 | isort==5.10.1
14 | jupyterlab==3.3.3
15 | matplotlib==3.5.1
16 | mypy==0.931
17 | nbconvert==6.5.1
18 | nbformat==5.3.0
19 | nbval==0.9.6
20 | networkx==2.6.3
21 | pandas==1.3.5
22 | pandoc==2.2
23 | pdbpp==0.10.3
24 | pg==0.1
25 | Pillow==9.1.1
26 | pre-commit==2.18.1
27 | psycopg2-binary==2.9.5
28 | pydantic==1.9.0
29 | pytest==6.2.5
30 | pytest-alembic==0.8.2
31 | pytest-cov==3.0.0
32 | pytest-virtualenv==1.7.0
33 | pytest-xdist==2.5.0
34 | requests==2.27.1
35 | rich==12.2.0
36 | scikit-learn==1.0.2
37 | scipy==1.7.3
38 | scour==0.38.2
39 | seaborn==0.11.2
40 | SQLAlchemy==1.4.35
41 | syrupy==1.4.5
42 | types-mock==4.0.15
43 | types-PyYAML==6.0.5
44 | types-requests==2.27.16
45 | typing-extensions==4.4.0
46 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_op/test_invert.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 1
8 | b=~a""",
9 | location=PosixPath("[source file path]"),
10 | )
11 | call_1 = CallNode(
12 | source_location=SourceLocation(
13 | lineno=2,
14 | col_offset=2,
15 | end_lineno=2,
16 | end_col_offset=4,
17 | source_code=source_1.id,
18 | ),
19 | function_id=LookupNode(
20 | name="invert",
21 | ).id,
22 | positional_args=[
23 | LiteralNode(
24 | source_location=SourceLocation(
25 | lineno=1,
26 | col_offset=4,
27 | end_lineno=1,
28 | end_col_offset=5,
29 | source_code=source_1.id,
30 | ),
31 | value=1,
32 | ).id
33 | ],
34 | )
35 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_op/test_not.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 1
8 | b=not a""",
9 | location=PosixPath("[source file path]"),
10 | )
11 | call_1 = CallNode(
12 | source_location=SourceLocation(
13 | lineno=2,
14 | col_offset=2,
15 | end_lineno=2,
16 | end_col_offset=7,
17 | source_code=source_1.id,
18 | ),
19 | function_id=LookupNode(
20 | name="not_",
21 | ).id,
22 | positional_args=[
23 | LiteralNode(
24 | source_location=SourceLocation(
25 | lineno=1,
26 | col_offset=4,
27 | end_lineno=1,
28 | end_col_offset=5,
29 | source_code=source_1.id,
30 | ),
31 | value=1,
32 | ).id
33 | ],
34 | )
35 |
--------------------------------------------------------------------------------
/docs/mkdocs/concepts/pipeline.md:
--------------------------------------------------------------------------------
1 | # Pipeline
2 |
3 | In the context of data science, a pipeline refers to a series of steps that transform
4 | data into useful information/product. For instance, a common end-to-end machine learning
5 | pipeline includes data preprocessing, model training, and model evaluation steps. These
6 | pipelines are often developed one component at a time. Once the individual components are
7 | developed, they are connected to form an end-to-end pipeline.
8 |
9 | In LineaPy, each component is represented as an artifact, and LineaPy provides APIs to create
10 | pipelines from a group of artifacts. These pipelines can then be run through specific orchestration
11 | engines to handle new data.
12 |
13 | Note that the pipelines created by LineaPy are meant to be reviewed and accepted by developers
14 | before they go into production, and we provide mechanisms to verify the generated pipelines in
15 | the development environment for validation.
16 |
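As a rough sketch of that API (the artifact names, dependency mapping, and output directory here are hypothetical), turning a group of saved artifacts into an Airflow pipeline might look like:

```python
import lineapy

# Assumes "preprocessed_data" and "trained_model" were saved as artifacts
# earlier in the session; the dependency says the model step runs after
# the preprocessing step.
lineapy.to_pipeline(
    artifacts=["preprocessed_data", "trained_model"],
    dependencies={"trained_model": {"preprocessed_data"}},
    framework="AIRFLOW",
    pipeline_name="housing_pipeline",
    output_dir="./pipelines",
)
```

The generated files (pipeline module, DAG, Dockerfile, and requirements) are written to the output directory, where they can be reviewed before deployment.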
--------------------------------------------------------------------------------
/tests/outputs/expected/sliced_housing_multiple_w_dependencies.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
4 | def p_value():
5 | import pandas as pd
6 | from sklearn.ensemble import RandomForestClassifier
7 |
8 | assets = pd.read_csv("ames_train_cleaned.csv")
9 |
10 | def is_new(col):
11 | return col > 1970
12 |
13 | assets["is_new"] = is_new(assets["Year_Built"])
14 | clf = RandomForestClassifier(random_state=0)
15 | y = assets["is_new"]
16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]]
17 | clf.fit(x, y)
18 | p = clf.predict([[100 * 1000, 10, 4]])
19 | pickle.dump(p, open("pickle-sample.pkl", "wb"))
20 |
21 |
22 | def y():
23 | import pandas as pd
24 |
25 | assets = pd.read_csv("ames_train_cleaned.csv")
26 |
27 | def is_new(col):
28 | return col > 1970
29 |
30 | assets["is_new"] = is_new(assets["Year_Built"])
31 | y = assets["is_new"]
32 | pickle.dump(y, open("pickle-sample.pkl", "wb"))
33 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_var_aliasing/test_variable_alias.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 1.2
8 | b = a
9 | """,
10 | location=PosixPath("[source file path]"),
11 | )
12 | call_1 = CallNode(
13 | source_location=SourceLocation(
14 | lineno=2,
15 | col_offset=0,
16 | end_lineno=2,
17 | end_col_offset=5,
18 | source_code=source_1.id,
19 | ),
20 | function_id=LookupNode(
21 | name="l_alias",
22 | ).id,
23 | positional_args=[
24 | LiteralNode(
25 | source_location=SourceLocation(
26 | lineno=1,
27 | col_offset=4,
28 | end_lineno=1,
29 | end_col_offset=7,
30 | source_code=source_1.id,
31 | ),
32 | value=1.2,
33 | ).id
34 | ],
35 | )
36 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_delete.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | """
4 | Test the three parts of #95, to cover the Delete AST node
5 |
6 | https://docs.python.org/3/library/ast.html#ast.Delete
7 | """
8 |
9 |
10 | @pytest.mark.xfail(reason="dont support deleting a variable")
11 | def test_del_var(execute):
12 |
13 | res = execute("a = 1; del a")
14 | assert "a" not in res.values
15 |
16 |
17 | def test_del_subscript(execute):
18 | """
19 | Part of #95
20 | """
21 | res = execute("a = [1]; del a[0]")
22 | assert res.values["a"] == []
23 |
24 |
25 | def test_set_attr(execute):
26 | res = execute("import types; x = types.SimpleNamespace(); x.hi = 1")
27 | assert res.values["x"].hi == 1
28 |
29 |
30 | def test_del_attribute(execute):
31 | """
32 | Part of #95
33 | """
34 | res = execute(
35 | "import types; x = types.SimpleNamespace(); x.hi = 1; del x.hi",
36 | )
37 | x = res.values["x"]
38 | assert not hasattr(x, "hi")
39 |
--------------------------------------------------------------------------------
/tests/integration/slices/pandas_stats.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # weekly
3 | # from file:
4 | # sources/pandas_exercises/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_stats]'
8 |
9 | import datetime
10 |
11 | import pandas as pd
12 |
13 | data_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data"
14 | data = pd.read_csv(data_url, sep="\\s+", parse_dates=[[0, 1, 2]])
15 |
16 |
17 | def fix_century(x):
18 | year = x.year - 100 if x.year > 1989 else x.year
19 | return datetime.date(year, x.month, x.day)
20 |
21 |
22 | data["Yr_Mo_Dy"] = data["Yr_Mo_Dy"].apply(fix_century)
23 | data["Yr_Mo_Dy"] = pd.to_datetime(data["Yr_Mo_Dy"])
24 | data = data.set_index("Yr_Mo_Dy")
25 | weekly = data.resample("W").agg(["min", "max", "mean", "std"])
26 | linea_artifact_value = weekly
27 |
--------------------------------------------------------------------------------
/lineapy/_alembic/versions/41a413504720_add_named_var.py:
--------------------------------------------------------------------------------
1 | """add named var
2 |
3 | Revision ID: 41a413504720
4 | Revises: 38d5f834d3b7
5 | Create Date: 2022-07-06 14:14:42.354458
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 |
11 |
12 | # revision identifiers, used by Alembic.
13 | revision = "41a413504720"
14 | down_revision = "38d5f834d3b7"
15 | branch_labels = None
16 | depends_on = None
17 |
18 |
19 | def upgrade() -> None:
20 | # ### commands auto generated by Alembic - please adjust! ###
21 | op.create_table(
22 | "assigned_variable_node",
23 | sa.Column("id", sa.String(), nullable=False),
24 | sa.Column("variable_name", sa.String(), nullable=False),
25 | sa.PrimaryKeyConstraint("id", "variable_name"),
26 | )
27 | # ### end Alembic commands ###
28 |
29 |
30 | def downgrade() -> None:
31 | # ### commands auto generated by Alembic - please adjust! ###
32 | op.drop_table("assigned_variable_node")
33 | # ### end Alembic commands ###
34 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[sklearn_compose_plot_feature_union].py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import load_iris
2 | from sklearn.decomposition import PCA
3 | from sklearn.feature_selection import SelectKBest
4 | from sklearn.model_selection import GridSearchCV
5 | from sklearn.pipeline import FeatureUnion, Pipeline
6 | from sklearn.svm import SVC
7 |
8 | iris = load_iris()
9 | X, y = iris.data, iris.target
10 | pca = PCA(n_components=2)
11 | selection = SelectKBest(k=1)
12 | combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
13 | X_features = combined_features.fit(X, y).transform(X)
14 | svm = SVC(kernel="linear")
15 | pipeline = Pipeline([("features", combined_features), ("svm", svm)])
16 | param_grid = dict(
17 | features__pca__n_components=[1, 2, 3],
18 | features__univ_select__k=[1, 2],
19 | svm__C=[0.1, 1, 10],
20 | )
21 | grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
22 | grid_search.fit(X, y)
23 | linea_artifact_value = grid_search
24 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_dask.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from lineapy.utils.utils import prettify
4 |
5 | dask = pytest.importorskip("dask")
6 |
7 |
8 | def test_dask_read_csv(execute):
9 | code = """import dask.dataframe as dd
10 | df = dd.read_csv('tests/simple_data.csv')
11 | """
12 | res = execute(code, artifacts=["df"])
13 | assert res.values["df"]["a"].sum().compute() == 25
14 |
15 |
16 | def test_dask_to_csv(execute):
17 | code = """import dask.dataframe as dd
18 | df = dd.read_csv('tests/simple_data.csv')
19 | df.to_csv('tests/simple_data_dask.csv')
20 | """
21 | res = execute(code, artifacts=["lineapy.file_system"])
22 | assert res.artifacts["lineapy.file_system"] == prettify(code)
23 |
24 |
25 | def test_dask_pop(execute):
26 | code = """import dask.dataframe as dd
27 | df = dd.read_csv('tests/simple_data.csv')
28 | df.pop('a')
29 | """
30 | res = execute(code, artifacts=["df"])
31 | assert res.values["df"].columns == ["b"]
32 | assert res.artifacts["df"] == prettify(code)
33 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_multioutput_plot_classifier_chain_yeast.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # chains
3 | # from file:
4 | # sources/scikit-learn/examples/multioutput/plot_classifier_chain_yeast.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_multioutput_plot_classifier_chain_yeast]'
8 |
9 | from sklearn.datasets import fetch_openml
10 | from sklearn.linear_model import LogisticRegression
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.multioutput import ClassifierChain
13 |
14 | X, Y = fetch_openml("yeast", version=4, return_X_y=True)
15 | Y = Y == "TRUE"
16 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
17 | base_lr = LogisticRegression()
18 | chains = [ClassifierChain(base_lr, order="random", random_state=i) for i in range(10)]
19 | for chain in chains:
20 | chain.fit(X_train, Y_train)
21 | linea_artifact_value = chains
22 |
--------------------------------------------------------------------------------
/lineapy/transformer/source_giver.py:
--------------------------------------------------------------------------------
1 | import ast
2 |
3 |
4 | class SourceGiver:
5 | def transform(self, nodes: ast.Module) -> None:
6 | """
7 | This call should only happen once asttoken has run its magic
8 | and embellished the ast with tokens and line numbers.
9 | At that point, all this function will do is use those tokens to
10 | figure out end_lineno and end_col_offset for every node in the tree
11 | """
12 | node: ast.AST
13 | # TODO check if the ast type is a Module instead of simply relying on mypy
14 | for node in ast.walk(nodes):
15 | if not hasattr(node, "lineno"):
16 | continue
17 |
18 | if hasattr(node, "last_token"):
19 | node.end_lineno = node.last_token.end[0] # type: ignore
20 | node.end_col_offset = node.last_token.end[1] # type: ignore
21 | # if isinstance(node, ast.ListComp):
22 | node.col_offset = node.first_token.start[1] # type: ignore
23 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_model_selection_plot_randomized_search.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # grid_search
3 | # from file:
4 | # sources/scikit-learn/examples/model_selection/plot_randomized_search.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_model_selection_plot_randomized_search]'
8 |
9 | import numpy as np
10 | from sklearn.datasets import load_digits
11 | from sklearn.linear_model import SGDClassifier
12 | from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
13 |
14 | X, y = load_digits(return_X_y=True, n_class=3)
15 | clf = SGDClassifier(loss="hinge", penalty="elasticnet", fit_intercept=True)
16 | param_grid = {
17 | "average": [True, False],
18 | "l1_ratio": np.linspace(0, 1, num=10),
19 | "alpha": np.power(10, np.arange(-2, 1, dtype=float)),
20 | }
21 | grid_search = GridSearchCV(clf, param_grid=param_grid)
22 | grid_search.fit(X, y)
23 | linea_artifact_value = grid_search
24 |
--------------------------------------------------------------------------------
/tests/integration/slices/xgboost_sklearn_examples.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # lineapy.file_system
3 | # from file:
4 | # sources/xgboost/demo/guide-python/sklearn_examples.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[xgboost_sklearn_examples]'
8 |
9 | """
10 | Collection of examples for using sklearn interface
11 | ==================================================
12 |
13 | Created on 1 Apr 2015
14 |
15 | @author: Jamie Hall
16 | """
17 | import pickle
18 | import xgboost as xgb
19 | from sklearn.model_selection import GridSearchCV
20 | from sklearn.datasets import fetch_california_housing
21 |
22 | X, y = fetch_california_housing(return_X_y=True)
23 | xgb_model = xgb.XGBRegressor(n_jobs=1)
24 | clf = GridSearchCV(
25 | xgb_model,
26 | {"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]},
27 | verbose=1,
28 | n_jobs=1,
29 | )
30 | clf.fit(X, y)
31 | pickle.dump(clf, open("best_calif.pkl", "wb"))
32 |
--------------------------------------------------------------------------------
/tests/unit/utils/test_config.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from fsspec.core import url_to_fs
4 | from fsspec.implementations.local import LocalFileSystem
5 |
6 | from lineapy.utils.config import options
7 |
8 |
9 | def test_artifact_storage_dir_type():
10 | """
11 | Making sure the path we are setting is correct typing, so pandas.io.common.get_handler can process it correctly.
12 | """
13 | old_artifact_storage_dir = options.safe_get("artifact_storage_dir")
14 | options.set(
15 | "artifact_storage_dir",
16 | "/tmp/somelineapytestprefix/",
17 | )
18 | assert isinstance(
19 | url_to_fs(str(options.safe_get("artifact_storage_dir")))[0],
20 | LocalFileSystem,
21 | )
22 |
23 | options.set(
24 | "artifact_storage_dir",
25 | Path("~").expanduser().resolve(),
26 | )
27 | assert isinstance(
28 | url_to_fs(str(options.safe_get("artifact_storage_dir")))[0],
29 | LocalFileSystem,
30 | )
31 |
32 | options.set("artifact_storage_dir", old_artifact_storage_dir)
33 |
--------------------------------------------------------------------------------
/tests/unit/db/test_db_utils.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from lineapy.db.utils import parse_artifact_version
4 |
5 |
6 | def test_parse_artifact_version():
7 | cases = (
8 | (-1, False, None),
9 | (-102, False, None),
10 | (1, True, 1),
11 | (0, True, 0),
12 | (2, True, 2),
13 | (3, True, 3),
14 | (4, True, 4),
15 | (5, True, 5),
16 | (0.3, True, 0),
17 | (3.0, True, 3),
18 | (1.0, True, 1),
19 | ("all", True, "all"),
20 | ("latest", True, "latest"),
21 | ("al", False, None),
22 | ("lattest", False, None),
23 | ("1", True, 1),
24 | ("3", True, 3),
25 | ("5", True, 5),
26 | ("0.3", True, 0),
27 | ("1.1", True, 1),
28 | )
29 | for version, is_valid, expected in cases:
30 | if is_valid:
31 | assert parse_artifact_version(version) == expected
32 | else:
33 | print(version)
34 | with pytest.raises(ValueError):
35 | parse_artifact_version(version)
36 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | push:
5 | branches:
6 | - "v[0-9]+.[0-9]+.x"
7 | tags:
8 | - "v[0-9]+.[0-9]+.[0-9]+"
9 |
10 | jobs:
11 | build-n-publish:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v2
15 | with:
16 | lfs: true
17 | - name: Set up Python 3.9
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: 3.9
21 | - name: Install dependencies
22 | run: |
23 | python setup.py install && rm -rf build dist
24 | - name: Build Wheels
25 | run: |
26 | pip install wheel && python setup.py sdist bdist_wheel
27 | - name: Check build
28 | run: |
29 | pip install twine && twine check dist/*
30 | - name: Publish package
31 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
32 | uses: pypa/gh-action-pypi-publish@release/v1
33 | with:
34 | user: __token__
35 | password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/tests/integration/slices/pandas_merge.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # all_data_col
3 | # from file:
4 | # sources/pandas_exercises/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_merge]'
8 |
9 | import pandas as pd
10 |
11 | raw_data_1 = {
12 | "subject_id": ["1", "2", "3", "4", "5"],
13 | "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
14 | "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"],
15 | }
16 | raw_data_2 = {
17 | "subject_id": ["4", "5", "6", "7", "8"],
18 | "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
19 | "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"],
20 | }
21 | data1 = pd.DataFrame(raw_data_1, columns=["subject_id", "first_name", "last_name"])
22 | data2 = pd.DataFrame(raw_data_2, columns=["subject_id", "first_name", "last_name"])
23 | all_data_col = pd.concat([data1, data2], axis=1)
24 | linea_artifact_value = all_data_col
25 |
--------------------------------------------------------------------------------
/lineapy/utils/validate_annotation_spec.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Validate the annotations.yaml files in the instrumentation directory.
4 | """
5 | import json # for pretty printing dicts
6 | from pathlib import Path
7 | from typing import Any, List
8 |
9 | import pydantic
10 | import yaml
11 |
12 | from lineapy.instrumentation.annotation_spec import ModuleAnnotation
13 |
14 |
15 | def validate_spec(spec_file: Path) -> List[Any]:
16 | """
17 | Validate all '.annotations.yaml' files at path
18 | and return all invalid items.
19 |
20 | Throws yaml.YAMLError
21 | """
22 | invalid_specs: List[Any] = []
23 | with open(spec_file, "r") as f:
24 | doc = yaml.safe_load(f)
25 |
26 | for item in doc:
27 | print(
28 | "Module specification: {}\n".format(json.dumps(item, indent=4))
29 | )
30 |
31 | try:
32 | a = ModuleAnnotation(**item)
33 | except pydantic.error_wrappers.ValidationError as e:
34 | invalid_specs.append(item)
35 | return invalid_specs
36 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1.2
2 | # Pin syntax as Docker recommends
3 | # https://docs.docker.com/language/python/build-images/#create-a-dockerfile-for-python
4 | FROM python:3.9-slim
5 |
6 | RUN apt-get update && apt-get -y install git graphviz make libpq-dev gcc && \
7 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
8 | && apt-get install git-lfs && git lfs install && apt clean && apt-get autoclean && apt-get autoremove
9 |
10 | WORKDIR /usr/src/base
11 |
12 | # Small hack: copy dependency files first so Docker caches the pip install layer instead of rebuilding it every time
13 | COPY ./setup.py ./
14 | COPY ./README.md ./
15 | COPY ./lineapy/__init__.py ./lineapy/
16 | COPY ./requirements.txt ./
17 | COPY ./test_pipeline_airflow_req.txt ./
18 | COPY ./Makefile ./
19 |
20 | ENV AIRFLOW_HOME=/usr/src/airflow_home
21 | ENV AIRFLOW_VENV=/usr/src/airflow_venv
22 |
23 | #RUN mkdir /usr/src/airflow_home
24 | RUN pip --disable-pip-version-check install -r requirements.txt && make airflow_venv && pip cache purge
25 |
26 | COPY . .
27 |
28 | RUN python setup.py install && rm -rf build
29 |
30 | CMD [ "lineapy" ]
31 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/ray/ray_dag_base.jinja:
--------------------------------------------------------------------------------
1 | import {{ MODULE_NAME }}
2 | import ray
3 | import pickle
4 | import pathlib
5 |
6 | ray.init(runtime_env = {{RAY_RUNTIME_ENV}}, storage = "{{RAY_STORAGE}}")
7 |
8 | {% for task_def in task_definitions %}
9 | {{ task_def }}
10 | {% endfor %}
11 |
12 | # Specify argument values for your pipeline run.
13 | pipeline_arguments = {{ dag_params }}
14 |
15 | {% for task_name, task_def in tasks.items() %}
16 | {%- if task_def.return_vars|length > 0 %}
17 | {%- for var in task_def.return_vars %}{{ var }}{{ ',' if not loop.last else '' }}{%- endfor %}
18 | {%- else %}
19 | _
20 | {%- endif %} = task_{{task_name}}.{%- block bind_or_remote %}{% endblock %}(
21 | {%- for var in task_def.user_input_variables %}pipeline_arguments["{{ var }}"]{{ ',' if not loop.last else '' }}{%- endfor %}{%- if task_def.loaded_input_variables|length > 0 and task_def.user_input_variables|length > 0 %},{%- endif %}{%- for var in task_def.loaded_input_variables %} {{var}} {{ ',' if not loop.last else '' }}{%- endfor %}
22 | )
23 | {% endfor %}
24 |
25 | {%- block ray_dag_execution %}{% endblock %}
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/lineapy-notebook/notebook-start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Jupyter Development Team.
3 | # Distributed under the terms of the Modified BSD License.
4 | # LineaPy extensions (c) Linea Labs
5 |
6 | set -e
7 |
8 | # The Jupyter command to launch
9 | # JupyterLab by default
10 | DOCKER_STACKS_JUPYTER_CMD="${DOCKER_STACKS_JUPYTER_CMD:=lab}"
11 |
12 | if [[ -n "${JUPYTERHUB_API_TOKEN}" ]]; then
13 | echo "WARNING: using start-singleuser.sh instead of start-notebook.sh to start a server associated with JupyterHub."
14 | exec /usr/local/bin/start-singleuser.sh "$@"
15 | fi
16 |
17 | wrapper=""
18 | if [[ "${RESTARTABLE}" == "yes" ]]; then
19 | wrapper="run-one-constantly"
20 | fi
21 |
22 | if [[ -f /requirements.txt ]]
23 | then
24 | echo "Installing system requirements."
25 | pip3 install -r /requirements.txt
26 | fi
27 |
28 | # Verify lineapy environment is set up correctly
29 | lineapy python /verify_environment.py
30 |
31 | # shellcheck disable=SC1091,SC2086
32 | exec /usr/local/bin/start.sh ${wrapper} lineapy jupyter ${DOCKER_STACKS_JUPYTER_CMD} "--NotebookApp.token=''"
33 |
--------------------------------------------------------------------------------
/PERFORMANCE.md:
--------------------------------------------------------------------------------
1 | # Performance Profiling
2 |
3 | We have had luck using the [py-spy](https://github.com/benfred/py-spy) tool,
4 | which runs your Python script in a separate process and samples it, to
5 | profile our tests to get a rough sense of how long things take:
6 |
7 | ```bash
8 | # Run with sudo so it can inspect the subprocess.
9 | # --format speedscope : save as speedscope so we can load the trace in the browser
10 | # --function : group samples by function name, instead of line number
11 | # -r 200 : increase the sampling rate from 100 to 200 samples per second
12 | sudo py-spy record \
13 |     --format speedscope \
14 |     --function \
15 |     -r 200 -- pytest tests/
16 | ```
17 |
18 | After creating your trace, you can load it [in
19 | Speedscope](https://www.speedscope.app/).
20 |
21 | In this example, we are inspecting calls to `transform`.
22 | We see that it cumulatively takes up 12% of total time and that most of the
23 | time inside of it is spent visiting imports, as well as committing to the DB:
24 |
25 |
26 |
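If you want a more focused follow-up trace, you can point py-spy at a subset of the tests and write the profile to a named file (the test path and output filename below are illustrative choices, not fixed conventions):

```bash
# Profile only the transformer unit tests and save the speedscope profile to a file
sudo py-spy record --format speedscope --function -r 200 \
    -o transform.speedscope.json -- pytest tests/unit/transformer
```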
--------------------------------------------------------------------------------
/tests/unit/db/test_literal_node.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | @pytest.mark.parametrize(
5 | "literal_value",
6 | [
7 |         # Need to escape quotes: the formatted string would strip them
8 |         # and the value would otherwise be treated as an integer.
9 | pytest.param("'10'", id="String"),
10 | pytest.param(False, id="Boolean"),
11 | pytest.param(10, id="Int"),
12 | pytest.param(10.0, id="Float"),
13 | pytest.param(None, id="None"),
14 | pytest.param(b"10", id="Bytes"),
15 | ],
16 | )
17 | def test_literal_node_value(execute, literal_value):
18 | """
19 | Test that the literal node is serialized and deserialized correctly
20 | to the DB for supported types.
21 |
22 | TODO: Add test case for ellipses.
23 | """
24 | code = f"""import lineapy
25 | val={literal_value}
26 | art = lineapy.save(val, "val")
27 | """
28 | res = execute(
29 | code,
30 | snapshot=False,
31 | )
32 |
33 | art = res.values["art"]
34 | art_val = art.db.get_node_by_id(art.node_id).value
35 | expected_val = res.values["val"]
36 | assert art_val == expected_val
37 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_tree_plot_cost_complexity_pruning.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # depth
3 | # from file:
4 | # sources/scikit-learn/examples/tree/plot_cost_complexity_pruning.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_tree_plot_cost_complexity_pruning]'
8 |
9 | from sklearn.datasets import load_breast_cancer
10 | from sklearn.model_selection import train_test_split
11 | from sklearn.tree import DecisionTreeClassifier
12 |
13 | X, y = load_breast_cancer(return_X_y=True)
14 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
15 | clf = DecisionTreeClassifier(random_state=0)
16 | path = clf.cost_complexity_pruning_path(X_train, y_train)
17 | ccp_alphas, impurities = path.ccp_alphas, path.impurities
18 | clfs = []
19 | for ccp_alpha in ccp_alphas:
20 | clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
21 | clf.fit(X_train, y_train)
22 | clfs.append(clf)
23 | clfs = clfs[:-1]
24 | depth = [clf.tree_.max_depth for clf in clfs]
25 | linea_artifact_value = depth
26 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_simple.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="a = abs(11)",
8 | location=PosixPath("[source file path]"),
9 | )
10 | call_1 = CallNode(
11 | source_location=SourceLocation(
12 | lineno=1,
13 | col_offset=4,
14 | end_lineno=1,
15 | end_col_offset=11,
16 | source_code=source_1.id,
17 | ),
18 | function_id=LookupNode(
19 | source_location=SourceLocation(
20 | lineno=1,
21 | col_offset=4,
22 | end_lineno=1,
23 | end_col_offset=7,
24 | source_code=source_1.id,
25 | ),
26 | name="abs",
27 | ).id,
28 | positional_args=[
29 | LiteralNode(
30 | source_location=SourceLocation(
31 | lineno=1,
32 | col_offset=8,
33 | end_lineno=1,
34 | end_col_offset=10,
35 | source_code=source_1.id,
36 | ),
37 | value=11,
38 | ).id
39 | ],
40 | )
41 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_preprocessing_plot_scaling_importance.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # (unscaled_clf, std_clf)
3 | # from file:
4 | # sources/scikit-learn/examples/preprocessing/plot_scaling_importance.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_preprocessing_plot_scaling_importance]'
8 |
9 | from sklearn.datasets import load_wine
10 | from sklearn.decomposition import PCA
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.naive_bayes import GaussianNB
13 | from sklearn.pipeline import make_pipeline
14 | from sklearn.preprocessing import StandardScaler
15 |
16 | RANDOM_STATE = 42
17 | features, target = load_wine(return_X_y=True)
18 | X_train, X_test, y_train, y_test = train_test_split(
19 | features, target, test_size=0.3, random_state=RANDOM_STATE
20 | )
21 | unscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB())
22 | unscaled_clf.fit(X_train, y_train)
23 | std_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB())
24 | std_clf.fit(X_train, y_train)
25 | linea_artifact_value = unscaled_clf, std_clf
26 |
--------------------------------------------------------------------------------
/tests/unit/migration/test_migrations.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import text
2 |
3 |
4 | def test_38d5f834d3b7_orig(alembic_engine, alembic_runner):
5 | alembic_runner.migrate_up_to("38d5f834d3b7")
6 |
7 | with alembic_engine.connect() as conn:
8 | assert conn.execute(
9 | text("SELECT name FROM sqlite_master WHERE type='table'")
10 | ).fetchall() == [
11 | ("alembic_version",),
12 | ("execution",),
13 | ("source_code",),
14 | ("node",),
15 | ("session_context",),
16 | ("artifact",),
17 | ("call_node",),
18 | ("global_node",),
19 | ("import_node",),
20 | ("literal_assign_node",),
21 | ("lookup",),
22 | ("mutate_node",),
23 | ("node_value",),
24 | ("global_reference",),
25 | ("implicit_dependency",),
26 | ("keyword_arg",),
27 | ("positional_arg",),
28 | ]
29 |
30 | assert conn.execute(
31 | text(
32 | "SELECT 1 FROM PRAGMA_TABLE_INFO('session_context') WHERE name='python_version';"
33 | )
34 | ).fetchall() == [(1,)]
35 |
--------------------------------------------------------------------------------
/docs/gen_ref_pages.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate the code reference pages.
3 |
4 | Adapted from https://mkdocstrings.github.io/recipes/
5 | """
6 |
7 | from pathlib import Path
8 |
9 | import mkdocs_gen_files
10 |
11 | nav = mkdocs_gen_files.Nav()
12 |
13 | SKIP_DIRS = ["_alembic"]
14 |
15 | for path in sorted(Path("..", "lineapy").rglob("*.py")):
16 | if not set(path.parts).isdisjoint(SKIP_DIRS):
17 | continue
18 |
19 | module_path = path.relative_to("..").with_suffix("")
20 | doc_path = path.relative_to("..").with_suffix(".md")
21 | full_doc_path = Path("reference", doc_path)
22 |
23 | parts = list(module_path.parts)
24 |
25 | if parts[-1] == "__init__":
26 | parts = parts[:-1]
27 | doc_path = doc_path.with_name("index.md")
28 | full_doc_path = full_doc_path.with_name("index.md")
29 | elif parts[-1] == "__main__":
30 | continue
31 |
32 | nav[parts] = doc_path.as_posix()
33 |
34 | with mkdocs_gen_files.open(full_doc_path, "w") as fd:
35 | identifier = ".".join(parts)
36 | print("::: " + identifier, file=fd)
37 |
38 | with mkdocs_gen_files.open("reference/nav.md", "w") as nav_file:
39 | nav_file.writelines(nav.build_literate_nav())
40 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/module/module.jinja:
--------------------------------------------------------------------------------
1 | {% if default_input_parameters|length>0 %}
2 | import argparse
3 | {% endif %}
4 |
5 | {{module_imports}}
6 |
7 | {{artifact_functions}}
8 |
9 | {{session_functions}}
10 |
11 | def run_all_sessions({%- for input_parameter in default_input_parameters %}
12 | {{indentation_block}}{{input_parameter}},
13 | {%- endfor -%}):
14 | {{indentation_block}}artifacts = dict()
15 | {{module_function_body | indent(4, True)}}
16 | {{indentation_block}}return artifacts
17 |
18 | if __name__ == "__main__":
19 | {{indentation_block}}# Edit this section to customize the behavior of artifacts
20 | {% if default_input_parameters|length>0 -%}
21 | {{indentation_block}}parser = argparse.ArgumentParser()
22 | {% for parser_block in parser_blocks -%}
23 | {{indentation_block}}{{parser_block}}
24 | {% endfor -%}
25 | {{indentation_block}}args = parser.parse_args()
26 | {{indentation_block}}artifacts = run_all_sessions({%- for parser_input_parameter in parser_input_parameters %}
27 | {{indentation_block}}{{indentation_block}}{{parser_input_parameter}},
28 | {%- endfor -%})
29 | {% else -%}
30 | {{indentation_block}}artifacts = run_all_sessions()
31 | {% endif -%}
32 | {{indentation_block}}print(artifacts)
33 |
--------------------------------------------------------------------------------
/examples/self-hosting-lineapy/lineapy-notebook/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG OWNER=jupyter
2 | # If you need ARM set ARCH to "aarch64-"
3 | ARG ARCH
4 | ARG BASE_CONTAINER=$OWNER/minimal-notebook:${ARCH}latest
5 | FROM $BASE_CONTAINER
6 |
7 | LABEL maintainer="LineaPy Project "
8 |
9 | SHELL ["/bin/bash", "-o", "pipefail", "-c"]
10 |
11 | USER root
12 |
13 | # curl required for health check
14 | RUN apt update -y
15 | RUN apt install -y curl
16 |
17 | # switch back to notebook user so permissions on files are correct
18 | USER $NB_UID
19 |
20 | # prevent pip timing out on slow internet connections
21 | ENV PIP_DEFAULT_TIMEOUT=1000
22 |
23 | # deps for lineapy
24 | RUN pip install -U pip
25 | RUN pip install fsspec s3fs psycopg2-binary
26 |
27 | # install lineapy
28 | RUN pip install lineapy==0.2.1
29 |
30 | # other nice to have libraries
31 | RUN pip install pandas==1.4.3 scikit-learn==1.1.2 seaborn==0.11.2
32 |
33 | COPY notebook-start.sh /usr/local/bin/notebook-start.sh
34 |
35 | # Make mountpoints for airflow so that they are mounted as non-root
36 | RUN mkdir -p /home/jovyan/work/airflow/dags
37 | RUN mkdir -p /home/jovyan/work/airflow/plugins
38 |
39 | ENTRYPOINT ["tini", "-g", "--"]
40 | CMD ["notebook-start.sh"]
41 |
--------------------------------------------------------------------------------
/lineapy/utils/migration.py:
--------------------------------------------------------------------------------
1 | # Code based on https://improveandrepeat.com/2021/09/python-friday-87-handling-pre-existing-tables-with-alembic-and-sqlalchemy/
2 | # Code based on https://github.com/talkpython/data-driven-web-apps-with-flask
3 |
4 | from alembic import op
5 | from sqlalchemy import engine_from_config, inspect
6 |
7 | from lineapy.utils.config import options
8 |
9 |
10 | def table_exists(table, schema=None):
11 | engine = engine_from_config(
12 | {"sqlalchemy.url": options.database_url}, prefix="sqlalchemy."
13 | )
14 | insp = inspect(engine)
15 | return insp.has_table(table, schema)
16 |
17 |
18 | def ensure_table(name, *args, **kwargs):
19 | if not table_exists(name):
20 | op.create_table(name, *args, **kwargs)
21 |
22 |
23 | def table_has_column(table, column):
24 | engine = engine_from_config(
25 | {"sqlalchemy.url": options.database_url}, prefix="sqlalchemy."
26 | )
27 | insp = inspect(engine)
28 | return any([column == col["name"] for col in insp.get_columns(table)])
29 |
30 |
31 | def ensure_column(table_name, column, *args, **kwargs):
32 | if not table_has_column(table_name, column.name):
33 | op.add_column(table_name, column, *args, **kwargs)
34 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_var_aliasing/test_alias_by_value.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 0
8 | b = a
9 | a = 2
10 | """,
11 | location=PosixPath("[source file path]"),
12 | )
13 | call_1 = CallNode(
14 | source_location=SourceLocation(
15 | lineno=2,
16 | col_offset=0,
17 | end_lineno=2,
18 | end_col_offset=5,
19 | source_code=source_1.id,
20 | ),
21 | function_id=LookupNode(
22 | name="l_alias",
23 | ).id,
24 | positional_args=[
25 | LiteralNode(
26 | source_location=SourceLocation(
27 | lineno=1,
28 | col_offset=4,
29 | end_lineno=1,
30 | end_col_offset=5,
31 | source_code=source_1.id,
32 | ),
33 | value=0,
34 | ).id
35 | ],
36 | )
37 | literal_2 = LiteralNode(
38 | source_location=SourceLocation(
39 | lineno=3,
40 | col_offset=4,
41 | end_lineno=3,
42 | end_col_offset=5,
43 | source_code=source_1.id,
44 | ),
45 | value=2,
46 | )
47 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_classdef.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | def test_basic_classdef(execute):
5 | code = """class A():
6 | def __init__(self, varname:str):
7 | self.varname = varname
8 | a = A("myclass")
9 | """
10 | res = execute(code)
11 | assert res.values["a"].varname == "myclass"
12 |
13 |
14 | GLOBAL_MUTATE_CODE = """new_value="newval"
15 | class A():
16 | def __init__(self, initialname:str):
17 | self.varname = initialname
18 | def update_name(newname:str):
19 | self.varname = newname
20 |
21 | class Modifier():
22 | def modify_A(self,classinstance):
23 | classinstance.varname = new_value
24 |
25 | a = A("origvalue")
26 | b = Modifier()
27 | b.modify_A(a)
28 | """
29 |
30 |
31 | def test_mutate_classvar_values(execute):
32 | res = execute(GLOBAL_MUTATE_CODE)
33 | assert res.values["a"].varname == "newval"
34 |
35 |
36 | @pytest.mark.xfail(
37 |     reason="slicing calls to class's functions aren't parsed \
38 | since classes are blackboxes right now."
39 | )
40 | def test_mutate_classvar_slice(execute):
41 | res = execute(GLOBAL_MUTATE_CODE, artifacts=["a", "b"])
42 | assert res.artifacts["a"] == GLOBAL_MUTATE_CODE
43 | assert res.artifacts["b"] == GLOBAL_MUTATE_CODE
44 |
--------------------------------------------------------------------------------
/lineapy/plugins/loader.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | import sys
3 | import tempfile
4 | from importlib.abc import Loader
5 | from pathlib import Path
6 |
7 | from lineapy.plugins.base_pipeline_writer import BasePipelineWriter
8 | from lineapy.utils.utils import prettify
9 |
10 |
11 | def load_as_module(writer: BasePipelineWriter):
12 | """
13 |     Write the module text to a temp file and load it as a module named
14 |     ``session_art1_art2_...``
15 | """
16 |
17 | module_name = f"session_{'_'.join(writer.artifact_collection.session_artifacts.keys())}"
18 | temp_folder = tempfile.mkdtemp()
19 | temp_module_path = Path(temp_folder, f"{module_name}.py")
20 |
21 | with open(temp_module_path, "w") as f:
22 | f.writelines(prettify(writer._compose_module()))
23 |
24 | spec = importlib.util.spec_from_file_location(
25 | module_name, temp_module_path
26 | )
27 | if spec is not None:
28 | session_module = importlib.util.module_from_spec(spec)
29 | assert isinstance(spec.loader, Loader)
30 | sys.modules["module.name"] = session_module
31 | spec.loader.exec_module(session_module)
32 | return session_module
33 | else:
34 |         raise Exception("LineaPy cannot retrieve the module.")
35 |
--------------------------------------------------------------------------------
/lineapy/visualizer/optimize_svg.py:
--------------------------------------------------------------------------------
1 | """
2 | Optimizes an SVG file to reduce the size in the notebook.
3 | """
4 |
5 | import subprocess
6 | import tempfile
7 | from pathlib import Path
8 |
9 | # https://github.com/scour-project/scour#usage
10 | OPTIONS = [
11 | "--strip-xml-prolog",
12 | "--remove-titles",
13 | "--remove-descriptions",
14 | "--remove-metadata",
15 | "--remove-descriptive-elements",
16 | "--enable-comment-stripping",
17 | "--no-line-breaks",
18 | "--enable-id-stripping",
19 | "--shorten-ids",
20 | "--create-groups",
21 | ]
22 |
23 |
24 | def optimize_svg(svg: str) -> str:
25 | # Calls optimize in subprocess to avoid needing to tie ourselves
26 | # to scours's internal Python API, which is likely less stable and not
27 | # documented.
28 | with tempfile.TemporaryDirectory() as tmpdirname:
29 | tmp_dir = Path(tmpdirname)
30 | input_path = tmp_dir / "input.svg"
31 | output_path = tmp_dir / "output.svg"
32 | input_path.write_text(svg)
33 | subprocess.run(
34 | ["scour", "-i", str(input_path), "-o", str(output_path)] + OPTIONS,
35 | capture_output=True,
36 | check=True,
37 | )
38 | return output_path.read_text()
39 |
--------------------------------------------------------------------------------
/tests/test_globals_dict.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | import pytest
4 |
5 | from lineapy.execution.globals_dict import GlobalsDict, GlobalsDictResult
6 |
7 |
8 | @pytest.mark.parametrize(
9 | "code,inputs,accessed_inputs,added_or_modified",
10 | (
11 | pytest.param("x", {"x": 1}, ["x"], {}, id="load input"),
12 | pytest.param("x = 1", {}, [], {"x": 1}, id="save output"),
13 | pytest.param("x = 1", {"x": 2}, [], {"x": 1}, id="overwrite input"),
14 | pytest.param(
15 |             "x += 1", {"x": 1}, ["x"], {"x": 2}, id="overwrite and access input"
16 | ),
17 | pytest.param(
18 | "x = 2\nx", {"x": 1}, [], {"x": 2}, id="read after write"
19 | ),
20 | ),
21 | )
22 | def test_results(
23 | code: str, inputs: Dict[str, object], accessed_inputs, added_or_modified
24 | ):
25 | g = GlobalsDict()
26 | g.setup_globals(inputs)
27 | b = compile(code, "", "exec")
28 | exec(b, g)
29 | intended_res = GlobalsDictResult(accessed_inputs, added_or_modified)
30 | assert g.teardown_globals() == intended_res
31 |
32 | # Try again to make sure it works second time
33 | g.setup_globals(inputs)
34 | exec(b, g)
35 | assert g.teardown_globals() == intended_res
36 |
--------------------------------------------------------------------------------
/tests/integration/slices/matplotlib_dash_joinstyle.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # lineapy.file_system
3 | # from file:
4 | # sources/matplotlib-tutorial/scripts/dash_joinstyle.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_dash_joinstyle]'
8 |
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 |
12 | size = 256, 16
13 | dpi = 72.0
14 | figsize = size[0] / float(dpi), size[1] / float(dpi)
15 | fig = plt.figure(figsize=figsize, dpi=dpi)
16 | fig.patch.set_alpha(0)
17 | plt.axes([0, 0, 1, 1], frameon=False)
18 | plt.plot(
19 | np.arange(3),
20 | [0, 1, 0],
21 | color="blue",
22 | dashes=[12, 5],
23 | linewidth=8,
24 | dash_joinstyle="miter",
25 | )
26 | plt.plot(
27 | 4 + np.arange(3),
28 | [0, 1, 0],
29 | color="blue",
30 | dashes=[12, 5],
31 | linewidth=8,
32 | dash_joinstyle="bevel",
33 | )
34 | plt.plot(
35 | 8 + np.arange(3),
36 | [0, 1, 0],
37 | color="blue",
38 | dashes=[12, 5],
39 | linewidth=8,
40 | dash_joinstyle="round",
41 | )
42 | plt.xlim(0, 12), plt.ylim(-1, 2)
43 | plt.xticks([]), plt.yticks([])
44 | plt.savefig("../figures/dash_joinstyle.png", dpi=dpi)
45 |
--------------------------------------------------------------------------------
/lineapy/utils/analytics/utils.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | from typing import Callable, TypeVar, cast
3 |
4 | from lineapy.data.types import LineaID
5 | from lineapy.db.db import RelationalLineaDB
6 | from lineapy.instrumentation.annotation_spec import ExternalState
7 | from lineapy.utils.analytics.event_schemas import LibImportEvent
8 | from lineapy.utils.analytics.usage_tracking import do_not_track, track
9 |
10 | C = TypeVar("C", bound=Callable)
11 |
12 |
13 | def allow_do_not_track(fn: C) -> C:
14 | @wraps(fn)
15 | def decorator(*args, **kwargs):
16 | if do_not_track():
17 | return
18 | return fn(*args, **kwargs)
19 |
20 | return cast(C, decorator)
21 |
22 |
23 | # checking earlier to avoid doing extra DB query work
24 | @allow_do_not_track
25 | def send_lib_info_from_db(db: RelationalLineaDB, session_id: LineaID):
26 | import_nodes = db.get_libraries_for_session(session_id)
27 | [
28 | track(LibImportEvent(str(n.package_name), str(n.version)))
29 | for n in import_nodes
30 | if n.package_name != "lineapy"
31 | ]
32 | return
33 |
34 |
35 | def side_effect_to_str(reference: object):
36 | if isinstance(reference, ExternalState):
37 | return reference.external_state
38 | return ""
39 |
--------------------------------------------------------------------------------
/lineapy/graph_reader/types.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 |
4 | @dataclass
5 | class InputVariable:
6 | """
7 |     Class to generate code related to an input variable and its default value
8 |
9 | Attributes
10 | ----------
11 | variable_name:
12 | variable name
13 | value:
14 | variable value
15 | value_type:
16 | variable object type
17 | default_args:
18 | assignment of variable to a default value
19 | ex: ``a = 1``
20 | parser_body:
21 | code block that uses python parser library to get a input variable from CLI.
22 | ex: ``parser.add_argument('--a', default=1, type=int)``
23 | parser_args:
24 | code block that unpacks input variable from args.
25 | ex: ``a = args.a``
26 |
27 | """
28 |
29 | def __init__(self, variable_name, value, value_type) -> None:
30 | self.variable_name = variable_name
31 | self.value = value
32 | self.value_type = value_type.__name__
33 | self.default_args = f"{self.variable_name} = {repr(self.value)}"
34 | self.parser_body = f"parser.add_argument('--{self.variable_name}', type={self.value_type}, default={repr(self.value)})"
35 | self.parser_args = f"{self.variable_name} = args.{self.variable_name}"
36 |
--------------------------------------------------------------------------------
/tests/integration/slices/sklearn_compose_plot_feature_union.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # grid_search
3 | # from file:
4 | # sources/scikit-learn/examples/compose/plot_feature_union.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_compose_plot_feature_union]'
8 |
9 | from sklearn.datasets import load_iris
10 | from sklearn.decomposition import PCA
11 | from sklearn.feature_selection import SelectKBest
12 | from sklearn.model_selection import GridSearchCV
13 | from sklearn.pipeline import FeatureUnion, Pipeline
14 | from sklearn.svm import SVC
15 |
16 | iris = load_iris()
17 | X, y = iris.data, iris.target
18 | pca = PCA(n_components=2)
19 | selection = SelectKBest(k=1)
20 | combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
21 | X_features = combined_features.fit(X, y).transform(X)
22 | svm = SVC(kernel="linear")
23 | pipeline = Pipeline([("features", combined_features), ("svm", svm)])
24 | param_grid = dict(
25 | features__pca__n_components=[1, 2, 3],
26 | features__univ_select__k=[1, 2],
27 | svm__C=[0.1, 1, 10],
28 | )
29 | grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
30 | grid_search.fit(X, y)
31 | linea_artifact_value = grid_search
32 |
--------------------------------------------------------------------------------
/lineapy/exceptions/create_frame.py:
--------------------------------------------------------------------------------
1 | """
2 | Copied from https://naleraphael.github.io/blog/posts/devlog_create_a_builtin_frame_object/
3 | """
4 | import ctypes
5 | from types import CodeType, FrameType
6 |
7 | P_SIZE = ctypes.sizeof(ctypes.c_void_p)
8 | IS_X64 = P_SIZE == 8
9 |
10 | P_MEM_TYPE = ctypes.POINTER(ctypes.c_ulong if IS_X64 else ctypes.c_uint)
11 |
12 | ctypes.pythonapi.PyFrame_New.argtypes = (
13 | P_MEM_TYPE, # PyThreadState *tstate
14 | P_MEM_TYPE, # PyCodeObject *code
15 | ctypes.py_object, # PyObject *globals
16 | ctypes.py_object, # PyObject *locals
17 | )
18 | ctypes.pythonapi.PyFrame_New.restype = ctypes.py_object # PyFrameObject*
19 |
20 | ctypes.pythonapi.PyThreadState_Get.argtypes = ()
21 | ctypes.pythonapi.PyThreadState_Get.restype = P_MEM_TYPE
22 |
23 |
24 | def create_frame(code: CodeType) -> FrameType:
25 | """
26 | Creates a new frame object from a code object.
27 | """
28 |
29 | return ctypes.pythonapi.PyFrame_New(
30 | ctypes.pythonapi.PyThreadState_Get(), # thread state
31 | ctypes.cast(id(code), P_MEM_TYPE), # a code object
32 | # Make sure not to set __file__ in the globals,
33 | # or else ipython will look at it and change the file name
34 | {}, # a dict of globals
35 | {}, # a dict of locals
36 | )
37 |
--------------------------------------------------------------------------------
/lineapy/annotations/internal/operator.annotations.yaml:
--------------------------------------------------------------------------------
1 | - module: operator
2 | annotations:
3 | - criteria: # setitem(dict, key, value)
4 | function_name: setitem
5 | side_effects:
6 | - mutated_value:
7 | positional_argument_index: 0
8 | - views:
9 | - positional_argument_index: 2
10 | - positional_argument_index: 0
11 | - criteria: # getitem(dict, key)
12 | function_name: getitem
13 | side_effects:
14 | - views:
15 | - positional_argument_index: 0
16 | - result: RESULT
17 | - criteria: # delitem(dict, key)
18 | function_name: delitem
19 | side_effects:
20 | - mutated_value:
21 | positional_argument_index: 0
22 | - criteria: # inplace ops
23 | function_names:
24 | - iadd
25 | - iand
26 | - iconcat
27 | - ifloordiv
28 | - ilshift
29 | - imod
30 | - imul
31 | - imatmul
32 | - ior
33 | - ipow
34 | - irshift
35 | - isub
36 | - itruediv
37 | - ixor
38 | side_effects:
39 | - mutated_value:
40 | positional_argument_index: 0
41 | - views:
42 | - positional_argument_index: 0
43 | - result: RESULT
44 |
--------------------------------------------------------------------------------
/tests/unit/plugins/test_utils.py:
--------------------------------------------------------------------------------
1 | from lineapy.plugins import utils
2 |
3 |
4 | def test_slugify() -> None:
5 | """
6 |     Taken from https://github.com/django/django/blob/master/tests/utils_tests/test_text.py
7 | """
8 | items = (
9 | # given - expected - Unicode?
10 | ("Hello, World!", "hello_world", False),
11 | ("spam & eggs", "spam_eggs", False),
12 | (" multiple---dash and space ", "multiple_dash_and_space", False),
13 | ("\t whitespace-in-value \n", "whitespace_in_value", False),
14 | ("underscore_in-value", "underscore_in_value", False),
15 | ("__strip__underscore-value___", "strip__underscore_value", False),
16 | ("--strip-dash-value---", "strip_dash_value", False),
17 | ("__strip-mixed-value---", "strip_mixed_value", False),
18 | ("_ -strip-mixed-value _-", "strip_mixed_value", False),
19 | ("spam & ıçüş", "spam_ıçüş", True),
20 | ("spam & ıçüş", "spam_cus", False),
21 | ("foo ıç bar", "foo_ıç_bar", True),
22 | (" foo ıç bar", "foo_ıç_bar", True),
23 | ("你好", "你好", True),
24 | ("İstanbul", "istanbul", True),
25 | ("var-name-is-_private", "var_name_is__private", False),
26 | )
27 | for value, output, is_unicode in items:
28 | assert utils.slugify(value, allow_unicode=is_unicode) == output
29 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_blackbox_tracing.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | LINEA_CODE = """import lineapy
4 | """
5 |
6 | CODE = """import matplotlib.pyplot as plt
7 | import numpy as np
8 |
9 | size = 256, 16
10 | dpi = 72.0
11 | figsize = size[0] / float(dpi), size[1] / float(dpi)
12 | fig = plt.figure(figsize=figsize, dpi=dpi)
13 | plt.axes([0, 0, 1, 1], frameon=False)
14 |
15 | dash_styles = ["miter", "bevel", "round"]
16 |
17 | for i in range(3):
18 | plt.plot(
19 | i * 4 + np.arange(3),
20 | [0, 1, 0],
21 | color="blue",
22 | dashes=[12, 5],
23 | linewidth=8,
24 | dash_joinstyle=dash_styles[i],
25 | )
26 |
27 | plt.xlim(0, 12), plt.ylim(-1, 2)
28 | plt.xticks([]), plt.yticks([])
29 | plt.savefig("output/dash_joinstyle.png", dpi=dpi)
30 |
31 | """
32 |
33 | ARTIFACT_CODE = """
34 | artifact = lineapy.save(lineapy.file_system, "test_mplt")
35 | """
36 |
37 |
38 | @pytest.mark.xfail(reason="libraries used inside a blackbox are not captured")
39 | def test_mplt_inside_blackbox_does_not_fail(execute):
40 | # simply a test to check if the code runs without exceptions.
41 | # Later on this will be edited to ensure that the slice is accurate.
42 | res = execute(LINEA_CODE + CODE + ARTIFACT_CODE, snapshot=False)
43 | assert res.values["artifact"].get_code() == CODE
44 | # assert res.values["fig"] is not None
45 |
--------------------------------------------------------------------------------
/lineapy/plugins/pipeline_writer_factory.py:
--------------------------------------------------------------------------------
1 | from lineapy.data.types import PipelineType
2 | from lineapy.plugins.airflow_pipeline_writer import AirflowPipelineWriter
3 | from lineapy.plugins.argo_pipeline_writer import ARGOPipelineWriter
4 | from lineapy.plugins.base_pipeline_writer import BasePipelineWriter
5 | from lineapy.plugins.dvc_pipeline_writer import DVCPipelineWriter
6 | from lineapy.plugins.kubeflow_pipeline_writer import KubeflowPipelineWriter
7 | from lineapy.plugins.ray_pipeline_writer import RayPipelineWriter
8 |
9 |
10 | class PipelineWriterFactory:
11 | @classmethod
12 | def get(
13 | cls,
14 | pipeline_type: PipelineType = PipelineType.SCRIPT,
15 | *args,
16 | **kwargs,
17 | ):
18 | if pipeline_type == PipelineType.AIRFLOW:
19 | return AirflowPipelineWriter(*args, **kwargs)
20 | elif pipeline_type == PipelineType.DVC:
21 | return DVCPipelineWriter(*args, **kwargs)
22 | elif pipeline_type == PipelineType.ARGO:
23 | return ARGOPipelineWriter(*args, **kwargs)
24 | elif pipeline_type == PipelineType.KUBEFLOW:
25 | return KubeflowPipelineWriter(*args, **kwargs)
26 | elif pipeline_type == PipelineType.RAY:
27 | return RayPipelineWriter(*args, **kwargs)
28 | else:
29 | return BasePipelineWriter(*args, **kwargs)
30 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_list_slice.py:
--------------------------------------------------------------------------------
1 | def test_empty_slice(execute):
2 | res = execute("x = [1, 2, 3][:]", snapshot=False)
3 | assert res.values["x"] == [1, 2, 3]
4 |
5 |
6 | def test_slice_with_step(execute):
7 | res = execute("x = [1, 2, 3][::2]", snapshot=False)
8 | assert res.values["x"] == [1, 3]
9 |
10 |
11 | def test_slice_with_step_and_start(execute):
12 | res = execute("x = [1, 2, 3][0::2]", snapshot=False)
13 | assert res.values["x"] == [1, 3]
14 |
15 |
16 | def test_slice_with_step_and_stop(execute):
17 | res = execute("x = [1, 2, 3][:2:2]", snapshot=False)
18 | assert res.values["x"] == [1]
19 |
20 |
21 | def test_slice_with_step_and_start_and_stop(execute):
22 | res = execute("x = [1, 2, 3][1:2:2]", snapshot=False)
23 | assert res.values["x"] == [2]
24 |
25 |
26 | def test_slice_with_start(execute):
27 | res = execute("x = [1, 2, 3][1:]", snapshot=False)
28 | assert res.values["x"] == [2, 3]
29 |
30 |
31 | def test_subscript(execute):
32 | SUBSCRIPT = """
33 | ls = [1,2,3,4]
34 | ls[0] = 1
35 | a = 4
36 | ls[1] = a
37 | ls[2:3] = [30]
38 | ls[3:a] = [40]
39 | """
40 | res = execute(SUBSCRIPT, snapshot=False)
41 | assert len(res.values["ls"]) == 4
42 | assert res.values["ls"][0] == 1
43 | assert res.values["ls"][1] == 4
44 | assert res.values["ls"][2] == 30
45 | assert res.values["ls"][3] == 40
46 |
--------------------------------------------------------------------------------
/tests/end_to_end/test_set.py:
--------------------------------------------------------------------------------
1 | from lineapy.utils.utils import prettify
2 |
3 |
4 | def test_set_init(execute):
5 | code = """x={1,1,2}
6 | """
7 | res = execute(code, artifacts=["x"])
8 | assert res.slice("x") == prettify(code)
9 | assert res.values["x"] == {1, 2}
10 |
11 |
12 | def test_set_add_mutates(execute):
13 | code = """x = set()
14 | x.add(10)
15 | """
16 | res = execute(code, artifacts=["x"])
17 | assert res.slice("x") == prettify(code)
18 |
19 |
20 | def test_set_getitem_view(execute):
21 | code = """y = set()
22 | x = [y]
23 | y.add(10)
24 | """
25 | res = execute(code, artifacts=["x"])
26 | assert res.slice("x") == prettify(code)
27 |
28 |
29 | def test_set_add_mutates_inner(execute):
30 | code = """x = set()
31 | y = [x]
32 | x.add(10)
33 | y[0].add(11)
34 | """
35 | res = execute(code, artifacts=["x", "y"])
36 | assert res.slice("x") == prettify(code)
37 | assert res.slice("y") == prettify(code)
38 |
39 |
40 | def test_update_set_mutates(execute):
41 | code = """x = set()
42 | x.update({1,1,2})
43 | """
44 | res = execute(code, artifacts=["x"])
45 | assert res.slice("x") == prettify(code)
46 |
47 |
48 | def test_set_clear_mutates(execute):
49 | code = """x = set()
50 | x.add(10)
51 | x.clear()
52 | """
53 | res = execute(code, artifacts=["x"])
54 | assert res.slice("x") == prettify(code)
55 |
--------------------------------------------------------------------------------
/lineapy/system_tracing/exec_and_record_function_calls.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from sys import gettrace, settrace
3 | from types import CodeType
4 | from typing import Dict
5 |
6 | from lineapy.system_tracing._trace_func import TraceFunc
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | def exec_and_record_function_calls(
12 | code: CodeType, globals_: Dict[str, object]
13 | ) -> TraceFunc:
14 | """
15 | Execute the code while recording all the function calls which originate from the code object.
16 |
17 | While recording function calls, we use sys.settrace() with LineaPy's tracer to extract relevant
18 | information during the runtime of the user's code's function, and reset the tracer after the user
19 | function has completed execution to prevent unnecessary logging.
20 | However, to ensure LineaPy works correctly while debugging using VSCode, we first capture any
21 | existing tracers using sys.gettrace(), perform our analysis using the LineaPy tracer, and reset
22 | the existing tracer using sys.settrace()
23 | """
24 | logger.debug("Executing code")
25 | original_trace = gettrace()
26 | trace_func = TraceFunc(code)
27 | try:
28 | settrace(trace_func)
29 | exec(code, globals_)
30 | # Always stop tracing even if exception raised
31 | finally:
32 | settrace(original_trace)
33 | return trace_func
34 |
--------------------------------------------------------------------------------
/tests/unit/graph_reader/test_artifact_get_code.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock
2 |
3 | import pytest
4 |
5 | from lineapy.api.api_utils import de_lineate_code
6 |
7 | FAKE_PATH = "/tmp/path/to/value/file/xey"
8 |
9 |
10 | @pytest.mark.parametrize(
11 | "code, expected",
12 | [
13 | pytest.param("", "", id="blank"),
14 | pytest.param("x = 1", "x = 1", id="nolinea"),
15 | pytest.param(
16 | """import lineapy
17 | lineapy.save(x,"xey")""",
18 | f"""import pickle
19 |
20 | pickle.dump(x,open("{FAKE_PATH}","wb"))""",
21 | id="lineapy_save",
22 | ),
23 | pytest.param(
24 | "x = lineapy.get('x').get_value()",
25 | f"""import pickle
26 | x = pickle.load(open("{FAKE_PATH}","rb"))""",
27 | id="lineapy_get",
28 | ),
29 | pytest.param(
30 | """import lineapy
31 | x = lineapy.get('x').get_value()
32 | y = lineapy.get('y')""",
33 | f"""import pickle
34 | import lineapy
35 | x = pickle.load(open("{FAKE_PATH}","rb"))
36 | y = lineapy.get('y')""",
37 | id="lineapy_get_partial_replace",
38 | ),
39 | ],
40 | )
41 | def test__de_linealize_code(code, expected):
42 | db = MagicMock()
43 | db.get_node_value_path = MagicMock(return_value=FAKE_PATH) # type: ignore
44 |     delineated = de_lineate_code(code, db)
45 |     assert delineated == expected
46 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_lambda/test_lambda_with_primitives.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 10
8 | b = lambda x: x + 10
9 | c = b(a)
10 | """,
11 | location=PosixPath("[source file path]"),
12 | )
13 | call_2 = CallNode(
14 | source_location=SourceLocation(
15 | lineno=3,
16 | col_offset=4,
17 | end_lineno=3,
18 | end_col_offset=8,
19 | source_code=source_1.id,
20 | ),
21 | function_id=CallNode(
22 | source_location=SourceLocation(
23 | lineno=2,
24 | col_offset=4,
25 | end_lineno=2,
26 | end_col_offset=20,
27 | source_code=source_1.id,
28 | ),
29 | function_id=LookupNode(
30 | name="l_exec_expr",
31 | ).id,
32 | positional_args=[
33 | LiteralNode(
34 | value="lambda x: x + 10",
35 | ).id
36 | ],
37 | ).id,
38 | positional_args=[
39 | LiteralNode(
40 | source_location=SourceLocation(
41 | lineno=1,
42 | col_offset=4,
43 | end_lineno=1,
44 | end_col_offset=6,
45 | source_code=source_1.id,
46 | ),
47 | value=10,
48 | ).id
49 | ],
50 | )
51 |
--------------------------------------------------------------------------------
/tests/unit/transformer/test_source_giver.py:
--------------------------------------------------------------------------------
1 | # type: ignore
2 | import ast
3 | import sys
4 |
5 | import pytest
6 |
7 | from lineapy.transformer.source_giver import SourceGiver
8 |
9 |
10 | @pytest.mark.parametrize(
11 | "code,lineno",
12 | [
13 | (
14 | """a = 10
15 | b = lambda x: x + 10
16 | c = b(a)
17 | """,
18 | 1,
19 | ),
20 | ("""a = 10;b=10""", 1),
21 | ],
22 | ids=["multiline", "singleline"],
23 | )
24 | def test_source_giver_adds_end_lineno(code, lineno):
25 | if sys.version_info >= (3, 8):
26 | pytest.skip("SourceGiver not invoked for Python 3.8+")
27 | import asttokens
28 |
29 | tree = ast.parse(code)
30 | # ensure that the end_lineno is not available and fetching it raises exceptions
31 | with pytest.raises(AttributeError):
32 | print(tree.body[0].end_lineno)
33 |
34 |     # now we invoke the SourceGiver and add end_linenos in 2 steps - first we run the tree through asttokens
35 | asttokens.ASTTokens(code, parse=False, tree=tree)
36 | # double check that the line numbers cooked up by asttokens are correct
37 | assert tree.body[0].last_token.end[0] == lineno
38 |
39 |     # and in step 2, run the tree through SourceGiver and copy asttokens's token values
40 |     # so that the tree looks like a 3.8+ tree with all the end_linenos etc.
41 | SourceGiver().transform(tree)
42 | assert tree.body[0].end_lineno == lineno
43 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 | services:
3 | lineapy:
4 | image: ${IMAGE_NAME}
5 | build: .
6 | environment:
7 | - LINEAPY_DATABASE_URL=${LINEAPY_DATABASE_URL}
8 | volumes:
9 | - ./lineapy:/usr/src/base/lineapy
10 | - ./tests:/usr/src/base/tests
11 | - ./pyproject.toml:/usr/src/base/pyproject.toml
12 | - ./pytest.ini:/usr/src/base/pytest.ini
13 | - ./docs:/usr/src/base/docs
14 | networks:
15 | - lineapy
16 |
17 | postgres:
18 | image: postgres:latest
19 | environment:
20 | - POSTGRES_USER=postgres
21 | - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
22 | - POSTGRES_DB=postgres
23 | ports:
24 | - "5432:5432"
25 | networks:
26 | - lineapy
27 |
28 | # wait_for_deps2:
29 | # image: willwill/wait-for-it
30 | # command: [ postgres:5432 ]
31 | # networks:
32 | # - lineapy
33 |
34 | wait_for_deps:
35 | image: dadarek/wait-for-dependencies
36 | command: postgres:5432
37 | networks:
38 | - lineapy
39 |
40 | lineapy-airflow:
41 | image: ${IMAGE_NAME_AIRFLOW}
42 | build:
43 | args:
44 | IMAGE_NAME: ${IMAGE_NAME}
45 | context: .
46 | dockerfile: Dockerfile-airflow
47 | depends_on:
48 | - lineapy
49 | ports:
50 | - 8080:8080
51 | command: airflow standalone
52 | networks:
53 | - lineapy
54 |
55 | networks:
56 | lineapy:
57 | driver: bridge
58 | external: true
59 |
--------------------------------------------------------------------------------
/docs/mkdocs/concepts/artifact-store.md:
--------------------------------------------------------------------------------
1 | # Artifact Store
2 |
3 | LineaPy saves artifacts in the artifact store, which is a centralized repository for artifacts and
4 | their metadata (e.g., creation time, version). Under the hood, it is a collection of two data structures:
5 |
6 | - Serialized artifact values (i.e., pickle files)
7 | - Database that stores artifact metadata (e.g., timestamp, version, code, pointer to the serialized value)
8 |
9 | Because it encapsulates both value and code, along with other metadata such as creation time and version,
10 | LineaPy's artifact store provides a unified, streamlined way to save, manage, and reuse
11 | work from different people over time. Contrast this with a typical setup where a team stores its
12 | outputs in one place (e.g., a key-value store) and the code in another (e.g., a GitHub repo): keeping
13 | the two in sync quickly becomes difficult. LineaPy simplifies lineage tracking
14 | by storing all of these correlations in one framework: the artifact store.
15 |
16 | LineaPy's artifact store is globally accessible, which means the user can view, load, and build on artifacts across
17 | different development sessions and even different projects. This unified global storage is designed to accelerate the overall
18 | development process, which is iterative in nature. Moreover, it can facilitate collaboration between different teams
19 | as it provides a single source of truth for all prior relevant work.
20 |
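21 | To make this concrete, below is a minimal sketch of saving an artifact in one session and reusing it in another. It assumes the standard `lineapy.save`/`lineapy.get` API, code running inside a LineaPy-traced session (e.g., a notebook with `%load_ext lineapy`), and a hypothetical artifact name `cleaned_df`:
22 | 
23 | ```python
24 | import lineapy
25 | import pandas as pd
26 | 
27 | # Session 1: produce a value and save it to the artifact store.
28 | # lineapy.save records both the pickled value and the code that produced it.
29 | df = pd.DataFrame({"a": [1, 2, None]})
30 | cleaned_df = df.dropna()
31 | artifact = lineapy.save(cleaned_df, "cleaned_df")
32 | 
33 | # Session 2 (possibly days later, or a different project): load it back.
34 | loaded = lineapy.get("cleaned_df")
35 | print(loaded.get_value())  # the saved DataFrame
36 | print(loaded.get_code())   # the sliced code that produced it
37 | ```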
--------------------------------------------------------------------------------
/tests/end_to_end/test_stack_trace.py:
--------------------------------------------------------------------------------
1 | """
2 | Verifies we are rewriting the stack trace properly.
3 | """
4 |
5 | import traceback
6 | from typing import cast
7 |
8 | import pytest
9 |
10 | from lineapy.exceptions.user_exception import UserException
11 |
12 |
13 | def test_call_exception(execute):
14 | code = """def divide_me(a):
15 | return a/0
16 | x = divide_me(1)
17 | """
18 | with pytest.raises(UserException) as e:
19 | execute(code)
20 |
21 | # Test that the first line of the inner exception is the line in the source
22 | # file for this call node
23 | inner_exception = cast(Exception, e.value.__cause__)
24 | assert (
25 | traceback.extract_tb(inner_exception.__traceback__)[0].line
26 | == "x = divide_me(1)"
27 | )
28 |
29 |
30 | def test_syntax_error(execute):
31 | code = """a = 10
32 | a+++
33 | """
34 | with pytest.raises(UserException) as e:
35 | execute(code)
36 | # Verify that the first line is the proper line from the file
37 | inner_exception = cast(SyntaxError, e.value.__cause__)
38 | assert inner_exception.text == "a+++\n"
39 |
40 |
41 | def test_name_error(execute):
42 | code = """a = 10
43 | x
44 | """
45 | with pytest.raises(UserException) as e:
46 | execute(code)
47 | # Verify that the first line is the proper line from the file
48 | inner_exception = cast(NameError, e.value.__cause__)
49 | assert inner_exception.args == ("name 'x' is not defined",)
50 |
--------------------------------------------------------------------------------
/tests/integration/__snapshots__/test_slice/test_slice[pytorch_intro_torchscript].py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class MyDecisionGate(torch.nn.Module):
5 | def forward(self, x):
6 | if x.sum() > 0:
7 | return x
8 | else:
9 | return -x
10 |
11 |
12 | class MyCell(torch.nn.Module):
13 | def __init__(self, dg):
14 | super(MyCell, self).__init__()
15 | self.dg = dg
16 | self.linear = torch.nn.Linear(4, 4)
17 |
18 | def forward(self, x, h):
19 | new_h = torch.tanh(self.dg(self.linear(x)) + h)
20 | return new_h, new_h
21 |
22 |
23 | scripted_gate = torch.jit.script(MyDecisionGate())
24 | x, h = torch.rand(3, 4), torch.rand(3, 4)
25 |
26 |
27 | class MyRNNLoop(torch.nn.Module):
28 | def __init__(self):
29 | super(MyRNNLoop, self).__init__()
30 | self.cell = torch.jit.trace(MyCell(scripted_gate), (x, h))
31 |
32 | def forward(self, xs):
33 | h, y = torch.zeros(3, 4), torch.zeros(3, 4)
34 | for i in range(xs.size(0)):
35 | y, h = self.cell(xs[i], h)
36 | return y, h
37 |
38 |
39 | class WrapRNN(torch.nn.Module):
40 | def __init__(self):
41 | super(WrapRNN, self).__init__()
42 | self.loop = torch.jit.script(MyRNNLoop())
43 |
44 | def forward(self, xs):
45 | y, h = self.loop(xs)
46 | return torch.relu(y)
47 |
48 |
49 | traced = torch.jit.trace(WrapRNN(), torch.rand(10, 3, 4))
50 | traced.save("wrapped_rnn.pt")
51 |
--------------------------------------------------------------------------------
/tests/integration/slices/pytorch_vision_tensor_transform.py:
--------------------------------------------------------------------------------
1 | # This is the manual slice of:
2 | # lineapy.file_system
3 | # from file:
4 | # sources/pytorch-vision/gallery/plot_scripted_tensor_transforms.py
5 |
6 | # To verify that linea produces the same slice, run:
7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pytorch_vision_tensor_transform]'
8 |
9 | import torch
10 | import torchvision.transforms as T
11 |
12 | torch.manual_seed(1)
13 | import torch.nn as nn
14 |
15 | device = "cuda" if torch.cuda.is_available() else "cpu"
16 | from torchvision.models import resnet18
17 |
18 |
19 | class Predictor(nn.Module):
20 | def __init__(self):
21 | super().__init__()
22 | self.resnet18 = resnet18(pretrained=True, progress=False).eval()
23 | self.transforms = nn.Sequential(
24 | T.Resize([256]),
25 | T.CenterCrop(224),
26 | T.ConvertImageDtype(torch.float),
27 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
28 | )
29 |
30 | def forward(self, x: torch.Tensor) -> torch.Tensor:
31 | with torch.no_grad():
32 | x = self.transforms(x)
33 | y_pred = self.resnet18(x)
34 | return y_pred.argmax(dim=1)
35 |
36 |
37 | predictor = Predictor().to(device)
38 | scripted_predictor = torch.jit.script(predictor).to(device)
39 | import tempfile
40 |
41 | with tempfile.NamedTemporaryFile() as f:
42 | scripted_predictor.save(f.name)
43 |
--------------------------------------------------------------------------------
/lineapy/transformer/py37_transformer.py:
--------------------------------------------------------------------------------
1 | import ast
2 |
3 | from lineapy.transformer.base_transformer import BaseTransformer
4 | from lineapy.utils.deprecation_utils import Constant
5 |
6 |
7 | class Py37Transformer(BaseTransformer):
8 | def _convert_to_constant(self, value, node) -> Constant:
9 | if not hasattr(
10 | node, "end_lineno"
11 |         ):  # somehow didn't go through our SourceGiver
12 | return Constant(
13 | value=value, lineno=node.lineno, col_offset=node.col_offset
14 | )
15 | else:
16 | return Constant(
17 | value=value,
18 | lineno=node.lineno,
19 | end_lineno=node.end_lineno, # type: ignore
20 | col_offset=node.col_offset,
21 | end_col_offset=node.end_col_offset, # type: ignore
22 | )
23 |
24 | def visit_Ellipsis(self, node: ast.Ellipsis) -> Constant:
25 | return self._convert_to_constant(..., node)
26 |
27 | def visit_Str(self, node: ast.Str) -> Constant:
28 | return self._convert_to_constant(node.s, node)
29 |
30 | def visit_Num(self, node: ast.Num) -> Constant:
31 | return self._convert_to_constant(node.n, node)
32 |
33 | def visit_NameConstant(self, node: ast.NameConstant) -> Constant:
34 | return self._convert_to_constant(node.value, node)
35 |
36 | def visit_Bytes(self, node: ast.Bytes) -> Constant:
37 | return self._convert_to_constant(node.s, node)
38 |
--------------------------------------------------------------------------------
/tests/end_to_end/__snapshots__/test_assign_destruc/test_variable_alias_nested.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from pathlib import *
3 | from lineapy.data.types import *
4 | from lineapy.utils.utils import get_new_id
5 |
6 | source_1 = SourceCode(
7 | code="""a = 0
8 | b = a
9 | c = b""",
10 | location=PosixPath("[source file path]"),
11 | )
12 | call_2 = CallNode(
13 | source_location=SourceLocation(
14 | lineno=3,
15 | col_offset=0,
16 | end_lineno=3,
17 | end_col_offset=5,
18 | source_code=source_1.id,
19 | ),
20 | function_id=LookupNode(
21 | name="l_alias",
22 | ).id,
23 | positional_args=[
24 | CallNode(
25 | source_location=SourceLocation(
26 | lineno=2,
27 | col_offset=0,
28 | end_lineno=2,
29 | end_col_offset=5,
30 | source_code=source_1.id,
31 | ),
32 | function_id=LookupNode(
33 | name="l_alias",
34 | ).id,
35 | positional_args=[
36 | LiteralNode(
37 | source_location=SourceLocation(
38 | lineno=1,
39 | col_offset=4,
40 | end_lineno=1,
41 | end_col_offset=5,
42 | source_code=source_1.id,
43 | ),
44 | value=0,
45 | ).id
46 | ],
47 | ).id
48 | ],
49 | )
50 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 79
3 | extend-exclude = '(__snapshots__|integration/slices|integration/sources|integration/envs|outputs|_alembic|unit/plugins/expected|examples|env)'
4 |
5 | [tool.isort]
6 | profile = "black"
7 | skip_gitignore = true
8 | skip = ["__snapshots__"]
9 | skip_glob=["tests/integration/slices/*", "tests/integration/sources/*", "tests/integration/envs/*", "tests/outputs/*", "lineapy/_alembic/*", "env/*"]
10 | line_length = 79
11 |
12 | [tool.coverage.run]
13 | # Trace which side of branches were taken
14 | # https://coverage.readthedocs.io/en/latest/branch.html#branch
15 | branch = true
16 | # Ignore coverage on app, since we are letting it rot
17 | omit = ["lineapy/app/*"]
18 | relative_files = true
19 |
20 | [tool.mypy]
21 |
22 | exclude = '(/__snapshots__/|sliced_housing_dag*.py|tutorials/.*|integration/slices/.*|integration/sources/.*|integration/envs/.*|/outputs/|/build/|/_alembic/|env/)$'
23 |
24 | # https://docs.sqlalchemy.org/en/14/orm/extensions/mypy.html
25 | # https://pydantic-docs.helpmanual.io/mypy_plugin/#enabling-the-plugin
26 | plugins = ["sqlalchemy.ext.mypy.plugin", "pydantic.mypy"]
27 |
28 |
29 | # Enable function body type checking, even if function types are not annotated
30 | check_untyped_defs = true
31 |
33 | # Don't check libraries without stubs
33 | ignore_missing_imports = true
34 |
35 | # Do not raise errors on unused ignores - they are a hassle when supporting multiple Python versions
36 | warn_unused_ignores = false
37 |
38 | warn_unreachable = true
39 |
--------------------------------------------------------------------------------
/tests/notebook/test_is_executing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "92e3dbb4",
6 | "metadata": {},
7 | "source": [
8 | "Test that linea tracing is active"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "06ac3074-0510-43fe-86d2-84101db99956",
15 | "metadata": {
16 | "execution": {
17 | "iopub.execute_input": "2022-01-13T19:00:04.651272Z",
18 | "iopub.status.busy": "2022-01-13T19:00:04.650632Z",
19 | "iopub.status.idle": "2022-01-13T19:00:04.815525Z",
20 | "shell.execute_reply": "2022-01-13T19:00:04.814859Z"
21 | },
22 | "tags": []
23 | },
24 | "outputs": [],
25 | "source": [
26 | "import lineapy\n",
27 | "assert lineapy._is_executing()"
28 | ]
29 | }
30 | ],
31 | "metadata": {
32 | "kernelspec": {
33 | "display_name": "Python 3 (ipykernel)",
34 | "language": "python",
35 | "name": "python3"
36 | },
37 | "language_info": {
38 | "codemirror_mode": {
39 | "name": "ipython",
40 | "version": 3
41 | },
42 | "file_extension": ".py",
43 | "mimetype": "text/x-python",
44 | "name": "python",
45 | "nbconvert_exporter": "python",
46 | "pygments_lexer": "ipython3",
47 | "version": "3.9.6"
48 | },
49 | "widgets": {
50 | "application/vnd.jupyter.widget-state+json": {
51 | "state": {},
52 | "version_major": 2,
53 | "version_minor": 0
54 | }
55 | }
56 | },
57 | "nbformat": 4,
58 | "nbformat_minor": 5
59 | }
60 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | # We use local hooks that run on the whole repo instead of passing changed filenames as args,
4 | # so that the ignores we defined in the configs take effect
5 | repos:
6 | - repo: local
7 | hooks:
8 | - id: flake8
9 | name: flake8
10 | entry: flake8 .
11 | language: python
12 | additional_dependencies: ["flake8==4.0.1"]
13 | types: [python]
14 | pass_filenames: false
15 | - repo: local
16 | hooks:
17 | - id: isort
18 | name: isort
19 | entry: isort .
20 | language: python
21 | additional_dependencies: ["isort==5.10.1"]
22 | types: [python]
23 | pass_filenames: false
24 | - repo: local
25 | hooks:
26 | - id: black
27 | name: black
28 | entry: black .
29 | language: python
30 | # https://github.com/psf/black/issues/2964
31 | additional_dependencies: ["black==22.3.0"]
32 | types: [python]
33 | pass_filenames: false
34 | - repo: local
35 | hooks:
36 | - id: mypy
37 | name: mypy
38 | entry: mypy .
39 | language: python
40 | additional_dependencies: ["mypy==0.931", "SQLAlchemy==1.4.29", "sqlalchemy[mypy]",
41 | "mypy-extensions==0.4.3", "pydantic==1.9.0", "types-PyYAML", "types-requests", "types-mock"]
42 | types: [python]
43 | pass_filenames: false
44 |
--------------------------------------------------------------------------------
/lineapy/plugins/jinja_templates/airflow/airflow_dag_PythonOperator.jinja:
--------------------------------------------------------------------------------
1 | import {{ MODULE_NAME }}
2 | import pickle
3 | import pathlib
4 | from airflow import DAG
5 | from airflow.utils.dates import days_ago
6 | from airflow.operators.python_operator import PythonOperator
7 |
8 | {% for task_def in task_definitions %}
9 | {{ task_def }}
10 | {% endfor %}
11 |
12 | default_dag_args = {
13 | "owner": "{{ OWNER }}",
14 | "retries": {{ RETRIES }},
15 | "start_date": {{ START_DATE }},
16 | {%- if (dag_params|length > 0) %}
17 | "params": {{ dag_params }},
18 | {%- endif %}
19 | }
20 |
21 | with DAG(
22 | dag_id="{{ DAG_NAME }}_dag",
23 | schedule_interval="{{ SCHEDULE_INTERVAL }}",
24 | max_active_runs={{ MAX_ACTIVE_RUNS }},
25 | catchup={{ CATCHUP }},
26 | default_args=default_dag_args,
27 | ) as dag:
28 |
29 | {% for task_name, task_def in tasks.items() %}
30 | {{ task_name }} = PythonOperator(
31 | task_id="{{ task_name }}_task",
32 | python_callable=task_{{ task_name }},
33 | {%- if task_def.user_input_variables|length > 0 %}
34 | op_kwargs={
35 | {%- for var in task_def.user_input_variables %}
36 | "{{ var }}": "{{ '{{' }} params.{{ var }} {{ '}}' }}"{{ ',' if not loop.last else '' }}
37 | {%- endfor %}
38 | },
39 | {%- endif %}
40 | )
41 | {% endfor %}
42 |
43 | {% if task_dependencies is not none %}
44 | {% for TASK_DEPENDENCIES in task_dependencies %}
45 | {{TASK_DEPENDENCIES}}
46 | {% endfor %}
47 | {% endif %}
48 |
--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from pathlib import Path
3 |
4 | from lineapy.api.artifact_serializer import _try_write_to_pickle
5 | from lineapy.api.models.linea_artifact import LineaArtifact
6 | from lineapy.utils.config import options
7 |
8 |
9 | def test_execute_slice(execute):
10 | """
11 | Tests that executing a slice of a graph yields the same result as executing the graph
12 | """
13 | c = """x = []
14 | if True:
15 | x = []
16 | x.append(1)
17 | """
18 | res = execute(c, artifacts=["x"], snapshot=False)
19 | artifactorm = res.db.get_artifactorm_by_name("x")
20 | full_graph_artifact = LineaArtifact(
21 | db=res.db,
22 | _execution_id=artifactorm.execution_id,
23 | _node_id=artifactorm.node_id,
24 | _session_id=artifactorm.node.session_id,
25 | _version=artifactorm.version,
26 | name=artifactorm.name,
27 | )
28 |
29 | slice_graph_artifact_res = full_graph_artifact.execute()
30 | assert slice_graph_artifact_res == res.values["x"]
31 | assert (
32 | res.artifacts["x"]
33 | == """if True:
34 | x = []
35 | x.append(1)
36 | """
37 | )
38 | assert res.values["x"] == [1]
39 |
40 |
41 | def test_write_to_pickle():
42 | _try_write_to_pickle(42, "test_pickle")
43 | pickle_path = (
44 | Path(options.safe_get("artifact_storage_dir")) / "test_pickle"
45 | )
46 | assert pickle_path.exists()
47 |
48 | with pickle_path.open("rb") as f:
49 | assert pickle.load(f) == 42
50 |
--------------------------------------------------------------------------------