├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── plugins │ │ ├── __init__.py │ │ ├── framework_specific │ │ │ └── __init__.py │ │ ├── expected │ │ │ ├── script_complex_h │ │ │ │ └── sample_output │ │ │ │ │ ├── f.pkl │ │ │ │ │ └── h.pkl │ │ │ ├── script_pipeline_a0_b0_dependencies │ │ │ │ └── sample_output │ │ │ │ │ ├── a0.pkl │ │ │ │ │ └── b0.pkl │ │ │ └── script_pipeline_housing_w_dependencies │ │ │ │ └── sample_output │ │ │ │ ├── y.pkl │ │ │ │ └── p_value.pkl │ │ ├── test_task.py │ │ └── test_utils.py │ ├── graph_reader │ │ ├── inputs │ │ │ ├── simple_twovar │ │ │ ├── simple │ │ │ ├── mutate_after_save │ │ │ ├── extract_common │ │ │ ├── module_import │ │ │ ├── module_import_alias │ │ │ ├── linear │ │ │ ├── complex │ │ │ ├── module_import_from │ │ │ └── housing │ │ └── test_artifact_get_code.py │ ├── system_tracing │ │ └── test_op_stack.py │ ├── transformer │ │ ├── test_transform_code.py │ │ └── test_source_giver.py │ ├── cli │ │ └── test_cli.py │ ├── utils │ │ └── test_config.py │ ├── db │ │ ├── test_db_utils.py │ │ └── test_literal_node.py │ └── migration │ │ └── test_migrations.py ├── integration │ ├── __init__.py │ ├── .gitignore │ ├── sources │ │ └── matplotlib-tutorial │ │ │ ├── figures │ │ │ └── .gitkeep │ │ │ └── scripts │ │ │ ├── alpha.py │ │ │ ├── aliased.py │ │ │ ├── dash_joinstyle.py │ │ │ └── exercice_3.py │ ├── __snapshots__ │ │ └── test_slice │ │ │ ├── test_slice[pandas_timeseries].py │ │ │ ├── test_slice[pandas_deleting].py │ │ │ ├── test_slice[matplotlib_aliased].py │ │ │ ├── test_slice[pandas_apply].py │ │ │ ├── test_slice[sklearn_semi_supervised_plot_label_propagation_structure].py │ │ │ ├── test_slice[matplotlib_exercise_3].py │ │ │ ├── test_slice[sklearn_multioutput_plot_classifier_chain_yeast].py │ │ │ ├── test_slice[pandas_stats].py │ │ │ ├── test_slice[sklearn_model_selection_plot_randomized_search].py │ │ │ ├── test_slice[pandas_merge].py │ │ │ ├── test_slice[sklearn_tree_plot_cost_complexity_pruning].py │ │ │ ├── test_slice[sklearn_preprocessing_plot_scaling_importance].py │ │ │ ├── test_slice[matplotlib_dash_joinstyle].py │ │ │ ├── test_slice[sklearn_compose_plot_feature_union].py │ │ │ └── test_slice[pytorch_intro_torchscript].py │ └── slices │ │ ├── pandas_timeseries.py │ │ ├── matplotlib_alpha.py │ │ ├── matplotlib_aliased.py │ │ ├── pandas_apply.py │ │ ├── pandas_deleting.py │ │ ├── matplotlib_exercise_3.py │ │ ├── sklearn_semi_supervised_plot_label_propagation_structure.py │ │ ├── pandas_stats.py │ │ ├── sklearn_multioutput_plot_classifier_chain_yeast.py │ │ ├── sklearn_model_selection_plot_randomized_search.py │ │ ├── xgboost_sklearn_examples.py │ │ ├── pandas_merge.py │ │ ├── sklearn_tree_plot_cost_complexity_pruning.py │ │ ├── sklearn_preprocessing_plot_scaling_importance.py │ │ ├── matplotlib_dash_joinstyle.py │ │ ├── sklearn_compose_plot_feature_union.py │ │ └── pytorch_vision_tensor_transform.py ├── notebook │ ├── .gitignore │ ├── pyproject.toml │ └── test_is_executing.ipynb ├── outputs │ ├── generated │ │ └── .keep │ └── expected │ │ ├── sliced_housing_simple_requirements.txt │ │ ├── sliced_housing_multiple_requirements.txt │ │ ├── sliced_housing_multiple_w_dependencies_requirements.txt │ │ ├── sliced_housing_simple_script_dag.py │ │ ├── sliced_housing_multiple_script_dag.py │ │ ├── sliced_housing_multiple_w_dependencies_script_dag.py │ │ ├── sliced_housing_simple_Dockerfile │ │ ├── sliced_housing_multiple_Dockerfile │ │ ├── sliced_housing_multiple_w_dependencies_Dockerfile │ │ ├── sliced_housing_simple.py │ │ ├── 
sliced_housing_simple_dag.py │ │ ├── sliced_housing_multiple_dag.py │ │ ├── sliced_housing_multiple_w_dependencies_dag.py │ │ ├── sliced_housing_multiple.py │ │ └── sliced_housing_multiple_w_dependencies.py ├── end_to_end │ ├── import_data │ │ ├── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __will_not_import.py │ │ │ ├── __error_on_load.py │ │ │ ├── __no_imported_submodule_prime.py │ │ │ └── __no_imported_submodule.py │ ├── import_with_name_conflict │ │ ├── data.py │ │ └── __init__.py │ ├── __snapshots__ │ │ ├── test_misc │ │ │ ├── TestEndToEnd.test_messy_nodes.1.py │ │ │ ├── TestEndToEnd.test_messy_nodes_slice.py │ │ │ ├── TestEndToEnd.test_housing.py │ │ │ └── TestEndToEnd.test_simple.py │ │ ├── test_literal │ │ │ └── test_ellipsis.py │ │ ├── test_list_comp │ │ │ └── test_returns_value.py │ │ ├── test_op │ │ │ ├── test_sub.py │ │ │ ├── test_invert.py │ │ │ └── test_not.py │ │ ├── test_var_aliasing │ │ │ ├── test_variable_alias.py │ │ │ └── test_alias_by_value.py │ │ ├── test_lambda │ │ │ └── test_lambda_with_primitives.py │ │ └── test_assign_destruc │ │ │ └── test_variable_alias_nested.py │ ├── test_literal.py │ ├── test_list_comp.py │ ├── test_decorator.py │ ├── test_dictionary.py │ ├── test_lists.py │ ├── test_delete.py │ ├── test_dask.py │ ├── test_classdef.py │ ├── test_blackbox_tracing.py │ ├── test_list_slice.py │ ├── test_set.py │ └── test_stack_trace.py ├── simple_data.csv ├── README.md ├── simple.py ├── pyproject.toml ├── tools │ └── print_ast.py ├── housing.py ├── __snapshots__ │ └── test_ipython │ │ ├── test_to_airflow[no_config-module].py │ │ └── test_to_airflow[with_config-module].py ├── test_globals_dict.py └── test_api.py ├── lineapy ├── api │ ├── __init__.py │ └── models │ │ └── __init__.py ├── cli │ └── __init__.py ├── db │ └── __init__.py ├── _alembic │ ├── __init__.py │ ├── versions │ │ ├── __init__.py │ │ └── 41a413504720_add_named_var.py │ ├── README │ └── script.py.mako ├── data │ └── __init__.py ├── editors │ └── __init__.py ├── plugins │ ├── __init__.py │ ├── serializers │ │ └── __init__.py │ ├── jinja_templates │ │ ├── task │ │ │ ├── cwdpickle │ │ │ │ ├── task_ser.jinja │ │ │ │ └── task_deser.jinja │ │ │ ├── parameterizedpickle │ │ │ │ ├── task_ser.jinja │ │ │ │ └── task_deser.jinja │ │ │ ├── tmpdirpickle │ │ │ │ ├── task_deser.jinja │ │ │ │ ├── task_teardown.jinja │ │ │ │ ├── task_setup.jinja │ │ │ │ └── task_ser.jinja │ │ │ └── task_function.jinja │ │ ├── dvc │ │ │ ├── dvc_dag_SingleStageAllSessions.jinja │ │ │ ├── dvc_dag_params.jinja │ │ │ ├── dvc_dag_PythonOperator.jinja │ │ │ ├── dvc_dockerfile.jinja │ │ │ └── dvc_dag_StagePerArtifact.jinja │ │ ├── module │ │ │ ├── session_function.jinja │ │ │ └── module.jinja │ │ ├── script_dockerfile.jinja │ │ ├── ray │ │ │ ├── ray_dag_remote.jinja │ │ │ ├── ray_dag_workflow.jinja │ │ │ ├── ray_dockerfile.jinja │ │ │ └── ray_dag_base.jinja │ │ ├── airflow │ │ │ ├── airflow_dockerfile.jinja │ │ │ └── airflow_dag_PythonOperator.jinja │ │ ├── argo │ │ │ └── argo_dockerfile.jinja │ │ └── kubeflow │ │ │ └── kubeflow_dockerfile.jinja │ ├── loader.py │ └── pipeline_writer_factory.py ├── utils │ ├── __init__.py │ ├── analytics │ │ ├── __init__.py │ │ └── utils.py │ ├── __error_on_load.py │ ├── __no_imported_submodule_prime.py │ ├── version.py │ ├── __no_imported_submodule.py │ ├── validate_annotation_spec.py │ └── migration.py ├── exceptions │ ├── __init__.py │ ├── l_import_error.py │ ├── db_exceptions.py │ ├── flag.py │ └── create_frame.py ├── execution │ └── __init__.py ├── graph_reader │ ├── __init__.py │ └── 
types.py ├── instrumentation │ └── __init__.py ├── transformer │ ├── __init__.py │ ├── transformer_util.py │ ├── py38_transformer.py │ ├── source_giver.py │ └── py37_transformer.py ├── __main__.py ├── annotations │ ├── external │ │ ├── joblib.annotations.yaml │ │ ├── numpy.annotations.yaml │ │ ├── tensorflow.annotations.yaml │ │ ├── opencv.annotations.yaml │ │ ├── prophet.annotations.yaml │ │ ├── statsforecast.annotations.yaml │ │ ├── pillow.annotations.yaml │ │ ├── torch.annotations.yaml │ │ ├── boto3.annotations.yaml │ │ ├── keras.annotations.yaml │ │ ├── sklearn.annotations.yaml │ │ └── gym.annotations.yaml │ └── internal │ │ ├── pickle.annotations.yaml │ │ ├── tempfile.annotations.yaml │ │ ├── io.annotations.yaml │ │ └── operator.annotations.yaml ├── system_tracing │ ├── function_call.py │ ├── _object_side_effect.py │ ├── __init__.py │ └── exec_and_record_function_calls.py └── visualizer │ ├── README.md │ └── optimize_svg.py ├── docs ├── .gitignore ├── mkdocs │ ├── images │ │ ├── .gitkeep │ │ ├── sample_graph.png │ │ ├── example_graph.png │ │ ├── function_components.png │ │ ├── icon-lineapy-white.png │ │ ├── graph_reader_classes.png │ │ ├── lineapy-square-light.png │ │ └── pipeline-example-diagram.png │ ├── tutorials │ │ ├── .gitkeep │ │ └── README.md │ ├── reference │ │ └── lineapy │ │ │ ├── index.md │ │ │ ├── api │ │ │ ├── index.md │ │ │ ├── api.md │ │ │ ├── api_utils.md │ │ │ ├── models │ │ │ │ ├── index.md │ │ │ │ ├── pipeline.md │ │ │ │ ├── linea_artifact.md │ │ │ │ └── linea_artifact_store.md │ │ │ └── artifact_serializer.md │ │ │ ├── cli │ │ │ ├── index.md │ │ │ └── cli.md │ │ │ ├── db │ │ │ ├── db.md │ │ │ ├── index.md │ │ │ ├── utils.md │ │ │ └── relational.md │ │ │ ├── data │ │ │ ├── index.md │ │ │ ├── graph.md │ │ │ └── types.md │ │ │ ├── utils │ │ │ ├── index.md │ │ │ ├── utils.md │ │ │ ├── config.md │ │ │ ├── version.md │ │ │ ├── benchmarks.md │ │ │ ├── constants.md │ │ │ ├── migration.md │ │ │ ├── analytics │ │ │ │ ├── index.md │ │ │ │ ├── utils.md │ │ │ │ ├── event_schemas.md │ │ │ │ └── usage_tracking.md │ │ │ ├── lineabuiltins.md │ │ │ ├── tree_logger.md │ │ │ ├── __error_on_load.md │ │ │ ├── logging_config.md │ │ │ ├── deprecation_utils.md │ │ │ ├── __no_imported_submodule.md │ │ │ ├── validate_annotation_spec.md │ │ │ └── __no_imported_submodule_prime.md │ │ │ ├── editors │ │ │ ├── index.md │ │ │ ├── ipython.md │ │ │ └── ipython_cell_storage.md │ │ │ ├── plugins │ │ │ ├── index.md │ │ │ ├── loader.md │ │ │ ├── task.md │ │ │ ├── utils.md │ │ │ ├── taskgen.md │ │ │ ├── serializers │ │ │ │ ├── index.md │ │ │ │ └── mlflow_io.md │ │ │ ├── session_writers.md │ │ │ ├── argo_pipeline_writer.md │ │ │ ├── base_pipeline_writer.md │ │ │ ├── dvc_pipeline_writer.md │ │ │ ├── ray_pipeline_writer.md │ │ │ ├── airflow_pipeline_writer.md │ │ │ ├── pipeline_writer_factory.md │ │ │ └── kubeflow_pipeline_writer.md │ │ │ ├── exceptions │ │ │ ├── index.md │ │ │ ├── flag.md │ │ │ ├── excepthook.md │ │ │ ├── create_frame.md │ │ │ ├── db_exceptions.md │ │ │ ├── l_import_error.md │ │ │ └── user_exception.md │ │ │ ├── execution │ │ │ ├── index.md │ │ │ ├── context.md │ │ │ ├── executor.md │ │ │ ├── globals_dict.md │ │ │ ├── side_effects.md │ │ │ └── inspect_function.md │ │ │ ├── transformer │ │ │ ├── index.md │ │ │ ├── source_giver.md │ │ │ ├── transform_code.md │ │ │ ├── base_transformer.md │ │ │ ├── node_transformer.md │ │ │ ├── py37_transformer.md │ │ │ ├── py38_transformer.md │ │ │ ├── transformer_util.md │ │ │ └── conditional_transformer.md │ │ │ ├── visualizer │ │ │ ├── index.md │ │ │ ├── 
graphviz.md │ │ │ ├── optimize_svg.md │ │ │ └── visual_graph.md │ │ │ ├── graph_reader │ │ │ ├── index.md │ │ │ ├── types.md │ │ │ ├── utils.md │ │ │ ├── graph_printer.md │ │ │ ├── program_slice.md │ │ │ ├── node_collection.md │ │ │ ├── session_artifacts.md │ │ │ └── artifact_collection.md │ │ │ ├── instrumentation │ │ │ ├── index.md │ │ │ ├── tracer.md │ │ │ ├── annotation_spec.md │ │ │ ├── tracer_context.md │ │ │ ├── mutation_tracker.md │ │ │ └── control_flow_tracker.md │ │ │ └── system_tracing │ │ │ ├── index.md │ │ │ ├── _op_stack.md │ │ │ ├── _trace_func.md │ │ │ ├── function_call.md │ │ │ ├── _object_side_effect.md │ │ │ ├── exec_and_record_function_calls.md │ │ │ ├── function_calls_to_side_effects.md │ │ │ ├── _object_side_effects_to_side_effects.md │ │ │ └── _function_calls_to_object_side_effects.md │ ├── guides │ │ ├── contributing │ │ │ ├── areas │ │ │ │ └── add-test.md │ │ │ └── tips.md │ │ └── support.md │ └── concepts │ │ ├── artifact.md │ │ ├── pipeline.md │ │ └── artifact-store.md ├── requirements.txt ├── overrides │ └── main.html └── gen_ref_pages.py ├── examples ├── .gitignore ├── self-hosting-lineapy │ ├── airflow │ │ ├── requirements.txt │ │ ├── webserver_config.py │ │ ├── airflow-start.sh │ │ ├── airflow.cfg │ │ └── Dockerfile │ ├── lineapy-notebook │ │ ├── requirements.txt │ │ ├── verify_environment.py │ │ ├── lineapy_config.json │ │ ├── notebook-start.sh │ │ └── Dockerfile │ ├── .env │ └── .gitignore ├── use_cases │ ├── creating_reusable_components │ │ └── .gitignore │ ├── discover_and_trace_past_work │ │ ├── README.md │ │ └── artifact_store.zip │ └── README.md ├── .gitattributes ├── tutorials │ └── README.md └── README.md ├── test_pipeline_dvc_req.txt ├── test_pipeline_airflow_req.txt ├── .colab ├── creating_reusable_components │ └── .gitignore ├── discover_and_trace_past_work │ ├── README.md │ └── artifact_store.zip └── README.md ├── jupyterlab-workspaces ├── .gitignore ├── README.md └── default-37a8.jupyterlab-workspace ├── test_pipeline_ray_req.txt ├── .dockerignore ├── ports.png ├── MANIFEST.in ├── .gitattributes ├── Dockerfile-airflow ├── .github ├── ISSUE_TEMPLATE │ ├── eng_item.md │ ├── feature_request.md │ └── bug_report.md ├── pull_request_template.md └── workflows │ └── publish.yml ├── HISTORY.md ├── airflow_webserver_config.py ├── CONTRIBUTING.md ├── .devcontainer └── start.sh ├── .cspell └── custom-dictionary-workspace.txt ├── conftest.py ├── .flake8 ├── requirements.txt ├── Dockerfile ├── PERFORMANCE.md ├── docker-compose.yml ├── pyproject.toml └── .pre-commit-config.yaml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: 
-------------------------------------------------------------------------------- 1 | site/ 2 | -------------------------------------------------------------------------------- /docs/mkdocs/images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/_alembic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/editors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/mkdocs/tutorials/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/api/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/exceptions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/execution/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/graph_reader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/notebook/.gitignore: -------------------------------------------------------------------------------- 1 | dag.py -------------------------------------------------------------------------------- /tests/outputs/generated/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | outputs 2 | deem -------------------------------------------------------------------------------- /lineapy/_alembic/versions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/lineapy/instrumentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lineapy/utils/analytics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_pipeline_dvc_req.txt: -------------------------------------------------------------------------------- 1 | dvc==2.38.1 -------------------------------------------------------------------------------- /tests/integration/.gitignore: -------------------------------------------------------------------------------- 1 | envs 2 | -------------------------------------------------------------------------------- /lineapy/plugins/serializers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/end_to_end/import_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/end_to_end/import_data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_pipeline_airflow_req.txt: -------------------------------------------------------------------------------- 1 | apache-airflow==2.2.4 -------------------------------------------------------------------------------- /tests/unit/plugins/framework_specific/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.colab/creating_reusable_components/.gitignore: -------------------------------------------------------------------------------- 1 | !data/*.csv -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy 2 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jupyterlab-workspaces/.gitignore: -------------------------------------------------------------------------------- 1 | *.jupyterlab-workspace -------------------------------------------------------------------------------- /test_pipeline_ray_req.txt: -------------------------------------------------------------------------------- 1 | ray==2.2.0 2 | ray[data] 3 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/cli/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.cli 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/db/db.md: 
-------------------------------------------------------------------------------- 1 | ::: lineapy.db.db 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/db/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.db 2 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/lineapy-notebook/requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/sources/matplotlib-tutorial/figures/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/api.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.api 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/cli/cli.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.cli.cli 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/data/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.data 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/db/utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.db.utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | tests/integration 3 | .git 4 | *housing.py -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/data/graph.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.data.graph 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/data/types.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.data.types 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/editors/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.editors 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.utils 2 | -------------------------------------------------------------------------------- /examples/use_cases/creating_reusable_components/.gitignore: 
-------------------------------------------------------------------------------- 1 | !data/*.csv -------------------------------------------------------------------------------- /ports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/ports.png -------------------------------------------------------------------------------- /tests/end_to_end/import_data/utils/__will_not_import.py: -------------------------------------------------------------------------------- 1 | some_var = 1 2 | -------------------------------------------------------------------------------- /tests/simple_data.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,2 3 | 3,4 4 | 5,6 5 | 7,8 6 | 9,10 7 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/api_utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.api_utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/models/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.models 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/db/relational.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.db.relational 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/loader.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.loader 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/task.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.task 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/config.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.config 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/version.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.version 2 | 
-------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/visualizer/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.visualizer 2 | -------------------------------------------------------------------------------- /lineapy/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # TODO: copy from `lineapy_experimental` 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/editors/ipython.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.editors.ipython 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/flag.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.flag 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/taskgen.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.taskgen 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/benchmarks.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.benchmarks 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/constants.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.constants 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/migration.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.migration 2 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/airflow/webserver_config.py: -------------------------------------------------------------------------------- 1 | AUTH_ROLE_PUBLIC = "Admin" -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/models/pipeline.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.models.pipeline 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/context.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution.context 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/executor.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution.executor 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/types.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.types 2 | 
-------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/analytics/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.analytics 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/lineabuiltins.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.lineabuiltins 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/tree_logger.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.tree_logger 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/visualizer/graphviz.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.visualizer.graphviz 2 | -------------------------------------------------------------------------------- /lineapy/exceptions/l_import_error.py: -------------------------------------------------------------------------------- 1 | class LImportError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /tests/end_to_end/import_with_name_conflict/data.py: -------------------------------------------------------------------------------- 1 | import_with_name_conflict = 1 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/excepthook.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.excepthook 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/__error_on_load.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.__error_on_load 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/analytics/utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.analytics.utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/logging_config.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.logging_config 2 | -------------------------------------------------------------------------------- /examples/.gitattributes: -------------------------------------------------------------------------------- 1 | examples/data/diabetes.csv 
filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /lineapy/exceptions/db_exceptions.py: -------------------------------------------------------------------------------- 1 | class ArtifactSaveException(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/artifact_serializer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.artifact_serializer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/create_frame.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.create_frame 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/db_exceptions.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.db_exceptions 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/globals_dict.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution.globals_dict 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/side_effects.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution.side_effects 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/tracer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation.tracer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/serializers/index.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.serializers 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/session_writers.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.session_writers 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/_op_stack.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing._op_stack 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/source_giver.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.source_giver 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/deprecation_utils.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.deprecation_utils 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/visualizer/optimize_svg.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.visualizer.optimize_svg 2 | 
-------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/visualizer/visual_graph.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.visualizer.visual_graph 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/models/linea_artifact.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.models.linea_artifact 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/l_import_error.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.l_import_error 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/exceptions/user_exception.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.exceptions.user_exception 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/execution/inspect_function.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.execution.inspect_function 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/graph_printer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.graph_printer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/program_slice.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.program_slice 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/_trace_func.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing._trace_func 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/transform_code.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.transform_code 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/editors/ipython_cell_storage.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.editors.ipython_cell_storage 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/node_collection.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.node_collection 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/argo_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.argo_pipeline_writer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/base_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.base_pipeline_writer 2 | 
-------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/dvc_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.dvc_pipeline_writer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/ray_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.ray_pipeline_writer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/serializers/mlflow_io.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.serializers.mlflow_io 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/function_call.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing.function_call 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/base_transformer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.base_transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/node_transformer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.node_transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/py37_transformer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.py37_transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/py38_transformer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.py38_transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/transformer_util.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.transformer_util 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/__no_imported_submodule.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.__no_imported_submodule 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/analytics/event_schemas.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.analytics.event_schemas 2 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/.env: -------------------------------------------------------------------------------- 1 | AIRFLOW_PORT=8080 2 | MINIO_CONSOLE_PORT=9001 3 | JUPYTER_PORT=8888 4 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/.gitignore: -------------------------------------------------------------------------------- 1 | # Jupyter Notebook 2 | .ipynb_checkpoints 3 
| !examples/data/*.csv -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/api/models/linea_artifact_store.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.api.models.linea_artifact_store 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/session_artifacts.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.session_artifacts 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/annotation_spec.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation.annotation_spec 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/tracer_context.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation.tracer_context 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/airflow_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.airflow_pipeline_writer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/pipeline_writer_factory.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.pipeline_writer_factory 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/analytics/usage_tracking.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.analytics.usage_tracking 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/validate_annotation_spec.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.validate_annotation_spec 2 | -------------------------------------------------------------------------------- /lineapy/__main__.py: -------------------------------------------------------------------------------- 1 | from lineapy.cli.cli import python 2 | 3 | if __name__ == "__main__": 4 | python() 5 | -------------------------------------------------------------------------------- /lineapy/exceptions/flag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | REWRITE_EXCEPTIONS = "LINEA_NO_EXCEPTIONS" not in os.environ 4 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # lineapy Tests 2 | 3 | Please review the test section in [Contributing](/CONTRIBUTING.md). 
4 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/graph_reader/artifact_collection.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.graph_reader.artifact_collection 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/mutation_tracker.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation.mutation_tracker 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/plugins/kubeflow_pipeline_writer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.plugins.kubeflow_pipeline_writer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/_object_side_effect.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing._object_side_effect 2 | -------------------------------------------------------------------------------- /docs/mkdocs/images/sample_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/sample_graph.png -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/instrumentation/control_flow_tracker.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.instrumentation.control_flow_tracker 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/transformer/conditional_transformer.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.transformer.conditional_transformer 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/utils/__no_imported_submodule_prime.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.utils.__no_imported_submodule_prime 2 | -------------------------------------------------------------------------------- /lineapy/utils/__error_on_load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module that raises an error on loading, for testing 3 | """ 4 | 5 | 1 / 0 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include lineapy *.jinja 2 | recursive-include lineapy *.annotations.yaml 3 | include lineapy/alembic.ini -------------------------------------------------------------------------------- /docs/mkdocs/images/example_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/example_graph.png -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/airflow/airflow-start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install -r /requirements.txt 3 | airflow standalone -------------------------------------------------------------------------------- 
/tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_messy_nodes.1.py: -------------------------------------------------------------------------------- 1 | a = 1 2 | b = a + 2 3 | c = 2 4 | f = a * b * c 5 | -------------------------------------------------------------------------------- /docs/mkdocs/guides/contributing/areas/add-test.md: -------------------------------------------------------------------------------- 1 | # Adding tests 2 | 3 | [COMING SOON] 4 | [//]: # (TODO: LIN-742) 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_messy_nodes_slice.py: -------------------------------------------------------------------------------- 1 | a = 1 2 | b = a + 2 3 | c = 2 4 | f = a * b * c 5 | -------------------------------------------------------------------------------- /docs/mkdocs/images/function_components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/function_components.png -------------------------------------------------------------------------------- /docs/mkdocs/images/icon-lineapy-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/icon-lineapy-white.png -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/cwdpickle/task_ser.jinja: -------------------------------------------------------------------------------- 1 | pickle.dump({{return_variable}}, open('{{return_variable}}.pickle','wb')) -------------------------------------------------------------------------------- /docs/mkdocs/images/graph_reader_classes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/graph_reader_classes.png -------------------------------------------------------------------------------- /docs/mkdocs/images/lineapy-square-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/lineapy-square-light.png -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/exec_and_record_function_calls.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing.exec_and_record_function_calls 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/function_calls_to_side_effects.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing.function_calls_to_side_effects 2 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/dvc/dvc_dag_SingleStageAllSessions.jinja: -------------------------------------------------------------------------------- 1 | stages: 2 | run_all_sessions: 3 | cmd: {{MODULE_COMMAND}} 4 | -------------------------------------------------------------------------------- /tests/end_to_end/import_data/utils/__error_on_load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module that raises an error on loading, for testing 3 | """ 
4 | 5 | 1 / 0 6 | -------------------------------------------------------------------------------- /docs/mkdocs/images/pipeline-example-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/docs/mkdocs/images/pipeline-example-diagram.png -------------------------------------------------------------------------------- /tests/simple.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | assets = pd.read_csv("ames_train_cleaned.csv") 4 | assets["is_new"] = assets["Year_Built"] > 1970 5 | -------------------------------------------------------------------------------- /.colab/discover_and_trace_past_work/README.md: -------------------------------------------------------------------------------- 1 | When you need to recreate the artifact store (both db and pickle files), use the `demo_setup.ipynb`. 2 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/_object_side_effects_to_side_effects.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing._object_side_effects_to_side_effects 2 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/cwdpickle/task_deser.jinja: -------------------------------------------------------------------------------- 1 | {{loaded_input_variable}} = pickle.load(open('{{loaded_input_variable}}.pickle','rb')) -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/parameterizedpickle/task_ser.jinja: -------------------------------------------------------------------------------- 1 | pickle.dump({{return_variable}}, open(variable_{{return_variable}}_path,'wb')) -------------------------------------------------------------------------------- /tests/end_to_end/import_with_name_conflict/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import import_with_name_conflict 2 | 3 | __all__ = ["import_with_name_conflict"] 4 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_simple_requirements.txt: -------------------------------------------------------------------------------- 1 | seaborn==0.11.2 2 | pandas==1.3.5 3 | altair==4.2.0 4 | lineapy 5 | scikit-learn==1.0.2 6 | -------------------------------------------------------------------------------- /docs/mkdocs/reference/lineapy/system_tracing/_function_calls_to_object_side_effects.md: -------------------------------------------------------------------------------- 1 | ::: lineapy.system_tracing._function_calls_to_object_side_effects 2 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_requirements.txt: -------------------------------------------------------------------------------- 1 | seaborn==0.11.2 2 | lineapy 3 | pandas==1.3.5 4 | scikit-learn==1.0.2 5 | altair==4.2.0 6 | -------------------------------------------------------------------------------- /.colab/discover_and_trace_past_work/artifact_store.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/.colab/discover_and_trace_past_work/artifact_store.zip 
-------------------------------------------------------------------------------- /examples/use_cases/discover_and_trace_past_work/README.md: -------------------------------------------------------------------------------- 1 | When you need to recreate the artifact store (both db and pickle files), use the `demo_setup.ipynb`. 2 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/simple_twovar: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | art = dict() 4 | p = "p" 5 | n = 5 6 | pn = p * n 7 | art["pn"] = lineapy.save(pn, "pn") 8 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/parameterizedpickle/task_deser.jinja: -------------------------------------------------------------------------------- 1 | {{loaded_input_variable}} = pickle.load(open(variable_{{loaded_input_variable}}_path,'rb')) -------------------------------------------------------------------------------- /tests/end_to_end/test_literal.py: -------------------------------------------------------------------------------- 1 | def test_ellipsis(execute): 2 | code = """x = ... 3 | """ 4 | res = execute(code) 5 | assert res.values["x"] == ... 6 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_w_dependencies_requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==1.0.2 2 | pandas==1.3.5 3 | lineapy 4 | altair==4.2.0 5 | seaborn==0.11.2 6 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/airflow/airflow.cfg: -------------------------------------------------------------------------------- 1 | [webserver] 2 | expose_config: True 3 | 4 | [scheduler] 5 | min_file_process_interval: 10 6 | dag_dir_list_interval: 10 7 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/simple: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | art = {} 4 | b0 = 0 5 | art["b0"] = lineapy.save(b0, "b0") 6 | a = b0 + 1 7 | art["a"] = lineapy.save(a, "a") 8 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/dvc/dvc_dag_params.jinja: -------------------------------------------------------------------------------- 1 | {% for var_name, var_value in input_parameters_dict.items() -%} 2 | {{var_name}}: {{var_value}} 3 | {% endfor -%} 4 | 5 | -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_complex_h/sample_output/f.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_complex_h/sample_output/f.pkl -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_complex_h/sample_output/h.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_complex_h/sample_output/h.pkl -------------------------------------------------------------------------------- /examples/use_cases/discover_and_trace_past_work/artifact_store.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/examples/use_cases/discover_and_trace_past_work/artifact_store.zip -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/tmpdirpickle/task_deser.jinja: -------------------------------------------------------------------------------- 1 | {{loaded_input_variable}} = pickle.load(open('/tmp/{{pipeline_name}}/variable_{{loaded_input_variable}}.pickle','rb')) -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_simple_script_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_simple 4 | 5 | if __name__ == "__main__": 6 | 7 | sliced_housing_simple.p_value() 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.sqlite filter=lfs diff=lfs merge=lfs -text 2 | examples/data/mushroom.csv filter=lfs diff=lfs merge=lfs -text 3 | examples/data/diabetes.csv filter=lfs diff=lfs merge=lfs -text 4 | -------------------------------------------------------------------------------- /lineapy/_alembic/README: -------------------------------------------------------------------------------- 1 | This directory contains database migration scripts, which can be found in the `versions` directory. 2 | For more information, see `https://alembic.sqlalchemy.org/en/latest/`. -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/tmpdirpickle/task_teardown.jinja: -------------------------------------------------------------------------------- 1 | pickle_files = pathlib.Path('/tmp').joinpath('{{pipeline_name}}').glob('*.pickle') 2 | for f in pickle_files: 3 | f.unlink() -------------------------------------------------------------------------------- /lineapy/utils/__no_imported_submodule_prime.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file exists for testing to make sure we can differentiate between imports of different submodules 3 | """ 4 | 5 | is_prime = True 6 | -------------------------------------------------------------------------------- /lineapy/utils/version.py: -------------------------------------------------------------------------------- 1 | # This file contains the package version for Lineapy 2 | # all other references to the package version should read 3 | # from this file. 
4 | 5 | __version__ = "0.2.3" 6 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/tmpdirpickle/task_setup.jinja: -------------------------------------------------------------------------------- 1 | pickle_folder = pathlib.Path('/tmp').joinpath('{{pipeline_name}}') 2 | if not pickle_folder.exists(): 3 | pickle_folder.mkdir() 4 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/mutate_after_save: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | art = {} 4 | a = [1] 5 | art["a"] = lineapy.save(a, "a") 6 | a.append(2) 7 | b = a[-1] + 1 8 | art["b"] = lineapy.save(b, "b") 9 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/extract_common: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | art = {} 4 | a = 1 5 | a += 1 6 | b = a + 1 7 | art["b"] = lineapy.save(b, "b") 8 | c = a + 2 9 | art["c"] = lineapy.save(c, "c") 10 | -------------------------------------------------------------------------------- /Dockerfile-airflow: -------------------------------------------------------------------------------- 1 | ARG IMAGE_NAME=ghcr.io/linealabs/lineapy:main 2 | FROM $IMAGE_NAME 3 | 4 | RUN pip install apache-airflow==2.2.0 5 | RUN airflow db init 6 | 7 | COPY . . 8 | 9 | CMD [ "airflow"] 10 | -------------------------------------------------------------------------------- /tests/end_to_end/import_data/utils/__no_imported_submodule_prime.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file exists for testing to make sure we can differentiate between imports of different submodules 3 | """ 4 | 5 | is_prime = True 6 | -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/a0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/a0.pkl -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/b0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_a0_b0_dependencies/sample_output/b0.pkl -------------------------------------------------------------------------------- /lineapy/utils/__no_imported_submodule.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file exists to represent a module that was not loaded in the parent module, utils, so that we can 3 | test importing it with Linea. 
4 | """ 5 | 6 | is_prime = False 7 | -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/y.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/y.pkl -------------------------------------------------------------------------------- /tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/p_value.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LineaLabs/lineapy/HEAD/tests/unit/plugins/expected/script_pipeline_housing_w_dependencies/sample_output/p_value.pkl -------------------------------------------------------------------------------- /lineapy/annotations/external/joblib.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: joblib 2 | annotations: 3 | - criteria: 4 | function_name: dump 5 | side_effects: 6 | - mutated_value: 7 | external_state: file_system 8 | -------------------------------------------------------------------------------- /tests/end_to_end/import_data/utils/__no_imported_submodule.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file exists to represent a module that was not loaded in the parent module, utils, so that we can 3 | test importing it with Linea. 4 | """ 5 | 6 | is_prime = False 7 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_script_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_multiple 4 | 5 | if __name__ == "__main__": 6 | 7 | sliced_housing_multiple.y() 8 | 9 | sliced_housing_multiple.p_value() 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/eng_item.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Eng item 3 | about: new engineering TODO for Linea devs 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What**: 11 | 12 | **Why**: 13 | 14 | **Possible Approaches**: 15 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs==1.4.2 2 | mkdocs-material==8.5.11 3 | mkdocstrings==0.19.1 4 | mkdocstrings-python==0.8.2 5 | mkdocs-jupyter==0.22.0 6 | mkdocs-gen-files==0.4.0 7 | mkdocs-literate-nav==0.6.0 8 | mkdocs-section-index==0.3.4 9 | mike==1.1.2 10 | -------------------------------------------------------------------------------- /tests/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Add coverage file to this directory, so that tests run in a subprocess 2 | # in this directory will find it and also use branch data. 3 | # Otherwise this will break when coverage tries to combine files. 
4 | [tool.coverage.run] 5 | branch = true -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | ## 0.1.5 2 | 3 | * DB schema has been updated as per https://github.com/LineaLabs/lineapy/pull/702. For compatibility, users are asked to delete and recreate `.lineapy` folder. This crude resolution shall be replaced by a more systematic DB migration process. 4 | -------------------------------------------------------------------------------- /airflow_webserver_config.py: -------------------------------------------------------------------------------- 1 | # turn off auth https://airflow.apache.org/docs/apache-airflow/stable/security/webserver.html#web-authentication 2 | AUTH_ROLE_PUBLIC = "Admin" 3 | # Turn off CSRF so we can submit froms from another URL on codespaces 4 | WTF_CSRF_ENABLED = False 5 | -------------------------------------------------------------------------------- /tests/notebook/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Add coverage file to this directory, so that tests run in a subprocess 2 | # in this directory will find it and also use branch data. 3 | # Otherwise this will break when coverage tries to combine files. 4 | [tool.coverage.run] 5 | branch = true -------------------------------------------------------------------------------- /lineapy/annotations/internal/pickle.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: pickle 2 | annotations: 3 | - criteria: 4 | function_name: dump # Note: `load` doesn't need annotation 5 | side_effects: 6 | - mutated_value: 7 | positional_argument_index: 1 8 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/module_import: -------------------------------------------------------------------------------- 1 | import pandas 2 | 3 | import lineapy 4 | 5 | art = {} 6 | 7 | df = pandas.DataFrame({"a": [1, 2]}) 8 | art["df"] = lineapy.save(df, "df") 9 | 10 | df2 = pandas.concat([df, df]) 11 | art["df2"] = lineapy.save(df2, "df2") 12 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/module_import_alias: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import lineapy 4 | 5 | art = {} 6 | 7 | df = pd.DataFrame({"a": [1, 2]}) 8 | art["df"] = lineapy.save(df, "df") 9 | 10 | df2 = pd.concat([df, df]) 11 | art["df2"] = lineapy.save(df2, "df2") 12 | -------------------------------------------------------------------------------- /lineapy/annotations/external/numpy.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: numpy 2 | annotations: 3 | - criteria: 4 | function_names: 5 | - savetxt 6 | - savez 7 | side_effects: 8 | - mutated_value: 9 | external_state: file_system 10 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/tmpdirpickle/task_ser.jinja: -------------------------------------------------------------------------------- 1 | if not pathlib.Path('/tmp').joinpath('{{pipeline_name}}').exists(): pathlib.Path('/tmp').joinpath('{{pipeline_name}}').mkdir() 2 | pickle.dump({{return_variable}}, open('/tmp/{{pipeline_name}}/variable_{{return_variable}}.pickle','wb')) 
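As a rough sketch of how the tmpdirpickle serialization and deserialization templates above expand once rendered — assuming a pipeline named `housing_pipeline` and a task variable `p_value`, both placeholder names used only for illustration:

```python
# Approximate rendering of task_ser.jinja / task_deser.jinja (tmpdirpickle flavor),
# with pipeline_name == "housing_pipeline" and return_variable == "p_value".
# Both names are placeholders for illustration only.
import pathlib
import pickle

p_value = 0.05  # stand-in for a value computed by an upstream task function

# task_ser.jinja: ensure the per-pipeline temp directory exists, then pickle the variable
if not pathlib.Path('/tmp').joinpath('housing_pipeline').exists():
    pathlib.Path('/tmp').joinpath('housing_pipeline').mkdir()
pickle.dump(p_value, open('/tmp/housing_pipeline/variable_p_value.pickle', 'wb'))

# task_deser.jinja: a downstream task reloads the same variable from the temp directory
p_value = pickle.load(open('/tmp/housing_pipeline/variable_p_value.pickle', 'rb'))
```

The neighbouring task_setup.jinja and task_teardown.jinja templates respectively create the `/tmp/<pipeline_name>` directory and delete the pickle files it accumulates.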
-------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_w_dependencies_script_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_multiple_w_dependencies 4 | 5 | if __name__ == "__main__": 6 | 7 | sliced_housing_multiple_w_dependencies.p_value() 8 | 9 | sliced_housing_multiple_w_dependencies.y() 10 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/linear: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | linear_first = 1 4 | linear_second = linear_first + 1 5 | linear_third = linear_second + linear_first 6 | lineapy.save(linear_first, "linear_first") 7 | lineapy.save(linear_second, "linear_second") 8 | lineapy.save(linear_third, "linear_third") 9 | -------------------------------------------------------------------------------- /lineapy/annotations/external/tensorflow.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: tensorflow.keras.utils 2 | annotations: 3 | - criteria: 4 | function_name: get_file 5 | side_effects: 6 | - dependency: 7 | external_state: file_system 8 | - mutated_value: 9 | external_state: file_system 10 | -------------------------------------------------------------------------------- /tests/unit/system_tracing/test_op_stack.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import pytest 4 | 5 | from lineapy.system_tracing._op_stack import OpStack 6 | 7 | 8 | def test_stack_access(): 9 | f = inspect.currentframe() 10 | assert f 11 | op_stack = OpStack(f) 12 | with pytest.raises(IndexError): 13 | op_stack[-1000] 14 | -------------------------------------------------------------------------------- /jupyterlab-workspaces/README.md: -------------------------------------------------------------------------------- 1 | Create a directory for jupyterlab workspaces, so we can use a default one in the repo. 2 | 3 | I am not sure what's up with the magic string `37a8` in the name. If I change it, 4 | JupyterLab won't recognize the default. 5 | 6 | Anytime you run jupyterlab, it will re-save this file. Before committing, reformat 7 | it. 8 | -------------------------------------------------------------------------------- /lineapy/annotations/internal/tempfile.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: tempfile 2 | annotations: 3 | - criteria: 4 | function_name: TemporaryFile 5 | side_effects: 6 | - mutated_value: 7 | external_state: file_system 8 | - views: 9 | - result: RESULT 10 | - external_state: file_system 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thank you for your interest in contributing to LineaPy! We believe that it is the community that makes an open source project truly great and successful, so we welcome contribution from any new members. 2 | 3 | Please check out the project [documentation](https://docs.lineapy.org/latest/guides/contributing/process/) to learn how you can contribute! 
4 | -------------------------------------------------------------------------------- /lineapy/annotations/external/opencv.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: cv2 2 | annotations: 3 | - criteria: 4 | class_instance: Algorithm 5 | class_method_name: train 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF 9 | - views: 10 | - self_ref: SELF_REF 11 | - result: RESULT -------------------------------------------------------------------------------- /lineapy/annotations/external/prophet.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: prophet 2 | annotations: 3 | - criteria: 4 | class_instance: Prophet 5 | class_method_name: fit 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF 9 | - views: 10 | - self_ref: SELF_REF 11 | - result: RESULT -------------------------------------------------------------------------------- /lineapy/transformer/transformer_util.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Dict, List 3 | 4 | """ 5 | AST synthesizers used by node_transformers 6 | """ 7 | 8 | 9 | def create_lib_attributes(names: List[ast.alias]) -> Dict[str, str]: 10 | return { 11 | alias.asname if alias.asname else alias.name: alias.name 12 | for alias in names 13 | } 14 | -------------------------------------------------------------------------------- /lineapy/annotations/external/statsforecast.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: statsforecast.models 2 | annotations: 3 | - criteria: 4 | class_instance: _TS 5 | class_method_name: fit 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF 9 | - views: 10 | - self_ref: SELF_REF 11 | - result: RESULT -------------------------------------------------------------------------------- /docs/mkdocs/guides/support.md: -------------------------------------------------------------------------------- 1 | # More Help 2 | 3 | ## Community 4 | 5 | The quickest way to get support for your unresolved issue is to join our [community on Slack](https://join.slack.com/t/lineacommunity/shared_invite/zt-18kizfn3b-1Qu_HDT3ahGudnAwoFAw9Q). 6 | You can post your issue on the ``#support`` channel, and it will be answered promptly. We are always happy and ready to help you! 
7 | -------------------------------------------------------------------------------- /tests/unit/plugins/test_task.py: -------------------------------------------------------------------------------- 1 | from lineapy.plugins.task import TaskGraph 2 | 3 | 4 | def test_task_graph(): 5 | g = TaskGraph( 6 | ["a", "b", "c"], 7 | {"c": {"a", "b"}}, 8 | ) 9 | g = g.remap_nodes({"a": "a_p", "b": "b_p", "c": "c_p"}) 10 | expected_orders = [["a_p", "b_p", "c_p"], ["b_p", "a_p", "c_p"]] 11 | assert g.get_taskorder() in expected_orders 12 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/lineapy-notebook/verify_environment.py: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | assert(lineapy.options.get('database_url') == 'postgresql://lineapy:lineapypassword@postgres-lineapy:5432/lineapy_artifact_store') 4 | assert(lineapy.options.get('artifact_storage_dir') == 's3://lineapy-artifact-store') 5 | assert(lineapy.options.get('storage_options') is not None) 6 | 7 | print("Lineapy configuration verified.") -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pandas_timeseries].py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv" 4 | apple = pd.read_csv(url) 5 | apple.Date = pd.to_datetime(apple.Date) 6 | apple = apple.set_index("Date") 7 | apple_months = apple.resample("BM").mean() 8 | linea_artifact_value = apple_months 9 | -------------------------------------------------------------------------------- /.devcontainer/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Modified from https://docs.docker.com/config/containers/multi-service_container/ 3 | 4 | # Install lineapy in develop mode 5 | python setup.py develop 6 | 7 | # turn on bash's job control 8 | set -m 9 | 10 | # Start the first process 11 | make jupyterlab_start &> /tmp/jupyterlab_log & 12 | 13 | # Start the second process 14 | make airflow_home airflow_start &> /tmp/airflow_log & 15 | -------------------------------------------------------------------------------- /lineapy/annotations/external/pillow.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: PIL.Image 2 | annotations: 3 | - criteria: 4 | class_instance: Image 5 | class_method_name: save 6 | side_effects: 7 | - mutated_value: 8 | external_state: file_system 9 | - criteria: 10 | function_name: open 11 | side_effects: 12 | - dependency: 13 | external_state: file_system 14 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pandas_deleting].py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" 4 | iris = pd.read_csv(url) 5 | iris.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"] 6 | del iris["class"] 7 | iris = iris.dropna(how="any") 8 | iris = iris.reset_index(drop=True) 9 | linea_artifact_value = iris 10 | -------------------------------------------------------------------------------- 
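The `test_slice[...]` snapshots above and below record the code slices LineaPy re-derives for saved artifacts. A minimal sketch of producing such a slice interactively, assuming the `lineapy.save(...).get_code()` artifact API and an illustrative artifact name `"iris"`:

```python
# Minimal sketch of producing a slice like the pandas_deleting snapshot above.
# The artifact name "iris" and the unrelated `summary` line are placeholders
# added for illustration; they are not part of the snapshot files.
import pandas as pd

import lineapy

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
iris = pd.read_csv(url)
iris.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
summary = iris.describe()  # unrelated work that should be dropped from the slice
del iris["class"]
iris = iris.dropna(how="any")
iris = iris.reset_index(drop=True)

artifact = lineapy.save(iris, "iris")  # store the artifact and its lineage
print(artifact.get_code())             # prints the cleaned-up slice, similar to the snapshot
```

Tracing has to be active for the save to capture lineage (for example, in a LineaPy-enabled notebook session), which is what the integration test harness sets up before comparing against these snapshots.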
/lineapy/plugins/jinja_templates/module/session_function.jinja: -------------------------------------------------------------------------------- 1 | def {{session_function_name}}({{session_input_parameters_body}}): 2 | # Given multiple artifacts, we need to save each right after 3 | # its calculation to protect from any irrelevant downstream 4 | # mutations (e.g., inside other artifact calculations) 5 | import copy 6 | {{return_dict_name}} = dict() 7 | {{session_function_body | indent(4, True) }} 8 | return {{return_dict_name}} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a new feature for Linea 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[matplotlib_aliased].py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | size = 128, 16 4 | dpi = 72.0 5 | figsize = size[0] / float(dpi), size[1] / float(dpi) 6 | fig = plt.figure(figsize=figsize, dpi=dpi) 7 | plt.axes([0, 0, 1, 1], frameon=False) 8 | plt.text(0.5, 0.5, "Aliased", ha="center", va="center") 9 | plt.xlim(0, 1), plt.ylim(0, 1) 10 | plt.xticks([]), plt.yticks([]) 11 | plt.savefig("../figures/aliased.png", dpi=dpi) 12 | -------------------------------------------------------------------------------- /lineapy/annotations/external/torch.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: torch 2 | annotations: 3 | - criteria: 4 | function_name: manual_seed 5 | side_effects: 6 | - mutated_value: 7 | self_ref: SELF_REF 8 | - module: torch.jit._script 9 | annotations: 10 | - criteria: 11 | class_method_name: save 12 | class_instance: ScriptModule 13 | side_effects: 14 | - mutated_value: 15 | external_state: file_system 16 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/dvc/dvc_dag_PythonOperator.jinja: -------------------------------------------------------------------------------- 1 | {% if task_parameters|length>0 -%} 2 | import dvc.api 3 | {% endif %} 4 | import {{ MODULE_NAME }} 5 | import pickle 6 | 7 | {{ TASK_CODE }} 8 | 9 | if __name__ == "__main__": 10 | {% for param in task_parameters -%} 11 | {{param}} = dvc.api.params_show()["{{param}}"] 12 | {% endfor -%} 13 | task_{{ task_name }}({% for param in task_parameters -%}{{param}}{{ ", " if not loop.last else "" }}{% endfor %}) 14 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_simple_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:latest-python{python_version} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # copy all the requirements to run the current dag 7 | COPY ./sliced_housing_simple_requirements.txt ./ 8 | # install the required libs 9 | RUN pip install -r 
./sliced_housing_simple_requirements.txt 10 | 11 | WORKDIR /opt/airflow/dags 12 | COPY . . 13 | 14 | WORKDIR /opt/airflow 15 | 16 | CMD [ "standalone" ] -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:latest-python{python_version} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # copy all the requirements to run the current dag 7 | COPY ./sliced_housing_multiple_requirements.txt ./ 8 | # install the required libs 9 | RUN pip install -r ./sliced_housing_multiple_requirements.txt 10 | 11 | WORKDIR /opt/airflow/dags 12 | COPY . . 13 | 14 | WORKDIR /opt/airflow 15 | 16 | CMD [ "standalone" ] -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/script_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | FROM python:{{ python_version }} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # Copy all the requirements to run current DAG 7 | COPY ./{{ pipeline_name }}_requirements.txt ./ 8 | 9 | # Install required libs 10 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 11 | 12 | WORKDIR /home 13 | COPY ./{{ pipeline_name }}_module.py ./ 14 | 15 | ENTRYPOINT [ "python", "/home/{{ pipeline_name }}_module.py" ] 16 | -------------------------------------------------------------------------------- /lineapy/system_tracing/function_call.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Any, Callable, Dict, List 5 | 6 | 7 | @dataclass 8 | class FunctionCall: 9 | """ 10 | A record of a function call that happened in the tracer. 11 | """ 12 | 13 | fn: Callable 14 | args: List[Any] = field(default_factory=list) 15 | kwargs: Dict[str, Any] = field(default_factory=dict) 16 | res: Any = field(default=None) 17 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/ray/ray_dag_remote.jinja: -------------------------------------------------------------------------------- 1 | {% extends "ray/ray_dag_base.jinja" %} 2 | 3 | {% block bind_or_remote %}remote{% endblock %} 4 | 5 | {%- block ray_dag_execution %} 6 | # Execute actors to get remote objects 7 | # Make changes here to access any additional objects needed. 8 | {%- for task_name in sink_tasks %} 9 | ray.get([{%- for var in tasks[task_name].return_vars %}{{ var }}{{ ',' if not loop.last else '' }}{%- endfor %}]) 10 | {%- endfor %} 11 | {% endblock %} 12 | 13 | -------------------------------------------------------------------------------- /tests/integration/sources/matplotlib-tutorial/scripts/alpha.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | size = 256,16 4 | dpi = 72.0 5 | figsize= size[0]/float(dpi),size[1]/float(dpi) 6 | fig = plt.figure(figsize=figsize, dpi=dpi) 7 | fig.patch.set_alpha(0) 8 | plt.axes([0,0.1,1,.8], frameon=False) 9 | 10 | for i in range(1,11): 11 | plt.axvline(i, linewidth=1, color='blue',alpha=.25+.75*i/10.) 
12 | 13 | plt.xlim(0,11) 14 | plt.xticks([]), plt.yticks([]) 15 | plt.savefig('../figures/alpha.png', dpi=dpi) 16 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_housing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.ensemble import RandomForestClassifier 3 | 4 | assets = pd.read_csv("ames_train_cleaned.csv") 5 | 6 | 7 | def is_new(col): 8 | return col > 1970 9 | 10 | 11 | assets["is_new"] = is_new(assets["Year_Built"]) 12 | clf = RandomForestClassifier(random_state=0) 13 | y = assets["is_new"] 14 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 15 | clf.fit(x, y) 16 | p = clf.predict([[100 * 1000, 10, 4]]) 17 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/ray/ray_dag_workflow.jinja: -------------------------------------------------------------------------------- 1 | {% extends "ray/ray_dag_base.jinja" %} 2 | 3 | {% block bind_or_remote %}bind{% endblock %} 4 | 5 | {% block ray_dag_execution %} 6 | from packaging import version 7 | if version.parse(ray.__version__) < version.parse('2.0'): 8 | raise RuntimeError(f"Ray Workflows requires version >2.0 but {ray.__version__} was found") 9 | {%- for task_name in sink_tasks %} 10 | ray.workflow.run({{tasks[task_name].return_vars[0]}}) 11 | {%- endfor %} 12 | {% endblock %} 13 | 14 | 15 | -------------------------------------------------------------------------------- /.cspell/custom-dictionary-workspace.txt: -------------------------------------------------------------------------------- 1 | __getitem__ 2 | # Custom Dictionary Words 3 | astpretty 4 | asttokens 5 | builtins 6 | chdir 7 | dataclass 8 | dataframe 9 | getattr 10 | graphviz 11 | ipython 12 | isinstance 13 | jupyterlab 14 | kwargs 15 | linea 16 | lineabuiltins 17 | lineapy 18 | listify 19 | maxdepth 20 | nbconvert 21 | nbformat 22 | nbsphinx 23 | nbval 24 | NBVAL_IGNORE_OUTPUT 25 | networkx 26 | orms 27 | psycopg2 28 | Pydantic 29 | scipy 30 | sklearn 31 | sqlalchemy 32 | templating 33 | toctree 34 | traceback 35 | xdist 36 | -------------------------------------------------------------------------------- /tests/integration/sources/matplotlib-tutorial/scripts/aliased.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | size = 128,16 4 | dpi = 72.0 5 | figsize= size[0]/float(dpi),size[1]/float(dpi) 6 | fig = plt.figure(figsize=figsize, dpi=dpi) 7 | fig.patch.set_alpha(0) 8 | plt.axes([0,0,1,1], frameon=False) 9 | 10 | plt.rcParams['text.antialiased'] = False 11 | plt.text(0.5,0.5,"Aliased",ha='center',va='center') 12 | 13 | plt.xlim(0,1),plt.ylim(0,1), 14 | plt.xticks([]),plt.yticks([]) 15 | 16 | plt.savefig('../figures/aliased.png', dpi=dpi) 17 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_w_dependencies_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:latest-python{python_version} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # copy all the requirements to run the current dag 7 | COPY ./sliced_housing_multiple_w_dependencies_requirements.txt ./ 8 | # install the required libs 9 | RUN pip install -r ./sliced_housing_multiple_w_dependencies_requirements.txt 10 | 11 | WORKDIR /opt/airflow/dags 
12 | COPY . . 13 | 14 | WORKDIR /opt/airflow 15 | 16 | CMD [ "standalone" ] -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/ray/ray_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | FROM python:{{ python_version }} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # Copy all the requirements to run current DAG 7 | COPY ./{{ pipeline_name }}_requirements.txt ./ 8 | 9 | # Install ray 10 | RUN apt update 11 | RUN pip install ray 12 | 13 | # Install required libs 14 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 15 | 16 | WORKDIR /home 17 | COPY ./{{ pipeline_name }}_module.py ./ 18 | COPY ./{{ pipeline_name }}_dag.py ./ 19 | 20 | -------------------------------------------------------------------------------- /lineapy/annotations/external/boto3.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: boto3 2 | annotations: 3 | - criteria: 4 | function_names: 5 | - upload_file 6 | - upload_fileobj 7 | side_effects: 8 | - mutated_value: 9 | external_state: file_system 10 | - module: boto.s3.inject 11 | annotations: 12 | - criteria: 13 | function_names: 14 | - upload_file 15 | - upload_fileobj 16 | side_effects: 17 | - mutated_value: 18 | external_state: file_system 19 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pandas_apply].py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv" 4 | crime = pd.read_csv(url) 5 | crime.Year = pd.to_datetime(crime.Year, format="%Y") 6 | crime = crime.set_index("Year", drop=True) 7 | del crime["Total"] 8 | crimes = crime.resample("10AS").sum() 9 | population = crime["Population"].resample("10AS").max() 10 | crimes["Population"] = population 11 | linea_artifact_value = crimes 12 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_semi_supervised_plot_label_propagation_structure].py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_circles 3 | from sklearn.semi_supervised import LabelSpreading 4 | 5 | n_samples = 200 6 | X, y = make_circles(n_samples=n_samples, shuffle=False) 7 | outer, inner = 0, 1 8 | labels = np.full(n_samples, -1.0) 9 | labels[0] = outer 10 | labels[-1] = inner 11 | label_spread = LabelSpreading(kernel="knn", alpha=0.8) 12 | label_spread.fit(X, labels) 13 | linea_artifact_value = label_spread 14 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_literal/test_ellipsis.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""x = ... 
8 | """, 9 | location=PosixPath("[source file path]"), 10 | ) 11 | literal_1 = LiteralNode( 12 | source_location=SourceLocation( 13 | lineno=1, 14 | col_offset=4, 15 | end_lineno=1, 16 | end_col_offset=7, 17 | source_code=source_1.id, 18 | ), 19 | value=Ellipsis, 20 | ) 21 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/airflow/airflow_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:latest-python{{ python_version }} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # copy all the requirements to run the current dag 7 | COPY ./{{ pipeline_name }}_requirements.txt ./ 8 | # install the required libs 9 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 10 | 11 | WORKDIR /opt/airflow/dags 12 | COPY ./{{ pipeline_name }}_module.py ./ 13 | COPY ./{{ pipeline_name }}_dag.py ./ 14 | 15 | WORKDIR /opt/airflow 16 | 17 | CMD [ "standalone" ] 18 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/complex: -------------------------------------------------------------------------------- 1 | import lineapy 2 | 3 | art = {} 4 | a0 = 0 5 | a0 += 1 6 | art["a0"] = lineapy.save(a0, "a0") 7 | a = 1 8 | art["a"] = lineapy.save(a, "a") 9 | 10 | a += 1 11 | b = a * 2 + a0 12 | c = b + 3 13 | d = a * 4 14 | e = d + 5 15 | e += 6 16 | art["c"] = lineapy.save(c, "c") 17 | art["e"] = lineapy.save(e, "e") 18 | 19 | f = c + 7 20 | art["f"] = lineapy.save(f, "f") 21 | a += 1 22 | g = c + e * 2 23 | art["g2"] = lineapy.save(g, "g2") 24 | h = a + g 25 | art["h"] = lineapy.save(h, "h") 26 | z = [1] 27 | z.append(h) 28 | art["z"] = lineapy.save(z, "z") 29 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[matplotlib_exercise_3].py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | plt.figure(figsize=(8, 5), dpi=80) 5 | plt.subplot(111) 6 | X = np.linspace(-np.pi, np.pi, 256, endpoint=True) 7 | C, S = np.cos(X), np.sin(X) 8 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-") 9 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-") 10 | plt.xlim(-4.0, 4.0) 11 | plt.xticks(np.linspace(-4, 4, 9, endpoint=True)) 12 | plt.ylim(-1.0, 1.0) 13 | plt.yticks(np.linspace(-1, 1, 5, endpoint=True)) 14 | linea_artifact_value = plt.gcf() 15 | -------------------------------------------------------------------------------- /docs/mkdocs/guides/contributing/tips.md: -------------------------------------------------------------------------------- 1 | # Recommended Practices 2 | 3 | ## Organize each PR with relevant changes 4 | 5 | To maintain a linear/cleaner project history, the project was set up to apply “squashing” when merging a PR. 6 | That is, if a PR contains more than one commit, GitHub will combine them into a single commit where the summary 7 | equals the PR title (followed by the PR number) and the description consists of commit messages for all squashed 8 | commits (in date order). Hence, we ask you to organize each PR with related changes only so that it can represent 9 | a single unit of meaningful change. 
10 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from lineapy.cli.cli import setup_ipython_dir 4 | 5 | 6 | # Set the IPYTHONDIR globally when running any tests 7 | # This needs to be in the root directory, so that even notebooks 8 | # tested in `./examples` use this plugin 9 | def pytest_configure(config): 10 | setup_ipython_dir() 11 | os.environ["LINEAPY_DO_NOT_TRACK"] = "true" 12 | os.environ["AIRFLOW_HOME"] = "/tmp/airflow_home" 13 | 14 | 15 | def pytest_collectstart(collector): 16 | if collector.fspath and collector.fspath.ext == ".ipynb": 17 | 18 | collector.skip_compare += ("image/svg+xml", "text/html") 19 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_simple.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | def p_value(): 5 | import pandas as pd 6 | from sklearn.ensemble import RandomForestClassifier 7 | 8 | assets = pd.read_csv("ames_train_cleaned.csv") 9 | 10 | def is_new(col): 11 | return col > 1970 12 | 13 | assets["is_new"] = is_new(assets["Year_Built"]) 14 | clf = RandomForestClassifier(random_state=0) 15 | y = assets["is_new"] 16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 17 | clf.fit(x, y) 18 | p = clf.predict([[100 * 1000, 10, 4]]) 19 | pickle.dump(p, open("pickle-sample.pkl", "wb")) 20 | -------------------------------------------------------------------------------- /tests/tools/print_ast.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Pretty prints the AST of some Python code you pass in from the CLI 4 | """ 5 | 6 | import ast 7 | import dis 8 | 9 | import click 10 | from astpretty import pprint 11 | 12 | 13 | @click.command() 14 | @click.argument("code") 15 | def linea_cli(code): 16 | 17 | ast_ = ast.parse(code) 18 | print("*** AST ***") 19 | pprint(ast_) 20 | print("\n*** TRACER ***") 21 | # print(astor.to_source(NodeTransformer("dummy").visit(ast_))) 22 | print("\n*** Bytecoce ***") 23 | dis.dis(code) 24 | 25 | 26 | if __name__ == "__main__": 27 | linea_cli() 28 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/dvc/dvc_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | FROM python:{{ python_version }} 2 | 3 | RUN mkdir /tmp/installers 4 | WORKDIR /tmp/installers 5 | 6 | # Copy all the requirements to run current DAG 7 | COPY ./{{ pipeline_name }}_requirements.txt ./ 8 | 9 | # Install git and dvc 10 | RUN apt update 11 | RUN apt install -y git 12 | RUN pip install dvc 13 | 14 | # Install required libs 15 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 16 | 17 | WORKDIR /home 18 | COPY . . 19 | 20 | # Initialize workdir as a dvc repo 21 | RUN git init 22 | RUN dvc init 23 | 24 | ENTRYPOINT [ "dvc", "repro", "run_all_sessions"] 25 | -------------------------------------------------------------------------------- /lineapy/visualizer/README.md: -------------------------------------------------------------------------------- 1 | # Visualizer 2 | 3 | We use `graphviz` to show the internal state of lineapy. We use the graphs 4 | to support demos and debugging/tests. 
5 | 6 | The graph can be created two ways: (1) with the tracer, which will contain more 7 | rich run-time information, such as the variable names, and mutation nodes, and 8 | (2) without run time information, such as when we load the artifact from the database. 9 | 10 | There are four different ways to access the visualizer currently, with slightly 11 | different configurations (you can find the full list in `__init__.py`): 12 | 13 | - ipython 14 | - snapshots 15 | - cli -------------------------------------------------------------------------------- /lineapy/_alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /tests/end_to_end/test_list_comp.py: -------------------------------------------------------------------------------- 1 | def test_returns_value(execute): 2 | res = execute("x = [i + 1 for i in range(3)]") 3 | assert res.values["x"] == [1, 2, 3] 4 | 5 | 6 | def test_depends_on_prev_value(execute): 7 | res = execute( 8 | "y = range(3)\nx = [i + 1 for i in y]", 9 | snapshot=False, 10 | artifacts=["x"], 11 | ) 12 | # Verify that i isn't set in the local scope 13 | assert res.values["x"] == [1, 2, 3] 14 | assert res.values["y"] == range(3) 15 | assert "i" not in res.values 16 | sliced_code = res.slice("x") 17 | assert execute(sliced_code).values["x"] == [1, 2, 3] 18 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block announce %} 4 | 5 | Have questions? Join our 6 | 7 | {% include ".icons/fontawesome/brands/slack.svg" %} 8 | 9 | Slack community and ask away! 10 | 11 | {% endblock %} 12 | 13 | {% block outdated %} 14 | You are viewing an old version of the documentation. 15 | 16 | Click here to go to the latest version. 
17 | 18 | {% endblock %} -------------------------------------------------------------------------------- /lineapy/annotations/external/keras.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: keras.engine.training 2 | annotations: 3 | - criteria: 4 | class_method_name: compile 5 | class_instance: Model 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF 9 | - criteria: 10 | class_method_name: fit 11 | class_instance: Model 12 | side_effects: 13 | - mutated_value: 14 | self_ref: SELF_REF 15 | - criteria: 16 | class_method_name: save 17 | class_instance: Model 18 | side_effects: 19 | - mutated_value: 20 | external_state: file_system 21 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. 4 | 5 | Fixes # (issue) 6 | 7 | ## Type of change 8 | 9 | Please delete options that are not relevant. 10 | 11 | - [ ] Bug fix (non-breaking change which fixes an issue) 12 | - [ ] New feature (non-breaking change which adds functionality) 13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 14 | - [ ] This change requires a documentation update 15 | 16 | # How Has This Been Tested? 17 | -------------------------------------------------------------------------------- /tests/unit/transformer/test_transform_code.py: -------------------------------------------------------------------------------- 1 | from mock import MagicMock, patch 2 | 3 | from lineapy.transformer.transform_code import transform 4 | 5 | 6 | @patch( 7 | "lineapy.transformer.transform_code.NodeTransformer", 8 | ) 9 | def test_transform_fn(nt_mock: MagicMock): 10 | """ 11 | Test that the transform function calls the NodeTransformer 12 | """ 13 | mocked_tracer = MagicMock() 14 | source_location = MagicMock() 15 | transform("x = 1", source_location, mocked_tracer) 16 | nt_mock.assert_called_once() 17 | mocked_tracer.db.commit.assert_called_once() 18 | # TODO - test that source giver is called only for 3.7 and below 19 | -------------------------------------------------------------------------------- /.colab/README.md: -------------------------------------------------------------------------------- 1 | # LineaPy Tutorials 2 | 3 | ## `00_lineapy_quickstart` 4 | 5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here! 6 | 7 | ## `01_using_artifacts` 8 | 9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts. 10 | 11 | ## `02_pipeline_building` 12 | 13 | Data science workflows revolve around building and refining pipelines, but it is often manual and time-consuming work. Having the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating transition from development to production. This tutorial demonstrates how this can be done. 
14 | -------------------------------------------------------------------------------- /lineapy/annotations/external/sklearn.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: sklearn.base 2 | annotations: 3 | - criteria: 4 | class_instance: BaseEstimator 5 | class_method_name: fit 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF # self is a keyword... 9 | - views: 10 | - self_ref: SELF_REF 11 | - result: RESULT 12 | - criteria: 13 | class_instance: BaseEstimator 14 | class_method_name: fit_transform 15 | side_effects: 16 | - mutated_value: 17 | self_ref: SELF_REF 18 | - views: 19 | - self_ref: SELF_REF 20 | - result: RESULT 21 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_simple_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_simple 4 | from airflow import DAG 5 | from airflow.operators.python_operator import PythonOperator 6 | from airflow.utils.dates import days_ago 7 | 8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)} 9 | 10 | dag = DAG( 11 | dag_id="sliced_housing_simple_dag", 12 | schedule_interval="*/15 * * * *", 13 | max_active_runs=1, 14 | catchup=False, 15 | default_args=default_dag_args, 16 | ) 17 | 18 | 19 | p_value = PythonOperator( 20 | dag=dag, 21 | task_id="p_value_task", 22 | python_callable=sliced_housing_simple.p_value, 23 | ) 24 | -------------------------------------------------------------------------------- /lineapy/annotations/external/gym.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: gym.wrappers.monitor 2 | annotations: 3 | - criteria: 4 | class_instance: Monitor 5 | class_method_name: seed 6 | side_effects: 7 | - mutated_value: 8 | self_ref: SELF_REF # self is a keyword... 9 | - views: 10 | - self_ref: SELF_REF 11 | - result: RESULT 12 | - criteria: 13 | class_instance: Monitor 14 | class_method_name: reset 15 | side_effects: 16 | - mutated_value: 17 | self_ref: SELF_REF # self is a keyword... 18 | - views: 19 | - self_ref: SELF_REF 20 | - result: RESULT 21 | -------------------------------------------------------------------------------- /tests/unit/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lineapy.cli import cli 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "test_input,expected", 8 | [ 9 | ("xyz", "xyz"), 10 | ("test.yml", "test.yml"), 11 | ("test.yaml", "test"), 12 | ("test.annotations.yaml", "test"), 13 | ("tet.annotation.yaml", "tet.annotation"), 14 | ("tet.annotation.yml", "tet.annotation.yml"), 15 | ("explicit .yaml", "explicit"), 16 | ("implicit . annotations . yaml", "implicit"), 17 | ], 18 | ) 19 | def test_remove_annotations_file_extension(test_input, expected): 20 | 21 | assert cli.remove_annotations_file_extension(test_input) == expected 22 | -------------------------------------------------------------------------------- /docs/mkdocs/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # LineaPy Tutorials 2 | 3 | ## `00_lineapy_quickstart` 4 | 5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here! 6 | 7 | ## `01_using_artifacts` 8 | 9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts. 
10 | 11 | ## `02_pipeline_building` 12 | 13 | Data science workflows revolve around building and refining pipelines, but it is often manual and time-consuming work. Having the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating transition from development to production. This tutorial demonstrates how this can be done. 14 | -------------------------------------------------------------------------------- /examples/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # LineaPy Tutorials 2 | 3 | ## `00_lineapy_quickstart` 4 | 5 | This tutorial gives you a quick tour of core functionalities of LineaPy. If you are new to LineaPy, start here! 6 | 7 | ## `01_using_artifacts` 8 | 9 | This tutorial uses simple examples to demonstrate how to work with LineaPy artifacts. 10 | 11 | ## `02_pipeline_building` 12 | 13 | Data science workflows revolve around building and refining pipelines, but it is often manual and time-consuming work. Having the complete development process stored in artifacts, LineaPy can automate pipeline building, accelerating transition from development to production. This tutorial demonstrates how this can be done. 14 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_multioutput_plot_classifier_chain_yeast].py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_openml 2 | from sklearn.linear_model import LogisticRegression 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.multioutput import ClassifierChain 5 | 6 | X, Y = fetch_openml("yeast", version=4, return_X_y=True) 7 | Y = Y == "TRUE" 8 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) 9 | base_lr = LogisticRegression() 10 | chains = [ClassifierChain(base_lr, order="random", random_state=i) for i in range(10)] 11 | for chain in chains: 12 | chain.fit(X_train, Y_train) 13 | linea_artifact_value = chains 14 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/lineapy-notebook/lineapy_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "storage_options": { 3 | "key": "lineapy", 4 | "secret": "lineapypassword", 5 | "client_kwargs": { 6 | "endpoint_url": "http://minio:9000" 7 | } 8 | }, 9 | "artifact_storage_dir": "s3://lineapy-artifact-store", 10 | "customized_annotation_folder": "/home/jovyan/work/.lineapy/custom-annotations", 11 | "database_url": "postgresql://lineapy:lineapypassword@postgres-lineapy:5432/lineapy_artifact_store", 12 | "do_not_track": "True", 13 | "home_dir": "/home/jovyan/work/.lineapy", 14 | "logging_file": "/home/jovyan/work/.lineapy/lineapy.log", 15 | "logging_level": "INFO" 16 | } -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pandas_stats].py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas as pd 4 | 5 | data_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data" 6 | data = pd.read_csv(data_url, sep="\\s+", parse_dates=[[0, 1, 2]]) 7 | 8 | 9 | def fix_century(x): 10 | year = x.year - 100 if x.year > 1989 else x.year 11 | return datetime.date(year, x.month, 
x.day) 12 | 13 | 14 | data["Yr_Mo_Dy"] = data["Yr_Mo_Dy"].apply(fix_century) 15 | data["Yr_Mo_Dy"] = pd.to_datetime(data["Yr_Mo_Dy"]) 16 | data = data.set_index("Yr_Mo_Dy") 17 | weekly = data.resample("W").agg(["min", "max", "mean", "std"]) 18 | linea_artifact_value = weekly 19 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_model_selection_plot_randomized_search].py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import load_digits 3 | from sklearn.linear_model import SGDClassifier 4 | from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 5 | 6 | X, y = load_digits(return_X_y=True, n_class=3) 7 | clf = SGDClassifier(loss="hinge", penalty="elasticnet", fit_intercept=True) 8 | param_grid = { 9 | "average": [True, False], 10 | "l1_ratio": np.linspace(0, 1, num=10), 11 | "alpha": np.power(10, np.arange(-2, 1, dtype=float)), 12 | } 13 | grid_search = GridSearchCV(clf, param_grid=param_grid) 14 | grid_search.fit(X, y) 15 | linea_artifact_value = grid_search 16 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/task/task_function.jinja: -------------------------------------------------------------------------------- 1 | {{ function_decorator }} 2 | def task_{{function_name}}({{user_input_variables}}): 3 | {%- if include_imports_locally %} 4 | import {{ MODULE_NAME }} 5 | import pickle, pathlib 6 | {%- endif %} 7 | {% for typing_block in typing_blocks %} 8 | {{typing_block | indent(4, True) }} 9 | {% endfor %} 10 | {% for loading_block in loading_blocks %} 11 | {{loading_block | indent(4, True) }} 12 | {% endfor %} 13 | {{ pre_call_block | indent(4, True) }} 14 | {{ call_block | indent(4, True) }} 15 | {{ post_call_block | indent(4, True) }} 16 | {% for dumping_block in dumping_blocks %} 17 | {{dumping_block | indent(4, True) }} 18 | {% endfor %} 19 | {{return_block | indent(4, True) }} -------------------------------------------------------------------------------- /lineapy/system_tracing/_object_side_effect.py: -------------------------------------------------------------------------------- 1 | """ 2 | These classes represent side effects, where the values are actual 3 | Python object, in comparison to the other two representations, 4 | where the values are either references to a certain argument (i.e. the first arg) 5 | or to a node. 
6 | """ 7 | from dataclasses import dataclass 8 | from typing import List, Union 9 | 10 | 11 | @dataclass 12 | class ViewOfObjects: 13 | objects: List[object] 14 | 15 | 16 | @dataclass 17 | class MutatedObject: 18 | object: object 19 | 20 | 21 | @dataclass 22 | class ImplicitDependencyObject: 23 | object: object 24 | 25 | 26 | ObjectSideEffect = Union[ 27 | ViewOfObjects, MutatedObject, ImplicitDependencyObject 28 | ] 29 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/dvc/dvc_dag_StagePerArtifact.jinja: -------------------------------------------------------------------------------- 1 | stages: 2 | {% for task_name, task_def in TASK_DEFS.items() %} 3 | {{ task_name }}: 4 | cmd: python task_{{ task_name }}.py 5 | deps: 6 | - {{ MODULE_NAME }}.py 7 | - task_{{ task_name }}.py 8 | {%- if task_def.loaded_input_variables|length > 0 %} 9 | {%- for dep in task_def.loaded_input_variables %} 10 | - {{ dep }}.pickle 11 | {%- endfor %} 12 | {%- endif %} 13 | {%- if task_def.return_vars|length > 0 %} 14 | outs: 15 | {%- for out in task_def.return_vars %} 16 | - {{ out }}.pickle 17 | {%- endfor %} 18 | {%- endif %} 19 | {% endfor %} 20 | 21 | -------------------------------------------------------------------------------- /tests/integration/slices/pandas_timeseries.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # apple_months 3 | # from file: 4 | # sources/pandas_exercises/09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_timeseries]' 8 | 9 | import pandas as pd 10 | 11 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv" 12 | apple = pd.read_csv(url) 13 | apple.Date = pd.to_datetime(apple.Date) 14 | apple = apple.set_index("Date") 15 | apple_months = apple.resample("BM").mean() 16 | linea_artifact_value = apple_months 17 | -------------------------------------------------------------------------------- /lineapy/transformer/py38_transformer.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | from lineapy.transformer.base_transformer import BaseTransformer 4 | 5 | 6 | class Py38Transformer(BaseTransformer): 7 | def visit_Index(self, node: ast.Index) -> ast.AST: 8 | # ignoring types because these classes were entirely removed without backward support in 3.9 9 | return self.visit(node.value) # type: ignore 10 | 11 | def visit_ExtSlice(self, node: ast.ExtSlice) -> ast.Tuple: 12 | # ignoring types because these classes were entirely removed without backward support in 3.9 13 | elem_nodes = [self.visit(elem) for elem in node.dims] # type: ignore 14 | return ast.Tuple( 15 | elts=list(elem_nodes), 16 | ) 17 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_list_comp/test_returns_value.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="x = [i + 1 for i in range(3)]", 8 | location=PosixPath("[source file path]"), 9 | ) 10 | call_1 = CallNode( 11 | source_location=SourceLocation( 12 | lineno=1, 13 | 
col_offset=4, 14 | end_lineno=1, 15 | end_col_offset=29, 16 | source_code=source_1.id, 17 | ), 18 | function_id=LookupNode( 19 | name="l_exec_expr", 20 | ).id, 21 | positional_args=[ 22 | LiteralNode( 23 | value="[i + 1 for i in range(3)]", 24 | ).id 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug in Linea 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **python version**: 11 | *what python version are you using?* 12 | 13 | **lineapy version**: 14 | *what version of lineapy are you using or which commit if installed from source?* 15 | 16 | 17 | **Your code**: 18 | *What code did you try to run with lineapy?* 19 | 20 | ```python 21 | ... 22 | ``` 23 | 24 | **Issue**: 25 | *What went wrong when trying to run this code?* 26 | 27 | **Notebook(s) or script(s) to reproduce the issue** 28 | Alternatively, please upload a Linea notebook or Python script (or a set of notebooks/scripts if the bug is caused by cross-session interactions) that can reproduce the bug. 29 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pandas_merge].py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | raw_data_1 = { 4 | "subject_id": ["1", "2", "3", "4", "5"], 5 | "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"], 6 | "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"], 7 | } 8 | raw_data_2 = { 9 | "subject_id": ["4", "5", "6", "7", "8"], 10 | "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"], 11 | "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"], 12 | } 13 | data1 = pd.DataFrame(raw_data_1, columns=["subject_id", "first_name", "last_name"]) 14 | data2 = pd.DataFrame(raw_data_2, columns=["subject_id", "first_name", "last_name"]) 15 | all_data_col = pd.concat([data1, data2], axis=1) 16 | linea_artifact_value = all_data_col 17 | -------------------------------------------------------------------------------- /tests/integration/sources/matplotlib-tutorial/scripts/dash_joinstyle.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | size = 256,16 5 | dpi = 72.0 6 | figsize= size[0]/float(dpi),size[1]/float(dpi) 7 | fig = plt.figure(figsize=figsize, dpi=dpi) 8 | fig.patch.set_alpha(0) 9 | plt.axes([0,0,1,1], frameon=False) 10 | 11 | plt.plot(np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'miter') 12 | plt.plot(4+np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'bevel') 13 | plt.plot(8+np.arange(3), [0,1,0], color="blue", dashes=[12,5], linewidth=8, dash_joinstyle = 'round') 14 | 15 | plt.xlim(0,12), plt.ylim(-1,2) 16 | plt.xticks([]), plt.yticks([]) 17 | 18 | plt.savefig('../figures/dash_joinstyle.png', dpi=dpi) 19 | #show() 20 | -------------------------------------------------------------------------------- /tests/housing.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | import seaborn as sns 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | import lineapy 7 | 8 | alt.data_transformers.enable("json") 9 |
alt.renderers.enable("mimetype") 10 | 11 | assets = pd.read_csv("ames_train_cleaned.csv") 12 | 13 | sns.relplot(data=assets, x="Year_Built", y="SalePrice", size="Lot_Area") 14 | 15 | 16 | def is_new(col): 17 | return col > 1970 18 | 19 | 20 | assets["is_new"] = is_new(assets["Year_Built"]) 21 | 22 | clf = RandomForestClassifier(random_state=0) 23 | y = assets["is_new"] 24 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 25 | 26 | clf.fit(x, y) 27 | p = clf.predict([[100 * 1000, 10, 4]]) 28 | lineapy.save(x, "x") 29 | lineapy.save(y, "y") 30 | lineapy.save(p, "p value") 31 | -------------------------------------------------------------------------------- /tests/integration/slices/matplotlib_alpha.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # lineapy.file_system 3 | # from file: 4 | # sources/matplotlib-tutorial/scripts/alpha.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_alpha]' 8 | 9 | import matplotlib.pyplot as plt 10 | 11 | size = 256, 16 12 | dpi = 72.0 13 | figsize = size[0] / float(dpi), size[1] / float(dpi) 14 | fig = plt.figure(figsize=figsize, dpi=dpi) 15 | fig.patch.set_alpha(0) 16 | plt.axes([0, 0.1, 1, 0.8], frameon=False) 17 | for i in range(1, 11): 18 | plt.axvline(i, linewidth=1, color="blue", alpha=0.25 + 0.75 * i / 10.0) 19 | plt.xlim(0, 11) 20 | plt.xticks([]), plt.yticks([]) 21 | plt.savefig("../figures/alpha.png", dpi=dpi) 22 | -------------------------------------------------------------------------------- /tests/__snapshots__/test_ipython/test_to_airflow[no_config-module].py: -------------------------------------------------------------------------------- 1 | def get_a(): 2 | a = [1, 2, 3] 3 | return a 4 | 5 | 6 | def run_session_including_a(): 7 | # Given multiple artifacts, we need to save each right after 8 | # its calculation to protect from any irrelevant downstream 9 | # mutations (e.g., inside other artifact calculations) 10 | import copy 11 | 12 | artifacts = dict() 13 | a = get_a() 14 | artifacts["a"] = copy.deepcopy(a) 15 | return artifacts 16 | 17 | 18 | def run_all_sessions(): 19 | artifacts = dict() 20 | artifacts.update(run_session_including_a()) 21 | return artifacts 22 | 23 | 24 | if __name__ == "__main__": 25 | # Edit this section to customize the behavior of artifacts 26 | artifacts = run_all_sessions() 27 | print(artifacts) 28 | -------------------------------------------------------------------------------- /tests/__snapshots__/test_ipython/test_to_airflow[with_config-module].py: -------------------------------------------------------------------------------- 1 | def get_a(): 2 | a = [1, 2, 3] 3 | return a 4 | 5 | 6 | def run_session_including_a(): 7 | # Given multiple artifacts, we need to save each right after 8 | # its calculation to protect from any irrelevant downstream 9 | # mutations (e.g., inside other artifact calculations) 10 | import copy 11 | 12 | artifacts = dict() 13 | a = get_a() 14 | artifacts["a"] = copy.deepcopy(a) 15 | return artifacts 16 | 17 | 18 | def run_all_sessions(): 19 | artifacts = dict() 20 | artifacts.update(run_session_including_a()) 21 | return artifacts 22 | 23 | 24 | if __name__ == "__main__": 25 | # Edit this section to customize the behavior of artifacts 26 | artifacts = run_all_sessions() 27 | print(artifacts) 28 | -------------------------------------------------------------------------------- 
/tests/integration/slices/matplotlib_aliased.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # lineapy.file_system 3 | # from file: 4 | # sources/matplotlib-tutorial/scripts/aliased.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_aliased]' 8 | 9 | import matplotlib.pyplot as plt 10 | 11 | size = 128, 16 12 | dpi = 72.0 13 | figsize = size[0] / float(dpi), size[1] / float(dpi) 14 | fig = plt.figure(figsize=figsize, dpi=dpi) 15 | fig.patch.set_alpha(0) 16 | plt.axes([0, 0, 1, 1], frameon=False) 17 | plt.rcParams["text.antialiased"] = False 18 | plt.text(0.5, 0.5, "Aliased", ha="center", va="center") 19 | plt.xlim(0, 1), plt.ylim(0, 1) 20 | plt.xticks([]), plt.yticks([]) 21 | plt.savefig("../figures/aliased.png", dpi=dpi) 22 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_multiple 4 | from airflow import DAG 5 | from airflow.operators.python_operator import PythonOperator 6 | from airflow.utils.dates import days_ago 7 | 8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)} 9 | 10 | dag = DAG( 11 | dag_id="sliced_housing_multiple_dag", 12 | schedule_interval="*/15 * * * *", 13 | max_active_runs=1, 14 | catchup=False, 15 | default_args=default_dag_args, 16 | ) 17 | 18 | 19 | p_value = PythonOperator( 20 | dag=dag, 21 | task_id="p_value_task", 22 | python_callable=sliced_housing_multiple.p_value, 23 | ) 24 | 25 | y = PythonOperator( 26 | dag=dag, 27 | task_id="y_task", 28 | python_callable=sliced_housing_multiple.y, 29 | ) 30 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # LineaPy Examples 2 | 3 | We believe examples are the best way to learn something, so we have created hands-on notebooks that illustrate different uses of LineaPy. 4 | 5 | - `self-hosting-lineapy`: This demo folder provides an easy to run, local data science development environment that showcases the capabilities of LineaPy. It gives users an easy way to run end-to-end tutorials and prototype solutions to their problems using LineaPy. 6 | 7 | - `tutorials`: These tutorial notebooks focus on walking you through key functionalities of LineaPy with simple examples. If you are new to LineaPy and want to learn the basics, start here! 8 | 9 | - `use-cases`: These use-case notebooks contain more realistic examples that show how LineaPy can help data science work in various domains such as real estate, finance, and medicine. 10 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/argo/argo_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | # Be sure to build this docker file with the following command 2 | # docker build -t {{ pipeline_name }}:lineapy -f {{ pipeline_name }}_Dockerfile . 
3 | 4 | FROM python:{{ python_version }} 5 | 6 | RUN mkdir /tmp/installers 7 | WORKDIR /tmp/installers 8 | 9 | # Copy all the requirements to run current DAG 10 | COPY ./{{ pipeline_name }}_requirements.txt ./ 11 | 12 | # Install required libs 13 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 14 | 15 | WORKDIR /opt/argo/dags 16 | 17 | # Install git and argo 18 | RUN apt update 19 | RUN apt install -y git 20 | RUN pip install argo-workflows 21 | RUN pip install hera-workflows 22 | 23 | COPY ./{{ pipeline_name }}_module.py ./ 24 | COPY ./{{ pipeline_name }}_dag.py ./ 25 | 26 | ENTRYPOINT [ "argo", "repro", "run_all_sessions"] -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_tree_plot_cost_complexity_pruning].py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_breast_cancer 2 | from sklearn.model_selection import train_test_split 3 | from sklearn.tree import DecisionTreeClassifier 4 | 5 | X, y = load_breast_cancer(return_X_y=True) 6 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 7 | clf = DecisionTreeClassifier(random_state=0) 8 | path = clf.cost_complexity_pruning_path(X_train, y_train) 9 | ccp_alphas, impurities = path.ccp_alphas, path.impurities 10 | clfs = [] 11 | for ccp_alpha in ccp_alphas: 12 | clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha) 13 | clf.fit(X_train, y_train) 14 | clfs.append(clf) 15 | clfs = clfs[:-1] 16 | depth = [clf.tree_.max_depth for clf in clfs] 17 | linea_artifact_value = depth 18 | -------------------------------------------------------------------------------- /tests/integration/slices/pandas_apply.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # crimes 3 | # from file: 4 | # sources/pandas_exercises/04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_apply]' 8 | 9 | import pandas as pd 10 | 11 | url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv" 12 | crime = pd.read_csv(url) 13 | crime.Year = pd.to_datetime(crime.Year, format="%Y") 14 | crime = crime.set_index("Year", drop=True) 15 | del crime["Total"] 16 | crimes = crime.resample("10AS").sum() 17 | population = crime["Population"].resample("10AS").max() 18 | crimes["Population"] = population 19 | linea_artifact_value = crimes 20 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_preprocessing_plot_scaling_importance].py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_wine 2 | from sklearn.decomposition import PCA 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.naive_bayes import GaussianNB 5 | from sklearn.pipeline import make_pipeline 6 | from sklearn.preprocessing import StandardScaler 7 | 8 | RANDOM_STATE = 42 9 | features, target = load_wine(return_X_y=True) 10 | X_train, X_test, y_train, y_test = train_test_split( 11 | features, target, test_size=0.3, random_state=RANDOM_STATE 12 | ) 13 | unscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB()) 14 | unscaled_clf.fit(X_train, 
y_train) 15 | std_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB()) 16 | std_clf.fit(X_train, y_train) 17 | linea_artifact_value = unscaled_clf, std_clf 18 | -------------------------------------------------------------------------------- /tests/integration/slices/pandas_deleting.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # iris 3 | # from file: 4 | # sources/pandas_exercises/10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_deleting]' 8 | 9 | import numpy as np 10 | import pandas as pd 11 | 12 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" 13 | iris = pd.read_csv(url) 14 | iris.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"] 15 | iris.iloc[10:30, 2:3] = np.nan 16 | iris.petal_length.fillna(1, inplace=True) 17 | del iris["class"] 18 | iris.iloc[0:3, :] = np.nan 19 | iris = iris.dropna(how="any") 20 | iris = iris.reset_index(drop=True) 21 | linea_artifact_value = iris 22 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:slim-latest-python3.10 2 | 3 | USER $AIRFLOW_UID 4 | 5 | # prevent pip timing out on slow internet connections 6 | RUN export PIP_DEFAULT_TIMEOUT=1000 7 | 8 | # deps for lineapy 9 | RUN pip install -U pip 10 | RUN pip install fsspec s3fs psycopg2-binary 11 | 12 | # install lineapy 13 | RUN pip install lineapy==0.2.1 14 | 15 | # other nice to have libraries 16 | RUN pip install pandas==1.4.3 scikit-learn==1.1.2 17 | 18 | COPY airflow.cfg /opt/airflow/airflow.cfg 19 | COPY airflow-start.sh /airflow-start.sh 20 | 21 | USER root 22 | RUN chown $AIRFLOW_UID /opt/airflow/airflow.cfg 23 | RUN chown $AIRFLOW_UID /airflow-start.sh 24 | RUN chmod +x /airflow-start.sh 25 | USER $AIRFLOW_UID 26 | 27 | RUN mkdir /opt/airflow/plugins 28 | 29 | ENV AIRFLOW_HOME=/opt/airflow 30 | 31 | ENTRYPOINT ["/airflow-start.sh"] -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/module_import_from: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.linear_model import LinearRegression 3 | 4 | import lineapy 5 | 6 | art = {} 7 | # Load train data 8 | url1 = "https://raw.githubusercontent.com/LineaLabs/lineapy/main/examples/tutorials/data/iris.csv" 9 | train_df = pd.read_csv(url1) 10 | # Initiate the model 11 | mod = LinearRegression() 12 | # Fit the model 13 | mod.fit( 14 | X=train_df[["petal.width"]], 15 | y=train_df["petal.length"], 16 | ) 17 | # Save the fitted model as an artifact 18 | art["model"] = lineapy.save(mod, "iris_model") 19 | # Load data to predict (assume it comes from a different source) 20 | pred_df = pd.read_csv(url1) 21 | # Make predictions 22 | petal_length_pred = mod.predict(X=pred_df[["petal.width"]]) 23 | # Save the predictions 24 | art["pred"] = lineapy.save(petal_length_pred, "iris_petal_length_pred") 25 | -------------------------------------------------------------------------------- /tests/integration/slices/matplotlib_exercise_3.py: -------------------------------------------------------------------------------- 1 | # This is 
the manual slice of: 2 | # plt.gcf() 3 | # from file: 4 | # sources/matplotlib-tutorial/scripts/exercice_3.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_exercise_3]' 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | 12 | plt.figure(figsize=(8, 5), dpi=80) 13 | plt.subplot(111) 14 | X = np.linspace(-np.pi, np.pi, 256, endpoint=True) 15 | C, S = np.cos(X), np.sin(X) 16 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-") 17 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-") 18 | plt.xlim(-4.0, 4.0) 19 | plt.xticks(np.linspace(-4, 4, 9, endpoint=True)) 20 | plt.ylim(-1.0, 1.0) 21 | plt.yticks(np.linspace(-1, 1, 5, endpoint=True)) 22 | linea_artifact_value = plt.gcf() 23 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-exclude = __snapshots__,examples,sliced_housing_dag*.py,./lineapy/editors/ipython.py,.ipython,tutorials,slices,sources,envs,*housing.py,tests/integration/sources,tests/integration/slices,outputs,env 3 | per-file-ignores = 4 | # ignore get_ipython missing 5 | lineapy/ipython.py: F821 6 | 7 | # E203 is for compat with black 8 | # (https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#configuration) 9 | 10 | # E501 is line length, which is already covered by black, and was raising 11 | # errors on string/comment lines that were too long 12 | 13 | # W291 is for trailing whitespace, which is also already covered by black 14 | # besides in this case we want to ignore trailing whitespace in docstrings 15 | 16 | # F841 is raised on unused local variables, which sometimes we don't mind 17 | extend-ignore = E203,E501,W291,F841 18 | -------------------------------------------------------------------------------- /lineapy/system_tracing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module adds support for using sys.settrace to understand what happens 3 | during a subset of code execution that's passed in. In the context of how 4 | it's currently used, it's limited to the "blackbox" execs (`l_exec_statement`). 5 | It can be used and tested independently. 6 | 7 | At a high level, users could: 8 | 9 | 1. Use `exec_and_record_function_calls.py` as an entry point, which uses 10 | `sys.settrace` to trace every bytecode execution and `_op_stack.py` to look at 11 | the bytecode stack during tracing. It translates different bytecode 12 | instructions into the corresponding Python function calls. 13 | 2. Use `function_calls_to_side_effects.py` to translate the sequence of calls 14 | that were recorded into the side effects produced on nodes (mapping Python 15 | changes to graph changes). 
16 | """ 17 | -------------------------------------------------------------------------------- /tests/end_to_end/test_decorator.py: -------------------------------------------------------------------------------- 1 | from lineapy.utils.utils import prettify 2 | 3 | 4 | def test_user_defined_decorator(execute): 5 | code = """x=[] 6 | def append1(func): 7 | def wrapper(): 8 | func() 9 | x.append(1) 10 | 11 | return wrapper 12 | 13 | 14 | @append1 15 | def append2(): 16 | x.append(2) 17 | 18 | append2() 19 | """ 20 | res = execute(code, artifacts=["x"]) 21 | assert len(res.values["x"]) == 2 22 | assert res.values["x"][0] == 2 and res.values["x"][1] == 1 23 | assert res.artifacts["x"] == prettify(code) 24 | 25 | 26 | def test_functools_decorator(execute): 27 | code = """from functools import lru_cache 28 | @lru_cache(maxsize=1) 29 | def f(): 30 | return 1 31 | 32 | x = f() 33 | """ 34 | res = execute(code, artifacts=["x"]) 35 | assert res.values["x"] == 1 36 | assert res.artifacts["x"] == prettify(code) 37 | -------------------------------------------------------------------------------- /tests/integration/sources/matplotlib-tutorial/scripts/exercice_3.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2015, Nicolas P. Rougier. All Rights Reserved. 3 | # Distributed under the (new) BSD License. See LICENSE.txt for more info. 4 | # ----------------------------------------------------------------------------- 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | plt.figure(figsize=(8,5), dpi=80) 9 | plt.subplot(111) 10 | 11 | X = np.linspace(-np.pi, np.pi, 256,endpoint=True) 12 | C,S = np.cos(X), np.sin(X) 13 | 14 | plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-") 15 | plt.plot(X, S, color="red", linewidth=2.5, linestyle="-") 16 | 17 | plt.xlim(-4.0,4.0) 18 | plt.xticks(np.linspace(-4,4,9,endpoint=True)) 19 | 20 | plt.ylim(-1.0,1.0) 21 | plt.yticks(np.linspace(-1,1,5,endpoint=True)) 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /docs/mkdocs/concepts/artifact.md: -------------------------------------------------------------------------------- 1 | # Artifact 2 | 3 | In LineaPy, an artifact refers to any intermediate result from the development process. Most often, an artifact 4 | manifests as a variable that stores data in a specific state (e.g., `my_num = your_num + 10`). In the data science 5 | workflow, an artifact can be a model, a chart, a statistic, a dataframe, or a feature function. 6 | 7 | What makes LineaPy special is that it treats an artifact as both code and value. That is, when storing an artifact, 8 | LineaPy not only records the state (i.e., value) of the variable but also traces and saves all relevant operations 9 | leading to this state — as code. Such a complete development history, or *lineage*, then allows LineaPy to fully reproduce 10 | the given artifact. Furthermore, it lays the groundwork for automating the data engineering work needed to bring data science from development to production.
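To make this concrete, here is a minimal sketch of saving and retrieving an artifact. The variable names mirror the example above, and the calls are illustrative of the artifact API rather than a complete reference.

```python
import lineapy

your_num = 32
my_num = your_num + 10  # an intermediate result worth keeping

# Save the value *and* the code that produced it
artifact = lineapy.save(my_num, "my_num")

# Later (even in another session), retrieve both sides of the artifact
artifact = lineapy.get("my_num")
print(artifact.get_value())  # the stored value, e.g. 42
print(artifact.get_code())   # the sliced code that reproduces it
```

Because the lineage is stored alongside the value, `get_code()` returns only the operations relevant to `my_num`, not the entire session.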
11 | -------------------------------------------------------------------------------- /tests/end_to_end/test_dictionary.py: -------------------------------------------------------------------------------- 1 | from lineapy.utils.utils import prettify 2 | 3 | 4 | def test_basic_dict(execute): 5 | res = execute("x = {'a': 1, 'b': 2}") 6 | assert res.values["x"] == {"a": 1, "b": 2} 7 | 8 | 9 | def test_splatting(execute): 10 | res = execute("x = {1: 2, 2:2, **{1: 3, 2: 3}, 1: 4}") 11 | assert res.values["x"] == {1: 4, 2: 3} 12 | 13 | 14 | def test_dictionary_support(execute): 15 | DICTIONARY_SUPPORT = """import pandas as pd 16 | df = pd.DataFrame({"id": [1,2]}) 17 | x = df["id"].sum() 18 | """ 19 | res = execute(DICTIONARY_SUPPORT) 20 | assert res.values["x"] == 3 21 | 22 | 23 | def test_dict_update_mutates(execute): 24 | code = """x = {'a': 1, 'b': 2} 25 | x.update({'a': 3}) 26 | """ 27 | 28 | res = execute(code, artifacts=["x"]) 29 | assert res.values["x"] == {"a": 3, "b": 2} 30 | assert res.slice("x") == prettify(code) 31 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/inputs/housing: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | import seaborn as sns 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | import lineapy 7 | 8 | alt.data_transformers.enable("json") 9 | alt.renderers.enable("mimetype") 10 | 11 | assets = pd.read_csv( 12 | "https://raw.githubusercontent.com/LineaLabs/lineapy/main/tests/ames_train_cleaned.csv" 13 | ) 14 | 15 | sns.relplot(data=assets, x="Year_Built", y="SalePrice", size="Lot_Area") 16 | 17 | 18 | def is_new(col): 19 | return col > 1970 20 | 21 | 22 | assets["is_new"] = is_new(assets["Year_Built"]) 23 | 24 | clf = RandomForestClassifier(random_state=0) 25 | y = assets["is_new"] 26 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 27 | 28 | clf.fit(x, y) 29 | p = clf.predict([[100 * 1000, 10, 4]]) 30 | lineapy.save(x, "x") 31 | lineapy.save(y, "y") 32 | lineapy.save(p, "p value") 33 | -------------------------------------------------------------------------------- /tests/end_to_end/test_lists.py: -------------------------------------------------------------------------------- 1 | def test_list_setitem_mutates(execute): 2 | code = """x = [1] 3 | x[0] = 10 4 | """ 5 | res = execute(code, artifacts=["x"]) 6 | assert res.values["x"] == [10] 7 | assert res.slice("x") == code 8 | 9 | 10 | def test_list_getitem_view(execute): 11 | code = """y = [] 12 | x = [y] 13 | y.append(10) 14 | """ 15 | res = execute(code, artifacts=["x"]) 16 | assert res.slice("x") == code 17 | 18 | 19 | def test_list_append_mutates(execute): 20 | code = """x = [] 21 | x.append(10) 22 | """ 23 | res = execute(code, artifacts=["x"]) 24 | assert res.slice("x") == code 25 | 26 | 27 | def test_list_append_mutates_inner(execute): 28 | code = """x = [] 29 | y = [x] 30 | x.append(10) 31 | y[0].append(11) 32 | """ 33 | res = execute(code, artifacts=["x", "y"]) 34 | assert res.slice("x") == code 35 | assert res.slice("y") == code 36 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_semi_supervised_plot_label_propagation_structure.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # label_spread 3 | # from file: 4 | # sources/scikit-learn/examples/semi_supervised/plot_label_propagation_structure.py 5 | 6 | # To 
verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_semi_supervised_plot_label_propagation_structure]' 8 | 9 | import numpy as np 10 | from sklearn.datasets import make_circles 11 | from sklearn.semi_supervised import LabelSpreading 12 | 13 | n_samples = 200 14 | X, y = make_circles(n_samples=n_samples, shuffle=False) 15 | outer, inner = 0, 1 16 | labels = np.full(n_samples, -1.0) 17 | labels[0] = outer 18 | labels[-1] = inner 19 | label_spread = LabelSpreading(kernel="knn", alpha=0.8) 20 | label_spread.fit(X, labels) 21 | linea_artifact_value = label_spread 22 | -------------------------------------------------------------------------------- /examples/use_cases/README.md: -------------------------------------------------------------------------------- 1 | # LineaPy Use Cases 2 | 3 | ## `predict_house_price` 4 | 5 | This use case illustrates how LineaPy can facilitate an end-to-end data science workflow for housing price prediction. 6 | The notebook comes in 3 main sections: 7 | 8 | 1. ***Exploratory Data Analysis and Feature Engineering.*** Using various statistics and visualizations, we explore the given data 9 | to create useful features. We use LineaPy to store the transformed data as an artifact, which allows us to automatically refactor and clean up the code. 10 | 11 | 2. ***Training a Model.*** Using the transformed data, we train a model that can predict housing prices. We then store 12 | the trained model as an artifact. 13 | 14 | 3. ***Building an End-to-End Pipeline.*** Using artifacts saved in this session, we quickly build an end-to-end 15 | pipeline that combines data preprocessing and model training, moving closer to production. 16 | -------------------------------------------------------------------------------- /lineapy/annotations/internal/io.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: io 2 | annotations: 3 | - criteria: 4 | function_name: open 5 | side_effects: 6 | - dependency: 7 | external_state: file_system 8 | - views: 9 | - result: RESULT 10 | - external_state: file_system 11 | - criteria: 12 | class_method_names: 13 | - close 14 | - flush 15 | - readline 16 | - readlines 17 | - seek 18 | - truncate 19 | - writelines 20 | - write 21 | class_instance: IOBase 22 | side_effects: 23 | - mutated_value: 24 | self_ref: SELF_REF 25 | - criteria: 26 | class_method_names: 27 | - __enter__ 28 | class_instance: IOBase 29 | side_effects: 30 | - views: 31 | - result: RESULT 32 | - self_ref: SELF_REF 33 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/kubeflow/kubeflow_dockerfile.jinja: -------------------------------------------------------------------------------- 1 | # Be sure to build this docker file with the following command 2 | # docker build -t {{ pipeline_name }}:lineapy -f {{ pipeline_name }}_Dockerfile . 
3 | 4 | FROM python:{{ python_version }} 5 | 6 | RUN mkdir /tmp/installers 7 | WORKDIR /tmp/installers 8 | 9 | # Copy all the requirements to run current DAG 10 | COPY ./{{ pipeline_name }}_requirements.txt ./ 11 | 12 | # Install kubeflow python sdk 13 | RUN apt update 14 | RUN pip install kfp 15 | 16 | # Install required libs 17 | RUN pip install -r ./{{ pipeline_name }}_requirements.txt 18 | 19 | WORKDIR /home 20 | COPY ./{{ pipeline_name }}_module.py ./ 21 | COPY ./{{ pipeline_name }}_dag.py ./ 22 | 23 | # Set environment variable so module file can be 24 | # found by kubeflow components 25 | ENV PYTHONPATH=/home:${PYTHON_PATH} 26 | 27 | ENTRYPOINT ["python", "{{ pipeline_name }}_module.py"] 28 | 29 | 30 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_w_dependencies_dag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sliced_housing_multiple_w_dependencies 4 | from airflow import DAG 5 | from airflow.operators.python_operator import PythonOperator 6 | from airflow.utils.dates import days_ago 7 | 8 | default_dag_args = {"owner": "airflow", "retries": 2, "start_date": days_ago(1)} 9 | 10 | dag = DAG( 11 | dag_id="sliced_housing_multiple_w_dependencies_dag", 12 | schedule_interval="*/15 * * * *", 13 | max_active_runs=1, 14 | catchup=False, 15 | default_args=default_dag_args, 16 | ) 17 | 18 | 19 | p_value = PythonOperator( 20 | dag=dag, 21 | task_id="p_value_task", 22 | python_callable=sliced_housing_multiple_w_dependencies.p_value, 23 | ) 24 | 25 | y = PythonOperator( 26 | dag=dag, 27 | task_id="y_task", 28 | python_callable=sliced_housing_multiple_w_dependencies.y, 29 | ) 30 | 31 | 32 | p_value >> y 33 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_op/test_sub.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""x = 1 8 | y=-x""", 9 | location=PosixPath("[source file path]"), 10 | ) 11 | call_1 = CallNode( 12 | source_location=SourceLocation( 13 | lineno=2, 14 | col_offset=2, 15 | end_lineno=2, 16 | end_col_offset=4, 17 | source_code=source_1.id, 18 | ), 19 | function_id=LookupNode( 20 | name="neg", 21 | ).id, 22 | positional_args=[ 23 | LiteralNode( 24 | source_location=SourceLocation( 25 | lineno=1, 26 | col_offset=4, 27 | end_lineno=1, 28 | end_col_offset=5, 29 | source_code=source_1.id, 30 | ), 31 | value=1, 32 | ).id 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[matplotlib_dash_joinstyle].py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | size = 256, 16 5 | dpi = 72.0 6 | figsize = size[0] / float(dpi), size[1] / float(dpi) 7 | fig = plt.figure(figsize=figsize, dpi=dpi) 8 | plt.axes([0, 0, 1, 1], frameon=False) 9 | plt.plot( 10 | np.arange(3), 11 | [0, 1, 0], 12 | color="blue", 13 | dashes=[12, 5], 14 | linewidth=8, 15 | dash_joinstyle="miter", 16 | ) 17 | plt.plot( 18 | 4 + np.arange(3), 19 | [0, 1, 0], 20 | color="blue", 21 | dashes=[12, 5], 22 | linewidth=8, 23 | dash_joinstyle="bevel", 24 | ) 25 | plt.plot( 26 | 8 + np.arange(3), 27 | [0, 1, 0], 28 | 
color="blue", 29 | dashes=[12, 5], 30 | linewidth=8, 31 | dash_joinstyle="round", 32 | ) 33 | plt.xlim(0, 12), plt.ylim(-1, 2) 34 | plt.xticks([]), plt.yticks([]) 35 | plt.savefig("../figures/dash_joinstyle.png", dpi=dpi) 36 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | def p_value(): 5 | import pandas as pd 6 | from sklearn.ensemble import RandomForestClassifier 7 | 8 | assets = pd.read_csv("ames_train_cleaned.csv") 9 | 10 | def is_new(col): 11 | return col > 1970 12 | 13 | assets["is_new"] = is_new(assets["Year_Built"]) 14 | clf = RandomForestClassifier(random_state=0) 15 | y = assets["is_new"] 16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 17 | clf.fit(x, y) 18 | p = clf.predict([[100 * 1000, 10, 4]]) 19 | pickle.dump(p, open("pickle-sample.pkl", "wb")) 20 | 21 | 22 | def y(): 23 | import pandas as pd 24 | 25 | assets = pd.read_csv("ames_train_cleaned.csv") 26 | 27 | def is_new(col): 28 | return col > 1970 29 | 30 | assets["is_new"] = is_new(assets["Year_Built"]) 31 | y = assets["is_new"] 32 | pickle.dump(y, open("pickle-sample.pkl", "wb")) 33 | -------------------------------------------------------------------------------- /jupyterlab-workspaces/default-37a8.jupyterlab-workspace: -------------------------------------------------------------------------------- 1 | {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:examples/Demo_1_Preprocessing.ipynb","notebook:examples/Demo_2_Modeling.ipynb"]},"current":"notebook:examples/Demo_1_Preprocessing.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":true,"widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0,1,0]},"file-browser-filebrowser:cwd":{"path":"examples"},"cloned-outputs:examples/Preprocessing.ipynb:0":{"data":{"path":"examples/Preprocessing.ipynb","index":0}},"notebook:examples/Demo_1_Preprocessing.ipynb":{"data":{"path":"examples/Demo_1_Preprocessing.ipynb","factory":"Notebook"}},"notebook:examples/Demo_2_Modeling.ipynb":{"data":{"path":"examples/Demo_2_Modeling.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.8.0 2 | altair==4.2.0 3 | astor==0.8.1 4 | astpretty==2.1.0 5 | asttokens==2.1.0 6 | black==22.3.0 7 | click==8.1.2 8 | cloudpickle==2.2.0 9 | coveralls==3.3.1 10 | fastparquet==0.8.0 11 | flake8==4.0.1 12 | graphviz==0.19.2 13 | isort==5.10.1 14 | jupyterlab==3.3.3 15 | matplotlib==3.5.1 16 | mypy==0.931 17 | nbconvert==6.5.1 18 | nbformat==5.3.0 19 | nbval==0.9.6 20 | networkx==2.6.3 21 | pandas==1.3.5 22 | pandoc==2.2 23 | pdbpp==0.10.3 24 | pg==0.1 25 | Pillow==9.1.1 26 | pre-commit==2.18.1 27 | psycopg2-binary==2.9.5 28 | pydantic==1.9.0 29 | pytest==6.2.5 30 | pytest-alembic==0.8.2 31 | pytest-cov==3.0.0 32 | pytest-virtualenv==1.7.0 33 | pytest-xdist==2.5.0 34 | requests==2.27.1 35 | rich==12.2.0 36 | scikit-learn==1.0.2 37 | scipy==1.7.3 38 | scour==0.38.2 39 | seaborn==0.11.2 40 | SQLAlchemy==1.4.35 41 | syrupy==1.4.5 42 | types-mock==4.0.15 43 | types-PyYAML==6.0.5 44 | types-requests==2.27.16 45 | 
typing-extensions==4.4.0 46 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_op/test_invert.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 1 8 | b=~a""", 9 | location=PosixPath("[source file path]"), 10 | ) 11 | call_1 = CallNode( 12 | source_location=SourceLocation( 13 | lineno=2, 14 | col_offset=2, 15 | end_lineno=2, 16 | end_col_offset=4, 17 | source_code=source_1.id, 18 | ), 19 | function_id=LookupNode( 20 | name="invert", 21 | ).id, 22 | positional_args=[ 23 | LiteralNode( 24 | source_location=SourceLocation( 25 | lineno=1, 26 | col_offset=4, 27 | end_lineno=1, 28 | end_col_offset=5, 29 | source_code=source_1.id, 30 | ), 31 | value=1, 32 | ).id 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_op/test_not.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 1 8 | b=not a""", 9 | location=PosixPath("[source file path]"), 10 | ) 11 | call_1 = CallNode( 12 | source_location=SourceLocation( 13 | lineno=2, 14 | col_offset=2, 15 | end_lineno=2, 16 | end_col_offset=7, 17 | source_code=source_1.id, 18 | ), 19 | function_id=LookupNode( 20 | name="not_", 21 | ).id, 22 | positional_args=[ 23 | LiteralNode( 24 | source_location=SourceLocation( 25 | lineno=1, 26 | col_offset=4, 27 | end_lineno=1, 28 | end_col_offset=5, 29 | source_code=source_1.id, 30 | ), 31 | value=1, 32 | ).id 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /docs/mkdocs/concepts/pipeline.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | In the context of data science, a pipeline refers to a series of steps that transform 4 | data into useful information/product. For instance, a common end-to-end machine learning 5 | pipeline includes data preprocessing, model training, and model evaluation steps. These 6 | pipelines are often developed one component at a time. Once the individual components are 7 | developed, they are connected to form an end-to-end pipeline. 8 | 9 | In LineaPy, each component is represented as an artifact, and LineaPy provides APIs to create 10 | pipelines from a group of artifacts. These pipelines can then be run through specific orchestration 11 | engines to handle new data. 12 | 13 | Note that the pipelines created by LineaPy are meant to be reviewed and accepted by developers 14 | before they go into production, and we provide mechanisms to verify the generated pipelines in 15 | the development environment for validation. 
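As an illustration, the sketch below shows roughly how a pipeline could be generated from two previously saved artifacts. The artifact names, the dependency mapping, and the output directory are placeholders, and the exact parameters may vary by version.

```python
import lineapy

# Assumes artifacts "preprocessed_data" and "trained_model" were saved earlier
# with lineapy.save(...) in one or more development sessions.
lineapy.to_pipeline(
    artifacts=["preprocessed_data", "trained_model"],
    framework="AIRFLOW",  # orchestration engine to target
    pipeline_name="housing_pipeline",
    dependencies={"trained_model": {"preprocessed_data"}},  # model step runs after preprocessing
    output_dir="./housing_pipeline/",
)
```

The generated files (module, DAG, requirements, Dockerfile) are written to the output directory so they can be reviewed before being handed to the orchestration engine.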
16 | -------------------------------------------------------------------------------- /tests/outputs/expected/sliced_housing_multiple_w_dependencies.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | def p_value(): 5 | import pandas as pd 6 | from sklearn.ensemble import RandomForestClassifier 7 | 8 | assets = pd.read_csv("ames_train_cleaned.csv") 9 | 10 | def is_new(col): 11 | return col > 1970 12 | 13 | assets["is_new"] = is_new(assets["Year_Built"]) 14 | clf = RandomForestClassifier(random_state=0) 15 | y = assets["is_new"] 16 | x = assets[["SalePrice", "Lot_Area", "Garage_Area"]] 17 | clf.fit(x, y) 18 | p = clf.predict([[100 * 1000, 10, 4]]) 19 | pickle.dump(p, open("pickle-sample.pkl", "wb")) 20 | 21 | 22 | def y(): 23 | import pandas as pd 24 | 25 | assets = pd.read_csv("ames_train_cleaned.csv") 26 | 27 | def is_new(col): 28 | return col > 1970 29 | 30 | assets["is_new"] = is_new(assets["Year_Built"]) 31 | y = assets["is_new"] 32 | pickle.dump(y, open("pickle-sample.pkl", "wb")) 33 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_var_aliasing/test_variable_alias.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 1.2 8 | b = a 9 | """, 10 | location=PosixPath("[source file path]"), 11 | ) 12 | call_1 = CallNode( 13 | source_location=SourceLocation( 14 | lineno=2, 15 | col_offset=0, 16 | end_lineno=2, 17 | end_col_offset=5, 18 | source_code=source_1.id, 19 | ), 20 | function_id=LookupNode( 21 | name="l_alias", 22 | ).id, 23 | positional_args=[ 24 | LiteralNode( 25 | source_location=SourceLocation( 26 | lineno=1, 27 | col_offset=4, 28 | end_lineno=1, 29 | end_col_offset=7, 30 | source_code=source_1.id, 31 | ), 32 | value=1.2, 33 | ).id 34 | ], 35 | ) 36 | -------------------------------------------------------------------------------- /tests/end_to_end/test_delete.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | """ 4 | Test the three parts of #95, to cover the Delete AST node 5 | 6 | https://docs.python.org/3/library/ast.html#ast.Delete 7 | """ 8 | 9 | 10 | @pytest.mark.xfail(reason="dont support deleting a variable") 11 | def test_del_var(execute): 12 | 13 | res = execute("a = 1; del a") 14 | assert "a" not in res.values 15 | 16 | 17 | def test_del_subscript(execute): 18 | """ 19 | Part of #95 20 | """ 21 | res = execute("a = [1]; del a[0]") 22 | assert res.values["a"] == [] 23 | 24 | 25 | def test_set_attr(execute): 26 | res = execute("import types; x = types.SimpleNamespace(); x.hi = 1") 27 | assert res.values["x"].hi == 1 28 | 29 | 30 | def test_del_attribute(execute): 31 | """ 32 | Part of #95 33 | """ 34 | res = execute( 35 | "import types; x = types.SimpleNamespace(); x.hi = 1; del x.hi", 36 | ) 37 | x = res.values["x"] 38 | assert not hasattr(x, "hi") 39 | -------------------------------------------------------------------------------- /tests/integration/slices/pandas_stats.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # weekly 3 | # from file: 4 | # sources/pandas_exercises/06_Stats/Wind_Stats/Exercises_with_solutions.ipynb 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m 
integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_stats]' 8 | 9 | import datetime 10 | 11 | import pandas as pd 12 | 13 | data_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data" 14 | data = pd.read_csv(data_url, sep="\\s+", parse_dates=[[0, 1, 2]]) 15 | 16 | 17 | def fix_century(x): 18 | year = x.year - 100 if x.year > 1989 else x.year 19 | return datetime.date(year, x.month, x.day) 20 | 21 | 22 | data["Yr_Mo_Dy"] = data["Yr_Mo_Dy"].apply(fix_century) 23 | data["Yr_Mo_Dy"] = pd.to_datetime(data["Yr_Mo_Dy"]) 24 | data = data.set_index("Yr_Mo_Dy") 25 | weekly = data.resample("W").agg(["min", "max", "mean", "std"]) 26 | linea_artifact_value = weekly 27 | -------------------------------------------------------------------------------- /lineapy/_alembic/versions/41a413504720_add_named_var.py: -------------------------------------------------------------------------------- 1 | """add named var 2 | 3 | Revision ID: 41a413504720 4 | Revises: 38d5f834d3b7 5 | Create Date: 2022-07-06 14:14:42.354458 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "41a413504720" 14 | down_revision = "38d5f834d3b7" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table( 22 | "assigned_variable_node", 23 | sa.Column("id", sa.String(), nullable=False), 24 | sa.Column("variable_name", sa.String(), nullable=False), 25 | sa.PrimaryKeyConstraint("id", "variable_name"), 26 | ) 27 | # ### end Alembic commands ### 28 | 29 | 30 | def downgrade() -> None: 31 | # ### commands auto generated by Alembic - please adjust! 
### 32 | op.drop_table("assigned_variable_node") 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[sklearn_compose_plot_feature_union].py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.decomposition import PCA 3 | from sklearn.feature_selection import SelectKBest 4 | from sklearn.model_selection import GridSearchCV 5 | from sklearn.pipeline import FeatureUnion, Pipeline 6 | from sklearn.svm import SVC 7 | 8 | iris = load_iris() 9 | X, y = iris.data, iris.target 10 | pca = PCA(n_components=2) 11 | selection = SelectKBest(k=1) 12 | combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)]) 13 | X_features = combined_features.fit(X, y).transform(X) 14 | svm = SVC(kernel="linear") 15 | pipeline = Pipeline([("features", combined_features), ("svm", svm)]) 16 | param_grid = dict( 17 | features__pca__n_components=[1, 2, 3], 18 | features__univ_select__k=[1, 2], 19 | svm__C=[0.1, 1, 10], 20 | ) 21 | grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10) 22 | grid_search.fit(X, y) 23 | linea_artifact_value = grid_search 24 | -------------------------------------------------------------------------------- /tests/end_to_end/test_dask.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lineapy.utils.utils import prettify 4 | 5 | dask = pytest.importorskip("dask") 6 | 7 | 8 | def test_dask_read_csv(execute): 9 | code = """import dask.dataframe as dd 10 | df = dd.read_csv('tests/simple_data.csv') 11 | """ 12 | res = execute(code, artifacts=["df"]) 13 | assert res.values["df"]["a"].sum().compute() == 25 14 | 15 | 16 | def test_dask_to_csv(execute): 17 | code = """import dask.dataframe as dd 18 | df = dd.read_csv('tests/simple_data.csv') 19 | df.to_csv('tests/simple_data_dask.csv') 20 | """ 21 | res = execute(code, artifacts=["lineapy.file_system"]) 22 | assert res.artifacts["lineapy.file_system"] == prettify(code) 23 | 24 | 25 | def test_dask_pop(execute): 26 | code = """import dask.dataframe as dd 27 | df = dd.read_csv('tests/simple_data.csv') 28 | df.pop('a') 29 | """ 30 | res = execute(code, artifacts=["df"]) 31 | assert res.values["df"].columns == ["b"] 32 | assert res.artifacts["df"] == prettify(code) 33 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_multioutput_plot_classifier_chain_yeast.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # chains 3 | # from file: 4 | # sources/scikit-learn/examples/multioutput/plot_classifier_chain_yeast.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_multioutput_plot_classifier_chain_yeast]' 8 | 9 | from sklearn.datasets import fetch_openml 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.multioutput import ClassifierChain 13 | 14 | X, Y = fetch_openml("yeast", version=4, return_X_y=True) 15 | Y = Y == "TRUE" 16 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) 17 | base_lr = LogisticRegression() 18 | chains = [ClassifierChain(base_lr, order="random", random_state=i) for i in 
range(10)] 19 | for chain in chains: 20 | chain.fit(X_train, Y_train) 21 | linea_artifact_value = chains 22 | -------------------------------------------------------------------------------- /lineapy/transformer/source_giver.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | 4 | class SourceGiver: 5 | def transform(self, nodes: ast.Module) -> None: 6 | """ 7 | This call should only happen once asttoken has run its magic 8 | and embellished the ast with tokens and line numbers. 9 | At that point, all this function will do is use those tokens to 10 | figure out end_lineno and end_col_offset for every node in the tree 11 | """ 12 | node: ast.AST 13 | # TODO check if the ast type is a Module instead of simply relying on mypy 14 | for node in ast.walk(nodes): 15 | if not hasattr(node, "lineno"): 16 | continue 17 | 18 | if hasattr(node, "last_token"): 19 | node.end_lineno = node.last_token.end[0] # type: ignore 20 | node.end_col_offset = node.last_token.end[1] # type: ignore 21 | # if isinstance(node, ast.ListComp): 22 | node.col_offset = node.first_token.start[1] # type: ignore 23 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_model_selection_plot_randomized_search.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # grid_search 3 | # from file: 4 | # sources/scikit-learn/examples/model_selection/plot_randomized_search.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_model_selection_plot_randomized_search]' 8 | 9 | import numpy as np 10 | from sklearn.datasets import load_digits 11 | from sklearn.linear_model import SGDClassifier 12 | from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 13 | 14 | X, y = load_digits(return_X_y=True, n_class=3) 15 | clf = SGDClassifier(loss="hinge", penalty="elasticnet", fit_intercept=True) 16 | param_grid = { 17 | "average": [True, False], 18 | "l1_ratio": np.linspace(0, 1, num=10), 19 | "alpha": np.power(10, np.arange(-2, 1, dtype=float)), 20 | } 21 | grid_search = GridSearchCV(clf, param_grid=param_grid) 22 | grid_search.fit(X, y) 23 | linea_artifact_value = grid_search 24 | -------------------------------------------------------------------------------- /tests/integration/slices/xgboost_sklearn_examples.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # lineapy.file_system 3 | # from file: 4 | # sources/xgboost/demo/guide-python/sklearn_examples.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[xgboost_sklearn_examples]' 8 | 9 | """ 10 | Collection of examples for using sklearn interface 11 | ================================================== 12 | 13 | Created on 1 Apr 2015 14 | 15 | @author: Jamie Hall 16 | """ 17 | import pickle 18 | import xgboost as xgb 19 | from sklearn.model_selection import GridSearchCV 20 | from sklearn.datasets import fetch_california_housing 21 | 22 | X, y = fetch_california_housing(return_X_y=True) 23 | xgb_model = xgb.XGBRegressor(n_jobs=1) 24 | clf = GridSearchCV( 25 | xgb_model, 26 | {"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]}, 27 | verbose=1, 28 | n_jobs=1, 29 | ) 30 | clf.fit(X, y) 31 | pickle.dump(clf, 
open("best_calif.pkl", "wb")) 32 | -------------------------------------------------------------------------------- /tests/unit/utils/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from fsspec.core import url_to_fs 4 | from fsspec.implementations.local import LocalFileSystem 5 | 6 | from lineapy.utils.config import options 7 | 8 | 9 | def test_artifact_storage_dir_type(): 10 | """ 11 | Making sure the path we are setting is correct typing, so pandas.io.common.get_handler can process it correctly. 12 | """ 13 | old_artifact_storage_dir = options.safe_get("artifact_storage_dir") 14 | options.set( 15 | "artifact_storage_dir", 16 | "/tmp/somelineapytestprefix/", 17 | ) 18 | assert isinstance( 19 | url_to_fs(str(options.safe_get("artifact_storage_dir")))[0], 20 | LocalFileSystem, 21 | ) 22 | 23 | options.set( 24 | "artifact_storage_dir", 25 | Path("~").expanduser().resolve(), 26 | ) 27 | assert isinstance( 28 | url_to_fs(str(options.safe_get("artifact_storage_dir")))[0], 29 | LocalFileSystem, 30 | ) 31 | 32 | options.set("artifact_storage_dir", old_artifact_storage_dir) 33 | -------------------------------------------------------------------------------- /tests/unit/db/test_db_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lineapy.db.utils import parse_artifact_version 4 | 5 | 6 | def test_parse_artifact_version(): 7 | cases = ( 8 | (-1, False, None), 9 | (-102, False, None), 10 | (1, True, 1), 11 | (0, True, 0), 12 | (2, True, 2), 13 | (3, True, 3), 14 | (4, True, 4), 15 | (5, True, 5), 16 | (0.3, True, 0), 17 | (3.0, True, 3), 18 | (1.0, True, 1), 19 | ("all", True, "all"), 20 | ("latest", True, "latest"), 21 | ("al", False, None), 22 | ("lattest", False, None), 23 | ("1", True, 1), 24 | ("3", True, 3), 25 | ("5", True, 5), 26 | ("0.3", True, 0), 27 | ("1.1", True, 1), 28 | ) 29 | for version, is_valid, expected in cases: 30 | if is_valid: 31 | assert parse_artifact_version(version) == expected 32 | else: 33 | print(version) 34 | with pytest.raises(ValueError): 35 | parse_artifact_version(version) 36 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - "v[0-9]+.[0-9]+.x" 7 | tags: 8 | - "v[0-9]+.[0-9]+.[0-9]+" 9 | 10 | jobs: 11 | build-n-publish: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | lfs: true 17 | - name: Set up Python 3.9 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.9 21 | - name: Install dependencies 22 | run: | 23 | python setup.py install && rm -rf build dist 24 | - name: Build Wheels 25 | run: | 26 | pip install wheel && python setup.py sdist bdist_wheel 27 | - name: Check build 28 | run: | 29 | pip install twine && twine check dist/* 30 | - name: Publish package 31 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 32 | uses: pypa/gh-action-pypi-publish@release/v1 33 | with: 34 | user: __token__ 35 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /tests/integration/slices/pandas_merge.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # all_data_col 3 | # from file: 4 | # 
sources/pandas_exercises/05_Merge/Fictitous Names/Exercises_with_solutions.ipynb 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pandas_merge]' 8 | 9 | import pandas as pd 10 | 11 | raw_data_1 = { 12 | "subject_id": ["1", "2", "3", "4", "5"], 13 | "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"], 14 | "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"], 15 | } 16 | raw_data_2 = { 17 | "subject_id": ["4", "5", "6", "7", "8"], 18 | "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"], 19 | "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"], 20 | } 21 | data1 = pd.DataFrame(raw_data_1, columns=["subject_id", "first_name", "last_name"]) 22 | data2 = pd.DataFrame(raw_data_2, columns=["subject_id", "first_name", "last_name"]) 23 | all_data_col = pd.concat([data1, data2], axis=1) 24 | linea_artifact_value = all_data_col 25 | -------------------------------------------------------------------------------- /lineapy/utils/validate_annotation_spec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Validate the annotations.yaml files in the instrumentation directory. 4 | """ 5 | import json # for pretty printing dicts 6 | from pathlib import Path 7 | from typing import Any, List 8 | 9 | import pydantic 10 | import yaml 11 | 12 | from lineapy.instrumentation.annotation_spec import ModuleAnnotation 13 | 14 | 15 | def validate_spec(spec_file: Path) -> List[Any]: 16 | """ 17 | Validate the '.annotations.yaml' spec file at the given path 18 | and return all invalid items. 19 | 20 | Throws yaml.YAMLError 21 | """ 22 | invalid_specs: List[Any] = [] 23 | with open(spec_file, "r") as f: 24 | doc = yaml.safe_load(f) 25 | 26 | for item in doc: 27 | print( 28 | "Module specification: {}\n".format(json.dumps(item, indent=4)) 29 | ) 30 | 31 | try: 32 | a = ModuleAnnotation(**item) 33 | except pydantic.error_wrappers.ValidationError as e: 34 | invalid_specs.append(item) 35 | return invalid_specs 36 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.2 2 | # Pin syntax as Docker recommends 3 | # https://docs.docker.com/language/python/build-images/#create-a-dockerfile-for-python 4 | FROM python:3.9-slim 5 | 6 | RUN apt-get update && apt-get -y install git graphviz make libpq-dev gcc && \ 7 | curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \ 8 | && apt-get install git-lfs && git lfs install && apt clean && apt-get autoclean && apt-get autoremove 9 | 10 | WORKDIR /usr/src/base 11 | 12 | # small hack to not keep building all the time 13 | COPY ./setup.py ./ 14 | COPY ./README.md ./ 15 | COPY ./lineapy/__init__.py ./lineapy/ 16 | COPY ./requirements.txt ./ 17 | COPY ./test_pipeline_airflow_req.txt ./ 18 | COPY ./Makefile ./ 19 | 20 | ENV AIRFLOW_HOME=/usr/src/airflow_home 21 | ENV AIRFLOW_VENV=/usr/src/airflow_venv 22 | 23 | #RUN mkdir /usr/src/airflow_home 24 | RUN pip --disable-pip-version-check install -r requirements.txt && make airflow_venv && pip cache purge 25 | 26 | COPY . . 
27 | 28 | RUN python setup.py install && rm -rf build 29 | 30 | CMD [ "lineapy" ] 31 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/ray/ray_dag_base.jinja: -------------------------------------------------------------------------------- 1 | import {{ MODULE_NAME }} 2 | import ray 3 | import pickle 4 | import pathlib 5 | 6 | ray.init(runtime_env = {{RAY_RUNTIME_ENV}}, storage = "{{RAY_STORAGE}}") 7 | 8 | {% for task_def in task_definitions %} 9 | {{ task_def }} 10 | {% endfor %} 11 | 12 | # Specify argument values for your pipeline run. 13 | pipeline_arguments = {{ dag_params }} 14 | 15 | {% for task_name, task_def in tasks.items() %} 16 | {%- if task_def.return_vars|length > 0 %} 17 | {%- for var in task_def.return_vars %}{{ var }}{{ ',' if not loop.last else '' }}{%- endfor %} 18 | {%- else %} 19 | _ 20 | {%- endif %} = task_{{task_name}}.{%- block bind_or_remote %}{% endblock %}( 21 | {%- for var in task_def.user_input_variables %}pipeline_arguments["{{ var }}"]{{ ',' if not loop.last else '' }}{%- endfor %}{%- if task_def.loaded_input_variables|length > 0 and task_def.user_input_variables|length > 0 %},{%- endif %}{%- for var in task_def.loaded_input_variables %} {{var}} {{ ',' if not loop.last else '' }}{%- endfor %} 22 | ) 23 | {% endfor %} 24 | 25 | {%- block ray_dag_execution %}{% endblock %} -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/lineapy-notebook/notebook-start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Jupyter Development Team. 3 | # Distributed under the terms of the Modified BSD License. 4 | # LineaPy extensions (c) Linea Labs 5 | 6 | set -e 7 | 8 | # The Jupyter command to launch 9 | # JupyterLab by default 10 | DOCKER_STACKS_JUPYTER_CMD="${DOCKER_STACKS_JUPYTER_CMD:=lab}" 11 | 12 | if [[ -n "${JUPYTERHUB_API_TOKEN}" ]]; then 13 | echo "WARNING: using start-singleuser.sh instead of start-notebook.sh to start a server associated with JupyterHub." 14 | exec /usr/local/bin/start-singleuser.sh "$@" 15 | fi 16 | 17 | wrapper="" 18 | if [[ "${RESTARTABLE}" == "yes" ]]; then 19 | wrapper="run-one-constantly" 20 | fi 21 | 22 | if [[ -f /requirements.txt ]] 23 | then 24 | echo "Installing system requirements." 
25 | pip3 install -r /requirements.txt 26 | fi 27 | 28 | # Verify lineapy environment is set up correctly 29 | lineapy python /verify_environment.py 30 | 31 | # shellcheck disable=SC1091,SC2086 32 | exec /usr/local/bin/start.sh ${wrapper} lineapy jupyter ${DOCKER_STACKS_JUPYTER_CMD} "--NotebookApp.token=''" 33 | -------------------------------------------------------------------------------- /PERFORMANCE.md: -------------------------------------------------------------------------------- 1 | # Performance Profiling 2 | 3 | We have had luck using the [py-spy](https://github.com/benfred/py-spy) tool, 4 | which runs your Python script in a separate process and samples it, to 5 | profile our tests to get a rough sense of how long things take: 6 | 7 | ```bash 8 | # Run with sudo so it can inspect the subprocess 9 | sudo py-spy record \ 10 | # Save as speedscope so we can load in the browser 11 | --format speedscope \ 12 | # Group by function name, instead of line number 13 | --function \ 14 | # Increase the sampling rate from 100 to 200 times per second 15 | -r 200 -- pytest tests/ 16 | ``` 17 | 18 | After creating your trace, you can load it [in 19 | Speedscope](https://www.speedscope.app/). 20 | 21 | In this example, we are inspecting calls to `transform`. 22 | We see that it cumulatively takes up 12% of total time and that most of the 23 | time inside of it is spent visiting imports, as well as committing to the DB: 24 | 25 | Screen Shot 2021-10-12 at 2 29 10 PM 26 | -------------------------------------------------------------------------------- /tests/unit/db/test_literal_node.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.parametrize( 5 | "literal_value", 6 | [ 7 | # Need to escape quotes since formatted string will strip these 8 | # and test this as an integer otherwise. 9 | pytest.param("'10'", id="String"), 10 | pytest.param(False, id="Boolean"), 11 | pytest.param(10, id="Int"), 12 | pytest.param(10.0, id="Float"), 13 | pytest.param(None, id="None"), 14 | pytest.param(b"10", id="Bytes"), 15 | ], 16 | ) 17 | def test_literal_node_value(execute, literal_value): 18 | """ 19 | Test that the literal node is serialized and deserialized correctly 20 | to the DB for supported types. 21 | 22 | TODO: Add test case for ellipses. 
23 | """ 24 | code = f"""import lineapy 25 | val={literal_value} 26 | art = lineapy.save(val, "val") 27 | """ 28 | res = execute( 29 | code, 30 | snapshot=False, 31 | ) 32 | 33 | art = res.values["art"] 34 | art_val = art.db.get_node_by_id(art.node_id).value 35 | expected_val = res.values["val"] 36 | assert art_val == expected_val 37 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_tree_plot_cost_complexity_pruning.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # depth 3 | # from file: 4 | # sources/scikit-learn/examples/tree/plot_cost_complexity_pruning.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_tree_plot_cost_complexity_pruning]' 8 | 9 | from sklearn.datasets import load_breast_cancer 10 | from sklearn.model_selection import train_test_split 11 | from sklearn.tree import DecisionTreeClassifier 12 | 13 | X, y = load_breast_cancer(return_X_y=True) 14 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) 15 | clf = DecisionTreeClassifier(random_state=0) 16 | path = clf.cost_complexity_pruning_path(X_train, y_train) 17 | ccp_alphas, impurities = path.ccp_alphas, path.impurities 18 | clfs = [] 19 | for ccp_alpha in ccp_alphas: 20 | clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha) 21 | clf.fit(X_train, y_train) 22 | clfs.append(clf) 23 | clfs = clfs[:-1] 24 | depth = [clf.tree_.max_depth for clf in clfs] 25 | linea_artifact_value = depth 26 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_misc/TestEndToEnd.test_simple.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="a = abs(11)", 8 | location=PosixPath("[source file path]"), 9 | ) 10 | call_1 = CallNode( 11 | source_location=SourceLocation( 12 | lineno=1, 13 | col_offset=4, 14 | end_lineno=1, 15 | end_col_offset=11, 16 | source_code=source_1.id, 17 | ), 18 | function_id=LookupNode( 19 | source_location=SourceLocation( 20 | lineno=1, 21 | col_offset=4, 22 | end_lineno=1, 23 | end_col_offset=7, 24 | source_code=source_1.id, 25 | ), 26 | name="abs", 27 | ).id, 28 | positional_args=[ 29 | LiteralNode( 30 | source_location=SourceLocation( 31 | lineno=1, 32 | col_offset=8, 33 | end_lineno=1, 34 | end_col_offset=10, 35 | source_code=source_1.id, 36 | ), 37 | value=11, 38 | ).id 39 | ], 40 | ) 41 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_preprocessing_plot_scaling_importance.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # (unscaled_clf, std_clf) 3 | # from file: 4 | # sources/scikit-learn/examples/preprocessing/plot_scaling_importance.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_preprocessing_plot_scaling_importance]' 8 | 9 | from sklearn.datasets import load_wine 10 | from sklearn.decomposition import PCA 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.naive_bayes import GaussianNB 13 | from 
sklearn.pipeline import make_pipeline 14 | from sklearn.preprocessing import StandardScaler 15 | 16 | RANDOM_STATE = 42 17 | features, target = load_wine(return_X_y=True) 18 | X_train, X_test, y_train, y_test = train_test_split( 19 | features, target, test_size=0.3, random_state=RANDOM_STATE 20 | ) 21 | unscaled_clf = make_pipeline(PCA(n_components=2), GaussianNB()) 22 | unscaled_clf.fit(X_train, y_train) 23 | std_clf = make_pipeline(StandardScaler(), PCA(n_components=2), GaussianNB()) 24 | std_clf.fit(X_train, y_train) 25 | linea_artifact_value = unscaled_clf, std_clf 26 | -------------------------------------------------------------------------------- /tests/unit/migration/test_migrations.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import text 2 | 3 | 4 | def test_38d5f834d3b7_orig(alembic_engine, alembic_runner): 5 | alembic_runner.migrate_up_to("38d5f834d3b7") 6 | 7 | with alembic_engine.connect() as conn: 8 | assert conn.execute( 9 | text("SELECT name FROM sqlite_master WHERE type='table'") 10 | ).fetchall() == [ 11 | ("alembic_version",), 12 | ("execution",), 13 | ("source_code",), 14 | ("node",), 15 | ("session_context",), 16 | ("artifact",), 17 | ("call_node",), 18 | ("global_node",), 19 | ("import_node",), 20 | ("literal_assign_node",), 21 | ("lookup",), 22 | ("mutate_node",), 23 | ("node_value",), 24 | ("global_reference",), 25 | ("implicit_dependency",), 26 | ("keyword_arg",), 27 | ("positional_arg",), 28 | ] 29 | 30 | assert conn.execute( 31 | text( 32 | "SELECT 1 FROM PRAGMA_TABLE_INFO('session_context') WHERE name='python_version';" 33 | ) 34 | ).fetchall() == [(1,)] 35 | -------------------------------------------------------------------------------- /docs/gen_ref_pages.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate the code reference pages. 
3 | 4 | Adapted from https://mkdocstrings.github.io/recipes/ 5 | """ 6 | 7 | from pathlib import Path 8 | 9 | import mkdocs_gen_files 10 | 11 | nav = mkdocs_gen_files.Nav() 12 | 13 | SKIP_DIRS = ["_alembic"] 14 | 15 | for path in sorted(Path("..", "lineapy").rglob("*.py")): 16 | if not set(path.parts).isdisjoint(SKIP_DIRS): 17 | continue 18 | 19 | module_path = path.relative_to("..").with_suffix("") 20 | doc_path = path.relative_to("..").with_suffix(".md") 21 | full_doc_path = Path("reference", doc_path) 22 | 23 | parts = list(module_path.parts) 24 | 25 | if parts[-1] == "__init__": 26 | parts = parts[:-1] 27 | doc_path = doc_path.with_name("index.md") 28 | full_doc_path = full_doc_path.with_name("index.md") 29 | elif parts[-1] == "__main__": 30 | continue 31 | 32 | nav[parts] = doc_path.as_posix() 33 | 34 | with mkdocs_gen_files.open(full_doc_path, "w") as fd: 35 | identifier = ".".join(parts) 36 | print("::: " + identifier, file=fd) 37 | 38 | with mkdocs_gen_files.open("reference/nav.md", "w") as nav_file: 39 | nav_file.writelines(nav.build_literate_nav()) 40 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/module/module.jinja: -------------------------------------------------------------------------------- 1 | {% if default_input_parameters|length>0 %} 2 | import argparse 3 | {% endif %} 4 | 5 | {{module_imports}} 6 | 7 | {{artifact_functions}} 8 | 9 | {{session_functions}} 10 | 11 | def run_all_sessions({%- for input_parameter in default_input_parameters %} 12 | {{indentation_block}}{{input_parameter}}, 13 | {%- endfor -%}): 14 | {{indentation_block}}artifacts = dict() 15 | {{module_function_body | indent(4, True)}} 16 | {{indentation_block}}return artifacts 17 | 18 | if __name__ == "__main__": 19 | {{indentation_block}}# Edit this section to customize the behavior of artifacts 20 | {% if default_input_parameters|length>0 -%} 21 | {{indentation_block}}parser = argparse.ArgumentParser() 22 | {% for parser_block in parser_blocks -%} 23 | {{indentation_block}}{{parser_block}} 24 | {% endfor -%} 25 | {{indentation_block}}args = parser.parse_args() 26 | {{indentation_block}}artifacts = run_all_sessions({%- for parser_input_parameter in parser_input_parameters %} 27 | {{indentation_block}}{{indentation_block}}{{parser_input_parameter}}, 28 | {%- endfor -%}) 29 | {% else -%} 30 | {{indentation_block}}artifacts = run_all_sessions() 31 | {% endif -%} 32 | {{indentation_block}}print(artifacts) 33 | -------------------------------------------------------------------------------- /examples/self-hosting-lineapy/lineapy-notebook/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG OWNER=jupyter 2 | # If you need ARM set ARCH to "aarch64-" 3 | ARG ARCH 4 | ARG BASE_CONTAINER=$OWNER/minimal-notebook:${ARCH}latest 5 | FROM $BASE_CONTAINER 6 | 7 | LABEL maintainer="LineaPy Project " 8 | 9 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 10 | 11 | USER root 12 | 13 | # curl required for health check 14 | RUN apt update -y 15 | RUN apt install -y curl 16 | 17 | # switch back to notebook user so permissions on files are correct 18 | USER $NB_UID 19 | 20 | # prevent pip timing out on slow internet connections 21 | RUN export PIP_DEFAULT_TIMEOUT=1000 22 | 23 | # deps for lineapy 24 | RUN pip install -U pip 25 | RUN pip install fsspec s3fs psycopg2-binary 26 | 27 | # install lineapy 28 | RUN pip install lineapy==0.2.1 29 | 30 | # other nice to have libraries 31 | RUN pip install pandas==1.4.3 
scikit-learn==1.1.2 seaborn==0.11.2 32 | 33 | COPY notebook-start.sh /usr/local/bin/notebook-start.sh 34 | 35 | # Make mountpoints for airflow so that they are mounted as non-root 36 | RUN mkdir -p /home/jovyan/work/airflow/dags 37 | RUN mkdir -p /home/jovyan/work/airflow/plugins 38 | 39 | ENTRYPOINT ["tini", "-g", "--"] 40 | CMD ["notebook-start.sh"] 41 | -------------------------------------------------------------------------------- /lineapy/utils/migration.py: -------------------------------------------------------------------------------- 1 | # Code based on https://improveandrepeat.com/2021/09/python-friday-87-handling-pre-existing-tables-with-alembic-and-sqlalchemy/ 2 | # Code based on https://github.com/talkpython/data-driven-web-apps-with-flask 3 | 4 | from alembic import op 5 | from sqlalchemy import engine_from_config, inspect 6 | 7 | from lineapy.utils.config import options 8 | 9 | 10 | def table_exists(table, schema=None): 11 | engine = engine_from_config( 12 | {"sqlalchemy.url": options.database_url}, prefix="sqlalchemy." 13 | ) 14 | insp = inspect(engine) 15 | return insp.has_table(table, schema) 16 | 17 | 18 | def ensure_table(name, *args, **kwargs): 19 | if not table_exists(name): 20 | op.create_table(name, *args, **kwargs) 21 | 22 | 23 | def table_has_column(table, column): 24 | engine = engine_from_config( 25 | {"sqlalchemy.url": options.database_url}, prefix="sqlalchemy." 26 | ) 27 | insp = inspect(engine) 28 | return any([column == col["name"] for col in insp.get_columns(table)]) 29 | 30 | 31 | def ensure_column(table_name, column, *args, **kwargs): 32 | if not table_has_column(table_name, column.name): 33 | op.add_column(table_name, column, *args, **kwargs) 34 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_var_aliasing/test_alias_by_value.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 0 8 | b = a 9 | a = 2 10 | """, 11 | location=PosixPath("[source file path]"), 12 | ) 13 | call_1 = CallNode( 14 | source_location=SourceLocation( 15 | lineno=2, 16 | col_offset=0, 17 | end_lineno=2, 18 | end_col_offset=5, 19 | source_code=source_1.id, 20 | ), 21 | function_id=LookupNode( 22 | name="l_alias", 23 | ).id, 24 | positional_args=[ 25 | LiteralNode( 26 | source_location=SourceLocation( 27 | lineno=1, 28 | col_offset=4, 29 | end_lineno=1, 30 | end_col_offset=5, 31 | source_code=source_1.id, 32 | ), 33 | value=0, 34 | ).id 35 | ], 36 | ) 37 | literal_2 = LiteralNode( 38 | source_location=SourceLocation( 39 | lineno=3, 40 | col_offset=4, 41 | end_lineno=3, 42 | end_col_offset=5, 43 | source_code=source_1.id, 44 | ), 45 | value=2, 46 | ) 47 | -------------------------------------------------------------------------------- /tests/end_to_end/test_classdef.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def test_basic_classdef(execute): 5 | code = """class A(): 6 | def __init__(self, varname:str): 7 | self.varname = varname 8 | a = A("myclass") 9 | """ 10 | res = execute(code) 11 | assert res.values["a"].varname == "myclass" 12 | 13 | 14 | GLOBAL_MUTATE_CODE = """new_value="newval" 15 | class A(): 16 | def __init__(self, initialname:str): 17 | self.varname = initialname 18 | def update_name(newname:str): 19 | self.varname = newname 20 
| 21 | class Modifier(): 22 | def modify_A(self,classinstance): 23 | classinstance.varname = new_value 24 | 25 | a = A("origvalue") 26 | b = Modifier() 27 | b.modify_A(a) 28 | """ 29 | 30 | 31 | def test_mutate_classvar_values(execute): 32 | res = execute(GLOBAL_MUTATE_CODE) 33 | assert res.values["a"].varname == "newval" 34 | 35 | 36 | @pytest.mark.xfail( 37 | reason="slicing calls to class's functions aren't parsed \ 38 | since classes are blackboxes right now." 39 | ) 40 | def test_mutate_classvar_slice(execute): 41 | res = execute(GLOBAL_MUTATE_CODE, artifacts=["a", "b"]) 42 | assert res.artifacts["a"] == GLOBAL_MUTATE_CODE 43 | assert res.artifacts["b"] == GLOBAL_MUTATE_CODE 44 | -------------------------------------------------------------------------------- /lineapy/plugins/loader.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import sys 3 | import tempfile 4 | from importlib.abc import Loader 5 | from pathlib import Path 6 | 7 | from lineapy.plugins.base_pipeline_writer import BasePipelineWriter 8 | from lineapy.utils.utils import prettify 9 | 10 | 11 | def load_as_module(writer: BasePipelineWriter): 12 | """ 13 | Write the module text to a temp file and load the module under the name 14 | ``session_art1_art2_...`` 15 | """ 16 | 17 | module_name = f"session_{'_'.join(writer.artifact_collection.session_artifacts.keys())}" 18 | temp_folder = tempfile.mkdtemp() 19 | temp_module_path = Path(temp_folder, f"{module_name}.py") 20 | 21 | with open(temp_module_path, "w") as f: 22 | f.writelines(prettify(writer._compose_module())) 23 | 24 | spec = importlib.util.spec_from_file_location( 25 | module_name, temp_module_path 26 | ) 27 | if spec is not None: 28 | session_module = importlib.util.module_from_spec(spec) 29 | assert isinstance(spec.loader, Loader) 30 | sys.modules["module.name"] = session_module 31 | spec.loader.exec_module(session_module) 32 | return session_module 33 | else: 34 | raise Exception("LineaPy cannot retrieve the module.") 35 | -------------------------------------------------------------------------------- /lineapy/visualizer/optimize_svg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Optimizes an SVG file to reduce the size in the notebook. 3 | """ 4 | 5 | import subprocess 6 | import tempfile 7 | from pathlib import Path 8 | 9 | # https://github.com/scour-project/scour#usage 10 | OPTIONS = [ 11 | "--strip-xml-prolog", 12 | "--remove-titles", 13 | "--remove-descriptions", 14 | "--remove-metadata", 15 | "--remove-descriptive-elements", 16 | "--enable-comment-stripping", 17 | "--no-line-breaks", 18 | "--enable-id-stripping", 19 | "--shorten-ids", 20 | "--create-groups", 21 | ] 22 | 23 | 24 | def optimize_svg(svg: str) -> str: 25 | # Calls optimize in subprocess to avoid needing to tie ourselves 26 | # to scour's internal Python API, which is likely less stable and not 27 | # documented. 
28 | with tempfile.TemporaryDirectory() as tmpdirname: 29 | tmp_dir = Path(tmpdirname) 30 | input_path = tmp_dir / "input.svg" 31 | output_path = tmp_dir / "output.svg" 32 | input_path.write_text(svg) 33 | subprocess.run( 34 | ["scour", "-i", str(input_path), "-o", str(output_path)] + OPTIONS, 35 | capture_output=True, 36 | check=True, 37 | ) 38 | return output_path.read_text() 39 | -------------------------------------------------------------------------------- /tests/test_globals_dict.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import pytest 4 | 5 | from lineapy.execution.globals_dict import GlobalsDict, GlobalsDictResult 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "code,inputs,accessed_inputs,added_or_modified", 10 | ( 11 | pytest.param("x", {"x": 1}, ["x"], {}, id="load input"), 12 | pytest.param("x = 1", {}, [], {"x": 1}, id="save output"), 13 | pytest.param("x = 1", {"x": 2}, [], {"x": 1}, id="overwrite input"), 14 | pytest.param( 15 | "x += 1", {"x": 1}, ["x"], {"x": 2}, id="ovewrite and access input" 16 | ), 17 | pytest.param( 18 | "x = 2\nx", {"x": 1}, [], {"x": 2}, id="read after write" 19 | ), 20 | ), 21 | ) 22 | def test_results( 23 | code: str, inputs: Dict[str, object], accessed_inputs, added_or_modified 24 | ): 25 | g = GlobalsDict() 26 | g.setup_globals(inputs) 27 | b = compile(code, "", "exec") 28 | exec(b, g) 29 | intended_res = GlobalsDictResult(accessed_inputs, added_or_modified) 30 | assert g.teardown_globals() == intended_res 31 | 32 | # Try again to make sure it works second time 33 | g.setup_globals(inputs) 34 | exec(b, g) 35 | assert g.teardown_globals() == intended_res 36 | -------------------------------------------------------------------------------- /tests/integration/slices/matplotlib_dash_joinstyle.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # lineapy.file_system 3 | # from file: 4 | # sources/matplotlib-tutorial/scripts/dash_joinstyle.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[matplotlib_dash_joinstyle]' 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | 12 | size = 256, 16 13 | dpi = 72.0 14 | figsize = size[0] / float(dpi), size[1] / float(dpi) 15 | fig = plt.figure(figsize=figsize, dpi=dpi) 16 | fig.patch.set_alpha(0) 17 | plt.axes([0, 0, 1, 1], frameon=False) 18 | plt.plot( 19 | np.arange(3), 20 | [0, 1, 0], 21 | color="blue", 22 | dashes=[12, 5], 23 | linewidth=8, 24 | dash_joinstyle="miter", 25 | ) 26 | plt.plot( 27 | 4 + np.arange(3), 28 | [0, 1, 0], 29 | color="blue", 30 | dashes=[12, 5], 31 | linewidth=8, 32 | dash_joinstyle="bevel", 33 | ) 34 | plt.plot( 35 | 8 + np.arange(3), 36 | [0, 1, 0], 37 | color="blue", 38 | dashes=[12, 5], 39 | linewidth=8, 40 | dash_joinstyle="round", 41 | ) 42 | plt.xlim(0, 12), plt.ylim(-1, 2) 43 | plt.xticks([]), plt.yticks([]) 44 | plt.savefig("../figures/dash_joinstyle.png", dpi=dpi) 45 | -------------------------------------------------------------------------------- /lineapy/utils/analytics/utils.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from typing import Callable, TypeVar, cast 3 | 4 | from lineapy.data.types import LineaID 5 | from lineapy.db.db import RelationalLineaDB 6 | from lineapy.instrumentation.annotation_spec import ExternalState 7 | from 
lineapy.utils.analytics.event_schemas import LibImportEvent 8 | from lineapy.utils.analytics.usage_tracking import do_not_track, track 9 | 10 | C = TypeVar("C", bound=Callable) 11 | 12 | 13 | def allow_do_not_track(fn: C) -> C: 14 | @wraps(fn) 15 | def decorator(*args, **kwargs): 16 | if do_not_track(): 17 | return 18 | return fn(*args, **kwargs) 19 | 20 | return cast(C, decorator) 21 | 22 | 23 | # checking earlier to avoid doing extra DB query work 24 | @allow_do_not_track 25 | def send_lib_info_from_db(db: RelationalLineaDB, session_id: LineaID): 26 | import_nodes = db.get_libraries_for_session(session_id) 27 | [ 28 | track(LibImportEvent(str(n.package_name), str(n.version))) 29 | for n in import_nodes 30 | if n.package_name != "lineapy" 31 | ] 32 | return 33 | 34 | 35 | def side_effect_to_str(reference: object): 36 | if isinstance(reference, ExternalState): 37 | return reference.external_state 38 | return "" 39 | -------------------------------------------------------------------------------- /lineapy/graph_reader/types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class InputVariable: 6 | """ 7 | Class to generate code related to an input variable and its default value 8 | 9 | Attributes 10 | ---------- 11 | variable_name: 12 | variable name 13 | value: 14 | variable value 15 | value_type: 16 | variable object type 17 | default_args: 18 | assignment of the variable to a default value 19 | ex: ``a = 1`` 20 | parser_body: 21 | code block that uses the argparse library to get an input variable from the CLI. 22 | ex: ``parser.add_argument('--a', default=1, type=int)`` 23 | parser_args: 24 | code block that unpacks the input variable from args. 25 | ex: ``a = args.a`` 26 | 27 | """ 28 | 29 | def __init__(self, variable_name, value, value_type) -> None: 30 | self.variable_name = variable_name 31 | self.value = value 32 | self.value_type = value_type.__name__ 33 | self.default_args = f"{self.variable_name} = {repr(self.value)}" 34 | self.parser_body = f"parser.add_argument('--{self.variable_name}', type={self.value_type}, default={repr(self.value)})" 35 | self.parser_args = f"{self.variable_name} = args.{self.variable_name}" 36 | -------------------------------------------------------------------------------- /tests/integration/slices/sklearn_compose_plot_feature_union.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # grid_search 3 | # from file: 4 | # sources/scikit-learn/examples/compose/plot_feature_union.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[sklearn_compose_plot_feature_union]' 8 | 9 | from sklearn.datasets import load_iris 10 | from sklearn.decomposition import PCA 11 | from sklearn.feature_selection import SelectKBest 12 | from sklearn.model_selection import GridSearchCV 13 | from sklearn.pipeline import FeatureUnion, Pipeline 14 | from sklearn.svm import SVC 15 | 16 | iris = load_iris() 17 | X, y = iris.data, iris.target 18 | pca = PCA(n_components=2) 19 | selection = SelectKBest(k=1) 20 | combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)]) 21 | X_features = combined_features.fit(X, y).transform(X) 22 | svm = SVC(kernel="linear") 23 | pipeline = Pipeline([("features", combined_features), ("svm", svm)]) 24 | param_grid = dict( 25 | features__pca__n_components=[1, 2, 3], 26 | 
features__univ_select__k=[1, 2], 27 | svm__C=[0.1, 1, 10], 28 | ) 29 | grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10) 30 | grid_search.fit(X, y) 31 | linea_artifact_value = grid_search 32 | -------------------------------------------------------------------------------- /lineapy/exceptions/create_frame.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copied from https://naleraphael.github.io/blog/posts/devlog_create_a_builtin_frame_object/ 3 | """ 4 | import ctypes 5 | from types import CodeType, FrameType 6 | 7 | P_SIZE = ctypes.sizeof(ctypes.c_void_p) 8 | IS_X64 = P_SIZE == 8 9 | 10 | P_MEM_TYPE = ctypes.POINTER(ctypes.c_ulong if IS_X64 else ctypes.c_uint) 11 | 12 | ctypes.pythonapi.PyFrame_New.argtypes = ( 13 | P_MEM_TYPE, # PyThreadState *tstate 14 | P_MEM_TYPE, # PyCodeObject *code 15 | ctypes.py_object, # PyObject *globals 16 | ctypes.py_object, # PyObject *locals 17 | ) 18 | ctypes.pythonapi.PyFrame_New.restype = ctypes.py_object # PyFrameObject* 19 | 20 | ctypes.pythonapi.PyThreadState_Get.argtypes = () 21 | ctypes.pythonapi.PyThreadState_Get.restype = P_MEM_TYPE 22 | 23 | 24 | def create_frame(code: CodeType) -> FrameType: 25 | """ 26 | Creates a new frame object from a code object. 27 | """ 28 | 29 | return ctypes.pythonapi.PyFrame_New( 30 | ctypes.pythonapi.PyThreadState_Get(), # thread state 31 | ctypes.cast(id(code), P_MEM_TYPE), # a code object 32 | # Make sure not to set __file__ in the globals, 33 | # or else ipython will look at it and change the file name 34 | {}, # a dict of globals 35 | {}, # a dict of locals 36 | ) 37 | -------------------------------------------------------------------------------- /lineapy/annotations/internal/operator.annotations.yaml: -------------------------------------------------------------------------------- 1 | - module: operator 2 | annotations: 3 | - criteria: # setitem(dict, key, value) 4 | function_name: setitem 5 | side_effects: 6 | - mutated_value: 7 | positional_argument_index: 0 8 | - views: 9 | - positional_argument_index: 2 10 | - positional_argument_index: 0 11 | - criteria: # getitem(dict, key) 12 | function_name: getitem 13 | side_effects: 14 | - views: 15 | - positional_argument_index: 0 16 | - result: RESULT 17 | - criteria: # delitem(dict, key) 18 | function_name: delitem 19 | side_effects: 20 | - mutated_value: 21 | positional_argument_index: 0 22 | - criteria: # inplace ops 23 | function_names: 24 | - iadd 25 | - iand 26 | - iconcat 27 | - ifloordiv 28 | - ilshift 29 | - imod 30 | - imul 31 | - imatmul 32 | - ior 33 | - ipow 34 | - irshift 35 | - isub 36 | - itruediv 37 | - ixor 38 | side_effects: 39 | - mutated_value: 40 | positional_argument_index: 0 41 | - views: 42 | - positional_argument_index: 0 43 | - result: RESULT 44 | -------------------------------------------------------------------------------- /tests/unit/plugins/test_utils.py: -------------------------------------------------------------------------------- 1 | from lineapy.plugins import utils 2 | 3 | 4 | def test_slugify() -> None: 5 | """ 6 | Taken from https://github.com/django/blob/master/tests/utils_tests/test_text.py 7 | """ 8 | items = ( 9 | # given - expected - Unicode? 
10 | ("Hello, World!", "hello_world", False), 11 | ("spam & eggs", "spam_eggs", False), 12 | (" multiple---dash and space ", "multiple_dash_and_space", False), 13 | ("\t whitespace-in-value \n", "whitespace_in_value", False), 14 | ("underscore_in-value", "underscore_in_value", False), 15 | ("__strip__underscore-value___", "strip__underscore_value", False), 16 | ("--strip-dash-value---", "strip_dash_value", False), 17 | ("__strip-mixed-value---", "strip_mixed_value", False), 18 | ("_ -strip-mixed-value _-", "strip_mixed_value", False), 19 | ("spam & ıçüş", "spam_ıçüş", True), 20 | ("spam & ıçüş", "spam_cus", False), 21 | ("foo ıç bar", "foo_ıç_bar", True), 22 | (" foo ıç bar", "foo_ıç_bar", True), 23 | ("你好", "你好", True), 24 | ("İstanbul", "istanbul", True), 25 | ("var-name-is-_private", "var_name_is__private", False), 26 | ) 27 | for value, output, is_unicode in items: 28 | assert utils.slugify(value, allow_unicode=is_unicode) == output 29 | -------------------------------------------------------------------------------- /tests/end_to_end/test_blackbox_tracing.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | LINEA_CODE = """import lineapy 4 | """ 5 | 6 | CODE = """import matplotlib.pyplot as plt 7 | import numpy as np 8 | 9 | size = 256, 16 10 | dpi = 72.0 11 | figsize = size[0] / float(dpi), size[1] / float(dpi) 12 | fig = plt.figure(figsize=figsize, dpi=dpi) 13 | plt.axes([0, 0, 1, 1], frameon=False) 14 | 15 | dash_styles = ["miter", "bevel", "round"] 16 | 17 | for i in range(3): 18 | plt.plot( 19 | i * 4 + np.arange(3), 20 | [0, 1, 0], 21 | color="blue", 22 | dashes=[12, 5], 23 | linewidth=8, 24 | dash_joinstyle=dash_styles[i], 25 | ) 26 | 27 | plt.xlim(0, 12), plt.ylim(-1, 2) 28 | plt.xticks([]), plt.yticks([]) 29 | plt.savefig("output/dash_joinstyle.png", dpi=dpi) 30 | 31 | """ 32 | 33 | ARTIFACT_CODE = """ 34 | artifact = lineapy.save(lineapy.file_system, "test_mplt") 35 | """ 36 | 37 | 38 | @pytest.mark.xfail(reason="libraries used inside a blackbox are not captured") 39 | def test_mplt_inside_blackbox_does_not_fail(execute): 40 | # simply a test to check if the code runs without exceptions. 41 | # Later on this will be edited to ensure that the slice is accurate. 
42 | res = execute(LINEA_CODE + CODE + ARTIFACT_CODE, snapshot=False) 43 | assert res.values["artifact"].get_code() == CODE 44 | # assert res.values["fig"] is not None 45 | -------------------------------------------------------------------------------- /lineapy/plugins/pipeline_writer_factory.py: -------------------------------------------------------------------------------- 1 | from lineapy.data.types import PipelineType 2 | from lineapy.plugins.airflow_pipeline_writer import AirflowPipelineWriter 3 | from lineapy.plugins.argo_pipeline_writer import ARGOPipelineWriter 4 | from lineapy.plugins.base_pipeline_writer import BasePipelineWriter 5 | from lineapy.plugins.dvc_pipeline_writer import DVCPipelineWriter 6 | from lineapy.plugins.kubeflow_pipeline_writer import KubeflowPipelineWriter 7 | from lineapy.plugins.ray_pipeline_writer import RayPipelineWriter 8 | 9 | 10 | class PipelineWriterFactory: 11 | @classmethod 12 | def get( 13 | cls, 14 | pipeline_type: PipelineType = PipelineType.SCRIPT, 15 | *args, 16 | **kwargs, 17 | ): 18 | if pipeline_type == PipelineType.AIRFLOW: 19 | return AirflowPipelineWriter(*args, **kwargs) 20 | elif pipeline_type == PipelineType.DVC: 21 | return DVCPipelineWriter(*args, **kwargs) 22 | elif pipeline_type == PipelineType.ARGO: 23 | return ARGOPipelineWriter(*args, **kwargs) 24 | elif pipeline_type == PipelineType.KUBEFLOW: 25 | return KubeflowPipelineWriter(*args, **kwargs) 26 | elif pipeline_type == PipelineType.RAY: 27 | return RayPipelineWriter(*args, **kwargs) 28 | else: 29 | return BasePipelineWriter(*args, **kwargs) 30 | -------------------------------------------------------------------------------- /tests/end_to_end/test_list_slice.py: -------------------------------------------------------------------------------- 1 | def test_empty_slice(execute): 2 | res = execute("x = [1, 2, 3][:]", snapshot=False) 3 | assert res.values["x"] == [1, 2, 3] 4 | 5 | 6 | def test_slice_with_step(execute): 7 | res = execute("x = [1, 2, 3][::2]", snapshot=False) 8 | assert res.values["x"] == [1, 3] 9 | 10 | 11 | def test_slice_with_step_and_start(execute): 12 | res = execute("x = [1, 2, 3][0::2]", snapshot=False) 13 | assert res.values["x"] == [1, 3] 14 | 15 | 16 | def test_slice_with_step_and_stop(execute): 17 | res = execute("x = [1, 2, 3][:2:2]", snapshot=False) 18 | assert res.values["x"] == [1] 19 | 20 | 21 | def test_slice_with_step_and_start_and_stop(execute): 22 | res = execute("x = [1, 2, 3][1:2:2]", snapshot=False) 23 | assert res.values["x"] == [2] 24 | 25 | 26 | def test_slice_with_start(execute): 27 | res = execute("x = [1, 2, 3][1:]", snapshot=False) 28 | assert res.values["x"] == [2, 3] 29 | 30 | 31 | def test_subscript(execute): 32 | SUBSCRIPT = """ 33 | ls = [1,2,3,4] 34 | ls[0] = 1 35 | a = 4 36 | ls[1] = a 37 | ls[2:3] = [30] 38 | ls[3:a] = [40] 39 | """ 40 | res = execute(SUBSCRIPT, snapshot=False) 41 | assert len(res.values["ls"]) == 4 42 | assert res.values["ls"][0] == 1 43 | assert res.values["ls"][1] == 4 44 | assert res.values["ls"][2] == 30 45 | assert res.values["ls"][3] == 40 46 | -------------------------------------------------------------------------------- /tests/end_to_end/test_set.py: -------------------------------------------------------------------------------- 1 | from lineapy.utils.utils import prettify 2 | 3 | 4 | def test_set_init(execute): 5 | code = """x={1,1,2} 6 | """ 7 | res = execute(code, artifacts=["x"]) 8 | assert res.slice("x") == prettify(code) 9 | assert res.values["x"] == {1, 2} 10 | 11 | 12 | def 
test_set_add_mutates(execute): 13 | code = """x = set() 14 | x.add(10) 15 | """ 16 | res = execute(code, artifacts=["x"]) 17 | assert res.slice("x") == prettify(code) 18 | 19 | 20 | def test_set_getitem_view(execute): 21 | code = """y = set() 22 | x = [y] 23 | y.add(10) 24 | """ 25 | res = execute(code, artifacts=["x"]) 26 | assert res.slice("x") == prettify(code) 27 | 28 | 29 | def test_set_add_mutates_inner(execute): 30 | code = """x = set() 31 | y = [x] 32 | x.add(10) 33 | y[0].add(11) 34 | """ 35 | res = execute(code, artifacts=["x", "y"]) 36 | assert res.slice("x") == prettify(code) 37 | assert res.slice("y") == prettify(code) 38 | 39 | 40 | def test_update_set_mutates(execute): 41 | code = """x = set() 42 | x.update({1,1,2}) 43 | """ 44 | res = execute(code, artifacts=["x"]) 45 | assert res.slice("x") == prettify(code) 46 | 47 | 48 | def test_set_clear_mutates(execute): 49 | code = """x = set() 50 | x.add(10) 51 | x.clear() 52 | """ 53 | res = execute(code, artifacts=["x"]) 54 | assert res.slice("x") == prettify(code) 55 | -------------------------------------------------------------------------------- /lineapy/system_tracing/exec_and_record_function_calls.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from sys import gettrace, settrace 3 | from types import CodeType 4 | from typing import Dict 5 | 6 | from lineapy.system_tracing._trace_func import TraceFunc 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def exec_and_record_function_calls( 12 | code: CodeType, globals_: Dict[str, object] 13 | ) -> TraceFunc: 14 | """ 15 | Execute the code while recording all the function calls which originate from the code object. 16 | 17 | While recording function calls, we use sys.settrace() with LineaPy's tracer to extract relevant 18 | information during the runtime of the user's code's function, and reset the tracer after the user 19 | function has completed execution to prevent unnecessary logging. 
20 | However, to ensure LineaPy works correctly while debugging using VSCode, we first capture any 21 | existing tracers using sys.gettrace(), perform our analysis using the LineaPy tracer, and reset 22 | the existing tracer using sys.settrace() 23 | """ 24 | logger.debug("Executing code") 25 | original_trace = gettrace() 26 | trace_func = TraceFunc(code) 27 | try: 28 | settrace(trace_func) 29 | exec(code, globals_) 30 | # Always stop tracing even if exception raised 31 | finally: 32 | settrace(original_trace) 33 | return trace_func 34 | -------------------------------------------------------------------------------- /tests/unit/graph_reader/test_artifact_get_code.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import pytest 4 | 5 | from lineapy.api.api_utils import de_lineate_code 6 | 7 | FAKE_PATH = "/tmp/path/to/value/file/xey" 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "code, expected", 12 | [ 13 | pytest.param("", "", id="blank"), 14 | pytest.param("x = 1", "x = 1", id="nolinea"), 15 | pytest.param( 16 | """import lineapy 17 | lineapy.save(x,"xey")""", 18 | f"""import pickle 19 | 20 | pickle.dump(x,open("{FAKE_PATH}","wb"))""", 21 | id="lineapy_save", 22 | ), 23 | pytest.param( 24 | "x = lineapy.get('x').get_value()", 25 | f"""import pickle 26 | x = pickle.load(open("{FAKE_PATH}","rb"))""", 27 | id="lineapy_get", 28 | ), 29 | pytest.param( 30 | """import lineapy 31 | x = lineapy.get('x').get_value() 32 | y = lineapy.get('y')""", 33 | f"""import pickle 34 | import lineapy 35 | x = pickle.load(open("{FAKE_PATH}","rb")) 36 | y = lineapy.get('y')""", 37 | id="lineapy_get_partial_replace", 38 | ), 39 | ], 40 | ) 41 | def test__de_linealize_code(code, expected): 42 | db = MagicMock() 43 | db.get_node_value_path = MagicMock(return_value=FAKE_PATH) # type: ignore 44 | delineazed = de_lineate_code(code, db) 45 | assert delineazed == expected 46 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_lambda/test_lambda_with_primitives.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 10 8 | b = lambda x: x + 10 9 | c = b(a) 10 | """, 11 | location=PosixPath("[source file path]"), 12 | ) 13 | call_2 = CallNode( 14 | source_location=SourceLocation( 15 | lineno=3, 16 | col_offset=4, 17 | end_lineno=3, 18 | end_col_offset=8, 19 | source_code=source_1.id, 20 | ), 21 | function_id=CallNode( 22 | source_location=SourceLocation( 23 | lineno=2, 24 | col_offset=4, 25 | end_lineno=2, 26 | end_col_offset=20, 27 | source_code=source_1.id, 28 | ), 29 | function_id=LookupNode( 30 | name="l_exec_expr", 31 | ).id, 32 | positional_args=[ 33 | LiteralNode( 34 | value="lambda x: x + 10", 35 | ).id 36 | ], 37 | ).id, 38 | positional_args=[ 39 | LiteralNode( 40 | source_location=SourceLocation( 41 | lineno=1, 42 | col_offset=4, 43 | end_lineno=1, 44 | end_col_offset=6, 45 | source_code=source_1.id, 46 | ), 47 | value=10, 48 | ).id 49 | ], 50 | ) 51 | -------------------------------------------------------------------------------- /tests/unit/transformer/test_source_giver.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | import ast 3 | import sys 4 | 5 | import pytest 6 | 7 | from 
lineapy.transformer.source_giver import SourceGiver 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "code,lineno", 12 | [ 13 | ( 14 | """a = 10 15 | b = lambda x: x + 10 16 | c = b(a) 17 | """, 18 | 1, 19 | ), 20 | ("""a = 10;b=10""", 1), 21 | ], 22 | ids=["multiline", "singleline"], 23 | ) 24 | def test_source_giver_adds_end_lineno(code, lineno): 25 | if sys.version_info >= (3, 8): 26 | pytest.skip("SourceGiver not invoked for Python 3.8+") 27 | import asttokens 28 | 29 | tree = ast.parse(code) 30 | # ensure that the end_lineno is not available and fetching it raises exceptions 31 | with pytest.raises(AttributeError): 32 | print(tree.body[0].end_lineno) 33 | 34 | # now we invoke the SourceGiver and add end_linenos in 2 steps - first we run the tree thr asttokens 35 | asttokens.ASTTokens(code, parse=False, tree=tree) 36 | # double check that the line numbers cooked up by asttokens are correct 37 | assert tree.body[0].last_token.end[0] == lineno 38 | 39 | # and in step 2, run the tree thr SourceGiver and copy the asttokens's token values 40 | # so that the tree looks like 3.8+ tree with all the end_linenos etc 41 | SourceGiver().transform(tree) 42 | assert tree.body[0].end_lineno == lineno 43 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | lineapy: 4 | image: ${IMAGE_NAME} 5 | build: . 6 | environment: 7 | - LINEAPY_DATABASE_URL=${LINEAPY_DATABASE_URL} 8 | volumes: 9 | - ./lineapy:/usr/src/base/lineapy 10 | - ./tests:/usr/src/base/tests 11 | - ./pyproject.toml:/usr/src/base/pyproject.toml 12 | - ./pytest.ini:/usr/src/base/pytest.ini 13 | - ./docs:/usr/src/base/docs 14 | networks: 15 | - lineapy 16 | 17 | postgres: 18 | image: postgres:latest 19 | environment: 20 | - POSTGRES_USER=postgres 21 | - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} 22 | - POSTGRES_DB=postgres 23 | ports: 24 | - "5432:5432" 25 | networks: 26 | - lineapy 27 | 28 | # wait_for_deps2: 29 | # image: willwill/wait-for-it 30 | # command: [ postgres:5432 ] 31 | # networks: 32 | # - lineapy 33 | 34 | wait_for_deps: 35 | image: dadarek/wait-for-dependencies 36 | command: postgres:5432 37 | networks: 38 | - lineapy 39 | 40 | lineapy-airflow: 41 | image: ${IMAGE_NAME_AIRFLOW} 42 | build: 43 | args: 44 | IMAGE_NAME: ${IMAGE_NAME} 45 | context: . 46 | dockerfile: Dockerfile-airflow 47 | depends_on: 48 | - lineapy 49 | ports: 50 | - 8080:8080 51 | command: airflow standalone 52 | networks: 53 | - lineapy 54 | 55 | networks: 56 | lineapy: 57 | driver: bridge 58 | external: true 59 | -------------------------------------------------------------------------------- /docs/mkdocs/concepts/artifact-store.md: -------------------------------------------------------------------------------- 1 | # Artifact Store 2 | 3 | LineaPy saves artifacts in the artifact store, which is a centralized repository for artifacts and 4 | their metadata (e.g., creation time, version). Under the hood, it is a collection of two data structures: 5 | 6 | - Serialized artifact values (i.e., pickle files) 7 | - Database that stores artifact metadata (e.g., timestamp, version, code, pointer to the serialized value) 8 | 9 | Encapsulating both value and code, as well as other metadata such as creation time and version, 10 | LineaPy's artifact store provides a more unified and streamlined experience to save, manage, and reuse 11 | works from different people over time. 
Contrast this with a typical setup where the team stores their 12 | outputs in one place (e.g., a key-value store) and the code in another (e.g., GitHub repo) — we can 13 | imagine how difficult it would be to maintain correlations between the two. LineaPy simplifies lineage tracking 14 | by storing all correlations in one framework: artifact store. 15 | 16 | LineaPy's artifact store is globally accessible, which means the user can view, load, and build on artifacts across 17 | different development sessions and even different projects. This unified global storage is designed to accelerate the overall 18 | development process, which is iterative in nature. Moreover, it can facilitate collaboration between different teams 19 | as it provides a single source of truth for all prior relevant work. 20 | -------------------------------------------------------------------------------- /tests/end_to_end/test_stack_trace.py: -------------------------------------------------------------------------------- 1 | """ 2 | Verifies we are rewriting the stack trace properly. 3 | """ 4 | 5 | import traceback 6 | from typing import cast 7 | 8 | import pytest 9 | 10 | from lineapy.exceptions.user_exception import UserException 11 | 12 | 13 | def test_call_exception(execute): 14 | code = """def divide_me(a): 15 | return a/0 16 | x = divide_me(1) 17 | """ 18 | with pytest.raises(UserException) as e: 19 | execute(code) 20 | 21 | # Test that the first line of the inner exception is the line in the source 22 | # file for this call node 23 | inner_exception = cast(Exception, e.value.__cause__) 24 | assert ( 25 | traceback.extract_tb(inner_exception.__traceback__)[0].line 26 | == "x = divide_me(1)" 27 | ) 28 | 29 | 30 | def test_syntax_error(execute): 31 | code = """a = 10 32 | a+++ 33 | """ 34 | with pytest.raises(UserException) as e: 35 | execute(code) 36 | # Verify that the first line is the proper line from the file 37 | inner_exception = cast(SyntaxError, e.value.__cause__) 38 | assert inner_exception.text == "a+++\n" 39 | 40 | 41 | def test_name_error(execute): 42 | code = """a = 10 43 | x 44 | """ 45 | with pytest.raises(UserException) as e: 46 | execute(code) 47 | # Verify that the first line is the proper line from the file 48 | inner_exception = cast(NameError, e.value.__cause__) 49 | assert inner_exception.args == ("name 'x' is not defined",) 50 | -------------------------------------------------------------------------------- /tests/integration/__snapshots__/test_slice/test_slice[pytorch_intro_torchscript].py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class MyDecisionGate(torch.nn.Module): 5 | def forward(self, x): 6 | if x.sum() > 0: 7 | return x 8 | else: 9 | return -x 10 | 11 | 12 | class MyCell(torch.nn.Module): 13 | def __init__(self, dg): 14 | super(MyCell, self).__init__() 15 | self.dg = dg 16 | self.linear = torch.nn.Linear(4, 4) 17 | 18 | def forward(self, x, h): 19 | new_h = torch.tanh(self.dg(self.linear(x)) + h) 20 | return new_h, new_h 21 | 22 | 23 | scripted_gate = torch.jit.script(MyDecisionGate()) 24 | x, h = torch.rand(3, 4), torch.rand(3, 4) 25 | 26 | 27 | class MyRNNLoop(torch.nn.Module): 28 | def __init__(self): 29 | super(MyRNNLoop, self).__init__() 30 | self.cell = torch.jit.trace(MyCell(scripted_gate), (x, h)) 31 | 32 | def forward(self, xs): 33 | h, y = torch.zeros(3, 4), torch.zeros(3, 4) 34 | for i in range(xs.size(0)): 35 | y, h = self.cell(xs[i], h) 36 | return y, h 37 | 38 | 39 | class 
WrapRNN(torch.nn.Module): 40 | def __init__(self): 41 | super(WrapRNN, self).__init__() 42 | self.loop = torch.jit.script(MyRNNLoop()) 43 | 44 | def forward(self, xs): 45 | y, h = self.loop(xs) 46 | return torch.relu(y) 47 | 48 | 49 | traced = torch.jit.trace(WrapRNN(), torch.rand(10, 3, 4)) 50 | traced.save("wrapped_rnn.pt") 51 | -------------------------------------------------------------------------------- /tests/integration/slices/pytorch_vision_tensor_transform.py: -------------------------------------------------------------------------------- 1 | # This is the manual slice of: 2 | # lineapy.file_system 3 | # from file: 4 | # sources/pytorch-vision/gallery/plot_scripted_tensor_transforms.py 5 | 6 | # To verify that linea produces the same slice, run: 7 | # pytest -m integration --runxfail -vv 'tests/integration/test_slice.py::test_slice[pytorch_vision_tensor_transform]' 8 | 9 | import torch 10 | import torchvision.transforms as T 11 | 12 | torch.manual_seed(1) 13 | import torch.nn as nn 14 | 15 | device = "cuda" if torch.cuda.is_available() else "cpu" 16 | from torchvision.models import resnet18 17 | 18 | 19 | class Predictor(nn.Module): 20 | def __init__(self): 21 | super().__init__() 22 | self.resnet18 = resnet18(pretrained=True, progress=False).eval() 23 | self.transforms = nn.Sequential( 24 | T.Resize([256]), 25 | T.CenterCrop(224), 26 | T.ConvertImageDtype(torch.float), 27 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 28 | ) 29 | 30 | def forward(self, x: torch.Tensor) -> torch.Tensor: 31 | with torch.no_grad(): 32 | x = self.transforms(x) 33 | y_pred = self.resnet18(x) 34 | return y_pred.argmax(dim=1) 35 | 36 | 37 | predictor = Predictor().to(device) 38 | scripted_predictor = torch.jit.script(predictor).to(device) 39 | import tempfile 40 | 41 | with tempfile.NamedTemporaryFile() as f: 42 | scripted_predictor.save(f.name) 43 | -------------------------------------------------------------------------------- /lineapy/transformer/py37_transformer.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | from lineapy.transformer.base_transformer import BaseTransformer 4 | from lineapy.utils.deprecation_utils import Constant 5 | 6 | 7 | class Py37Transformer(BaseTransformer): 8 | def _convert_to_constant(self, value, node) -> Constant: 9 | if not hasattr( 10 | node, "end_lineno" 11 | ): # somehow didnt go through our sourcegiver 12 | return Constant( 13 | value=value, lineno=node.lineno, col_offset=node.col_offset 14 | ) 15 | else: 16 | return Constant( 17 | value=value, 18 | lineno=node.lineno, 19 | end_lineno=node.end_lineno, # type: ignore 20 | col_offset=node.col_offset, 21 | end_col_offset=node.end_col_offset, # type: ignore 22 | ) 23 | 24 | def visit_Ellipsis(self, node: ast.Ellipsis) -> Constant: 25 | return self._convert_to_constant(..., node) 26 | 27 | def visit_Str(self, node: ast.Str) -> Constant: 28 | return self._convert_to_constant(node.s, node) 29 | 30 | def visit_Num(self, node: ast.Num) -> Constant: 31 | return self._convert_to_constant(node.n, node) 32 | 33 | def visit_NameConstant(self, node: ast.NameConstant) -> Constant: 34 | return self._convert_to_constant(node.value, node) 35 | 36 | def visit_Bytes(self, node: ast.Bytes) -> Constant: 37 | return self._convert_to_constant(node.s, node) 38 | -------------------------------------------------------------------------------- /tests/end_to_end/__snapshots__/test_assign_destruc/test_variable_alias_nested.py: 
-------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import * 3 | from lineapy.data.types import * 4 | from lineapy.utils.utils import get_new_id 5 | 6 | source_1 = SourceCode( 7 | code="""a = 0 8 | b = a 9 | c = b""", 10 | location=PosixPath("[source file path]"), 11 | ) 12 | call_2 = CallNode( 13 | source_location=SourceLocation( 14 | lineno=3, 15 | col_offset=0, 16 | end_lineno=3, 17 | end_col_offset=5, 18 | source_code=source_1.id, 19 | ), 20 | function_id=LookupNode( 21 | name="l_alias", 22 | ).id, 23 | positional_args=[ 24 | CallNode( 25 | source_location=SourceLocation( 26 | lineno=2, 27 | col_offset=0, 28 | end_lineno=2, 29 | end_col_offset=5, 30 | source_code=source_1.id, 31 | ), 32 | function_id=LookupNode( 33 | name="l_alias", 34 | ).id, 35 | positional_args=[ 36 | LiteralNode( 37 | source_location=SourceLocation( 38 | lineno=1, 39 | col_offset=4, 40 | end_lineno=1, 41 | end_col_offset=5, 42 | source_code=source_1.id, 43 | ), 44 | value=0, 45 | ).id 46 | ], 47 | ).id 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 79 3 | extend-exclude = '(__snapshots__|integration/slices|integration/sources|integration/envs|outputs|_alembic|unit/plugins/expected|examples|env)' 4 | 5 | [tool.isort] 6 | profile = "black" 7 | skip_gitignore = true 8 | skip = ["__snapshots__"] 9 | skip_glob=["tests/integration/slices/*", "tests/integration/sources/*", "tests/integration/envs/*", "tests/outputs/*", "lineapy/_alembic/*", "env/*"] 10 | line_length = 79 11 | 12 | [tool.coverage.run] 13 | # Trace which side of branches were taken 14 | # https://coverage.readthedocs.io/en/latest/branch.html#branch 15 | branch = true 16 | # Ignore coverage on app, since we are letting it rot 17 | omit = ["lineapy/app/*"] 18 | relative_files = true 19 | 20 | [tool.mypy] 21 | 22 | exclude = '(/__snapshots__/|sliced_housing_dag*.py|tutorials/.*|integration/slices/.*|integration/sources/.*|integration/envs/.*|/outputs/|/build/|/_alembic/|env/)$' 23 | 24 | # https://docs.sqlalchemy.org/en/14/orm/extensions/mypy.html 25 | # https://pydantic-docs.helpmanual.io/mypy_plugin/#enabling-the-plugin 26 | plugins = ["sqlalchemy.ext.mypy.plugin", "pydantic.mypy"] 27 | 28 | 29 | # Enable function body type checking, even if function types are not annotated 30 | check_untyped_defs = true 31 | 32 | # Dont check libraries without stubs 33 | ignore_missing_imports = true 34 | 35 | # Do not raise errors on ignores which we don't need - hassle when supporting multiple python versions 36 | warn_unused_ignores = false 37 | 38 | warn_unreachable = true 39 | -------------------------------------------------------------------------------- /tests/notebook/test_is_executing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "92e3dbb4", 6 | "metadata": {}, 7 | "source": [ 8 | "Test that linea tracing is active" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "06ac3074-0510-43fe-86d2-84101db99956", 15 | "metadata": { 16 | "execution": { 17 | "iopub.execute_input": "2022-01-13T19:00:04.651272Z", 18 | "iopub.status.busy": "2022-01-13T19:00:04.650632Z", 19 | "iopub.status.idle": "2022-01-13T19:00:04.815525Z", 20 | "shell.execute_reply": "2022-01-13T19:00:04.814859Z" 21 | 
}, 22 | "tags": [] 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import lineapy\n", 27 | "assert lineapy._is_executing()" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "kernelspec": { 33 | "display_name": "Python 3 (ipykernel)", 34 | "language": "python", 35 | "name": "python3" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 3 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython3", 47 | "version": "3.9.6" 48 | }, 49 | "widgets": { 50 | "application/vnd.jupyter.widget-state+json": { 51 | "state": {}, 52 | "version_major": 2, 53 | "version_minor": 0 54 | } 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 5 59 | } 60 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | # We use local hooks so that we don't pass the changed filenames in as args, 4 | # so that the ignores we defined in the configs work 5 | repos: 6 | - repo: local 7 | hooks: 8 | - id: flake8 9 | name: flake8 10 | entry: flake8 . 11 | language: python 12 | additional_dependencies: ["flake8==4.0.1"] 13 | types: [python] 14 | pass_filenames: false 15 | - repo: local 16 | hooks: 17 | - id: isort 18 | name: isort 19 | entry: isort . 20 | language: python 21 | additional_dependencies: ["isort==5.10.1"] 22 | types: [python] 23 | pass_filenames: false 24 | - repo: local 25 | hooks: 26 | - id: black 27 | name: black 28 | entry: black . 29 | language: python 30 | # https://github.com/psf/black/issues/2964 31 | additional_dependencies: ["black==22.3.0"] 32 | types: [python] 33 | pass_filenames: false 34 | - repo: local 35 | hooks: 36 | - id: mypy 37 | name: mypy 38 | entry: mypy . 
39 | language: python 40 | additional_dependencies: ["mypy==0.931", "SQLAlchemy==1.4.29", "sqlalchemy[mypy]", 41 | "mypy-extensions==0.4.3", "pydantic==1.9.0", "types-PyYAML", "types-requests", "types-mock"] 42 | types: [python] 43 | pass_filenames: false 44 | -------------------------------------------------------------------------------- /lineapy/plugins/jinja_templates/airflow/airflow_dag_PythonOperator.jinja: -------------------------------------------------------------------------------- 1 | import {{ MODULE_NAME }} 2 | import pickle 3 | import pathlib 4 | from airflow import DAG 5 | from airflow.utils.dates import days_ago 6 | from airflow.operators.python_operator import PythonOperator 7 | 8 | {% for task_def in task_definitions %} 9 | {{ task_def }} 10 | {% endfor %} 11 | 12 | default_dag_args = { 13 | "owner": "{{ OWNER }}", 14 | "retries": {{ RETRIES }}, 15 | "start_date": {{ START_DATE }}, 16 | {%- if (dag_params|length > 0) %} 17 | "params": {{ dag_params }}, 18 | {%- endif %} 19 | } 20 | 21 | with DAG( 22 | dag_id="{{ DAG_NAME }}_dag", 23 | schedule_interval="{{ SCHEDULE_INTERVAL }}", 24 | max_active_runs={{ MAX_ACTIVE_RUNS }}, 25 | catchup={{ CATCHUP }}, 26 | default_args=default_dag_args, 27 | ) as dag: 28 | 29 | {% for task_name, task_def in tasks.items() %} 30 | {{ task_name }} = PythonOperator( 31 | task_id="{{ task_name }}_task", 32 | python_callable=task_{{ task_name }}, 33 | {%- if task_def.user_input_variables|length > 0 %} 34 | op_kwargs={ 35 | {%- for var in task_def.user_input_variables %} 36 | "{{ var }}": "{{ '{{' }} params.{{ var }} {{ '}}' }}"{{ ',' if not loop.last else '' }} 37 | {%- endfor %} 38 | }, 39 | {%- endif %} 40 | ) 41 | {% endfor %} 42 | 43 | {% if task_dependencies is not none %} 44 | {% for TASK_DEPENDENCIES in task_dependencies %} 45 | {{TASK_DEPENDENCIES}} 46 | {% endfor %} 47 | {%endif %} 48 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from pathlib import Path 3 | 4 | from lineapy.api.artifact_serializer import _try_write_to_pickle 5 | from lineapy.api.models.linea_artifact import LineaArtifact 6 | from lineapy.utils.config import options 7 | 8 | 9 | def test_execute_slice(execute): 10 | """ 11 | Tests that executing a slice of a graph yields the same result as executing the graph 12 | """ 13 | c = """x = [] 14 | if True: 15 | x = [] 16 | x.append(1) 17 | """ 18 | res = execute(c, artifacts=["x"], snapshot=False) 19 | artifactorm = res.db.get_artifactorm_by_name("x") 20 | full_graph_artifact = LineaArtifact( 21 | db=res.db, 22 | _execution_id=artifactorm.execution_id, 23 | _node_id=artifactorm.node_id, 24 | _session_id=artifactorm.node.session_id, 25 | _version=artifactorm.version, 26 | name=artifactorm.name, 27 | ) 28 | 29 | slice_graph_artifact_res = full_graph_artifact.execute() 30 | assert slice_graph_artifact_res == res.values["x"] 31 | assert ( 32 | res.artifacts["x"] 33 | == """if True: 34 | x = [] 35 | x.append(1) 36 | """ 37 | ) 38 | assert res.values["x"] == [1] 39 | 40 | 41 | def test_write_to_pickle(): 42 | _try_write_to_pickle(42, "test_pickle") 43 | pickle_path = ( 44 | Path(options.safe_get("artifact_storage_dir")) / "test_pickle" 45 | ) 46 | assert pickle_path.exists() 47 | 48 | with pickle_path.open("rb") as f: 49 | assert pickle.load(f) == 42 50 | --------------------------------------------------------------------------------
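Illustrative usage sketch (not a file in the repository): tests/test_api.py above drives the internal LineaArtifact model directly. For orientation, the snippet below shows roughly how the save-and-reload workflow from the artifact store discussion earlier in this document looks through the public API. It assumes lineapy.save and lineapy.get (with get_value and get_code) as the public entry points, and it assumes a session that LineaPy is actively tracing, such as a notebook with the lineapy extension loaded.

import lineapy

# Ordinary development code, run in a LineaPy-traced session (assumed).
rows = [1, 2, 3]
cleaned = [r for r in rows if r > 1]

# Persist the value, the code slice that produced it, and its metadata
# in the artifact store.
lineapy.save(cleaned, "cleaned_rows")

# Later, possibly in a different session or project, retrieve the artifact.
artifact = lineapy.get("cleaned_rows")
print(artifact.get_value())  # the stored object, here [2, 3]
print(artifact.get_code())   # the code slice that recreates it

Because every save goes through the same store, the artifact name is the only handle a collaborator needs in order to reload either the value or the code that produced it.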