├── debian ├── compat ├── luigi.install ├── luigi-server.install ├── rules ├── changelog └── control ├── test ├── contrib │ ├── __init__.py │ ├── hdfs │ │ └── webhdfs_client_test.py │ ├── _webhdfs_test.py │ ├── scalding_test.py │ ├── redis_test.py │ ├── ecs_test.py │ └── cascading_test.py ├── visualiser │ └── __init__.py ├── create_packages_archive_root │ ├── package.egg-info │ │ └── top_level.txt │ ├── module.py │ └── package │ │ ├── __init__.py │ │ ├── subpackage │ │ ├── __init__.py │ │ └── submodule.py │ │ ├── submodule_without_imports.py │ │ ├── submodule.py │ │ └── submodule_with_absolute_import.py ├── gcloud-credentials.json.enc ├── testconfig │ ├── luigi.cfg │ ├── core-site.xml │ ├── log4j.properties │ └── logging.cfg ├── namespace_test_helper.py ├── other_module.py ├── runtests.py ├── most_common_test.py ├── set_task_name_test.py ├── dynamic_import_test.py ├── factorial_test.py ├── remote_scheduler_test.py ├── subtask_test.py ├── recursion_test.py ├── priority_test.py ├── namespace_test.py ├── helpers_test.py ├── task_history_test.py ├── _mysqldb_test.py ├── test_ssh.py ├── clone_test.py ├── dict_parameter_test.py ├── import_test.py ├── fib_test.py ├── task_bulk_complete_test.py ├── mock_test.py ├── instance_test.py ├── webhdfs_minicluster.py ├── test_sigpipe.py ├── lock_test.py ├── wrap_test.py ├── simulate_test.py ├── task_test.py └── instance_wrap_test.py ├── dummy_test_module ├── __init__.py └── not_imported.py ├── doc ├── .gitignore ├── api │ └── luigi.six.rst ├── history.png ├── luigi.png ├── user_recs.png ├── web_server.png ├── history_by_id.png ├── task_breakdown.png ├── aggregate_artists.png ├── dependency_graph.png ├── execution_model.png ├── history_by_name.png ├── parameters_enum.png ├── task_parameters.png ├── task_with_targets.png ├── parameters_recursion.png ├── visualiser_front_page.png ├── parameters_date_algebra.png ├── tasks_with_dependencies.png ├── tasks_input_output_requires.png ├── command_line.rst ├── index.rst └── 
design_and_limitations.rst ├── luigi ├── static │ └── visualiser │ │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ ├── fontawesome-webfont.woff2 │ │ ├── glyphicons-halflings-regular.eot │ │ ├── glyphicons-halflings-regular.ttf │ │ └── glyphicons-halflings-regular.woff │ │ ├── lib │ │ ├── datatables │ │ │ └── images │ │ │ │ ├── favicon.ico │ │ │ │ ├── sort_asc.png │ │ │ │ ├── sort_both.png │ │ │ │ ├── sort_desc.png │ │ │ │ ├── Sorting icons.psd │ │ │ │ ├── sort_asc_disabled.png │ │ │ │ └── sort_desc_disabled.png │ │ ├── jquery-ui │ │ │ └── css │ │ │ │ └── images │ │ │ │ ├── animated-overlay.gif │ │ │ │ ├── ui-icons_222222_256x240.png │ │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ │ ├── ui-icons_454545_256x240.png │ │ │ │ ├── ui-icons_888888_256x240.png │ │ │ │ ├── ui-icons_cd0a0a_256x240.png │ │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ │ └── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ └── AdminLTE │ │ │ └── css │ │ │ └── skin-green.min.css │ │ ├── mockdata │ │ ├── fetch_error │ │ ├── dep_graph │ │ └── task_list │ │ ├── test.html │ │ ├── css │ │ └── tipsy.css │ │ └── js │ │ └── test │ │ └── graph_test.js ├── templates │ ├── header.html │ ├── menu.html │ ├── recent.html │ ├── show.html │ └── layout.html ├── contrib │ ├── __init__.py │ ├── hdfs │ │ ├── error.py │ │ ├── clients.py │ │ ├── abstract_client.py │ │ └── __init__.py │ ├── pyspark_runner.py │ ├── sparkey.py │ ├── sge_runner.py │ ├── target.py │ └── redis_store.py ├── __main__.py ├── tools │ ├── __init__.py │ └── luigi_grep.py ├── scalding.py ├── webhdfs.py ├── hive.py ├── task_status.py ├── hdfs.py ├── hadoop.py ├── hadoop_jar.py ├── event.py ├── 
deprecate_kwarg.py ├── cmdline.py ├── __init__.py ├── task_history.py ├── mrrunner.py └── retcodes.py ├── MANIFEST.in ├── scripts └── ci │ ├── conditional_tox.sh │ └── setup_hadoop_env.sh ├── bin ├── luigi └── luigid ├── examples ├── hello_world.py ├── __init__.py ├── foo.py ├── foo_complex.py ├── wordcount_hadoop.py ├── dynamic_requirements.py ├── wordcount.py └── ssh_remote_execution.py ├── .coveragerc ├── RELEASE-PROCESS.rst ├── sitecustomize.py ├── .travis.yml ├── CONTRIBUTING.rst ├── tox.ini ├── .gitignore └── setup.py /debian/compat: -------------------------------------------------------------------------------- 1 | 8 2 | -------------------------------------------------------------------------------- /test/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/luigi.install: -------------------------------------------------------------------------------- 1 | usr/lib 2 | -------------------------------------------------------------------------------- /dummy_test_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/luigi-server.install: -------------------------------------------------------------------------------- 1 | usr/bin 2 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _static 2 | _build 3 | _templates 4 | -------------------------------------------------------------------------------- /doc/api/luigi.six.rst: -------------------------------------------------------------------------------- 1 | luigi.six module 2 | ================ 3 | -------------------------------------------------------------------------------- 
/test/visualiser/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests for visualiser javascript. 2 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | %: 4 | dh $@ --with python2 5 | -------------------------------------------------------------------------------- /doc/history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/history.png -------------------------------------------------------------------------------- /doc/luigi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/luigi.png -------------------------------------------------------------------------------- /test/create_packages_archive_root/package.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | package 2 | -------------------------------------------------------------------------------- /doc/user_recs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/user_recs.png -------------------------------------------------------------------------------- /doc/web_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/web_server.png -------------------------------------------------------------------------------- /doc/history_by_id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/history_by_id.png -------------------------------------------------------------------------------- 
/doc/task_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/task_breakdown.png -------------------------------------------------------------------------------- /doc/aggregate_artists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/aggregate_artists.png -------------------------------------------------------------------------------- /doc/dependency_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/dependency_graph.png -------------------------------------------------------------------------------- /doc/execution_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/execution_model.png -------------------------------------------------------------------------------- /doc/history_by_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/history_by_name.png -------------------------------------------------------------------------------- /doc/parameters_enum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/parameters_enum.png -------------------------------------------------------------------------------- /doc/task_parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/task_parameters.png -------------------------------------------------------------------------------- /doc/task_with_targets.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/doc/task_with_targets.png -------------------------------------------------------------------------------- /doc/parameters_recursion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/parameters_recursion.png -------------------------------------------------------------------------------- /doc/visualiser_front_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/visualiser_front_page.png -------------------------------------------------------------------------------- /doc/parameters_date_algebra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/parameters_date_algebra.png -------------------------------------------------------------------------------- /doc/tasks_with_dependencies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/tasks_with_dependencies.png -------------------------------------------------------------------------------- /test/gcloud-credentials.json.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/test/gcloud-credentials.json.enc -------------------------------------------------------------------------------- /doc/tasks_input_output_requires.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/doc/tasks_input_output_requires.png -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /dummy_test_module/not_imported.py: -------------------------------------------------------------------------------- 1 | import luigi 2 | 3 | 4 | class UnimportedTask(luigi.Task): 5 | def complete(self): 6 | return False 7 | -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/favicon.ico -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/sort_asc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/sort_asc.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/sort_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/sort_both.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/sort_desc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/sort_desc.png -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | luigi (0.0) unstable; urgency=low 2 | 3 | * Initial release 4 | 5 | -- Erik Bernhardsson Thu, 2 May 2013 00:00:00 +0100 6 | -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /luigi/static/visualiser/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/Sorting icons.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/Sorting icons.psd -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include examples/*.py 4 | include test/*.py 5 | recursive-include luigi/static * 6 | include luigi/templates/*.html 7 | 8 | -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/sort_asc_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/sort_asc_disabled.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/datatables/images/sort_desc_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/datatables/images/sort_desc_disabled.png -------------------------------------------------------------------------------- 
/luigi/static/visualiser/lib/jquery-ui/css/images/animated-overlay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/animated-overlay.gif -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /test/testconfig/luigi.cfg: -------------------------------------------------------------------------------- 1 | [core] 2 | logging_conf_file: test/testconfig/logging.cfg 3 | 4 | [hdfs] 5 | client: hadoopcli 6 | snakebite_autoconfig: false 7 | namenode_host: localhost 8 | namenode_port: 50030 9 | -------------------------------------------------------------------------------- /luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/luigi/master/luigi/static/visualiser/lib/jquery-ui/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /scripts/ci/conditional_tox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | ENDENV=$(echo $TOXENV | tail -c 7) 6 | if [[ $ENDENV == gcloud ]] 7 | then 8 | [[ 
$DIDNT_CREATE_GCP_CREDS = 1 ]] || tox 9 | else 10 | tox --hashseed 1 11 | fi 12 | -------------------------------------------------------------------------------- /luigi/templates/header.html: -------------------------------------------------------------------------------- 1 | 2 | Luigi Task History 3 | 4 | 5 | -------------------------------------------------------------------------------- /test/testconfig/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://localhost:50030/ 8 | 9 | 10 | -------------------------------------------------------------------------------- /bin/luigi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import warnings 5 | import luigi.cmdline 6 | 7 | 8 | def main(argv): 9 | warnings.warn("'bin/luigi' has moved to console script 'luigi'", DeprecationWarning) 10 | luigi.cmdline.luigi_run(argv) 11 | 12 | 13 | if __name__ == '__main__': 14 | main(sys.argv[1:]) 15 | -------------------------------------------------------------------------------- /bin/luigid: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import warnings 5 | import luigi.cmdline 6 | 7 | 8 | def main(argv): 9 | warnings.warn("'bin/luigid' has moved to console script 'luigid'", DeprecationWarning) 10 | luigi.cmdline.luigid(argv) 11 | 12 | 13 | if __name__ == '__main__': 14 | main(sys.argv[1:]) 15 | -------------------------------------------------------------------------------- /test/testconfig/log4j.properties: -------------------------------------------------------------------------------- 1 | hadoop.root.logger=INFO,stderr 2 | log4j.logger.org.apache.hadoop=INFO,stderr 3 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=Off 4 | 5 | log4j.appender.stderr = org.apache.log4j.ConsoleAppender 6 | log4j.appender.stderr.layout = 
org.apache.log4j.PatternLayout 7 | log4j.appender.stderr.Target = System.err -------------------------------------------------------------------------------- /examples/hello_world.py: -------------------------------------------------------------------------------- 1 | import luigi 2 | 3 | 4 | class HelloWorldTask(luigi.Task): 5 | task_namespace = 'examples' 6 | 7 | def run(self): 8 | print("{task} says: Hello world!".format(task=self.__class__.__name__)) 9 | 10 | if __name__ == '__main__': 11 | luigi.run(['examples.HelloWorldTask', '--workers', '1', '--local-scheduler']) 12 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | luigi/mrrunner.py 4 | */python?.?/* 5 | */site-packages/nose/* 6 | *__init__* 7 | *test/* 8 | */luigi/six.py 9 | */.tox/* 10 | */setup.py 11 | */bin/luigidc 12 | */dummy_test_module/* 13 | sitecustomize.py 14 | hadoop_test.py 15 | minicluster.py 16 | [run] 17 | parallel=True 18 | -------------------------------------------------------------------------------- /test/testconfig/logging.cfg: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=consoleHandler 13 | 14 | [handler_consoleHandler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=simpleFormatter 18 | args=(sys.stdout,) 19 | 20 | [formatter_simpleFormatter] 21 | format=%(levelname)s: %(message)s 22 | -------------------------------------------------------------------------------- /luigi/static/visualiser/mockdata/fetch_error: -------------------------------------------------------------------------------- 1 | { 2 | "response": { 3 | "taskId": "FactorTask(product=2)", 4 | "error": "Runtime error:\nTraceback (most recent call last):\n 
File '/Users/davw/projects/luigi-core/luigi/worker.py', line 164, in _run_task\n task.run()\n File '/Users/davw/projects/luigi-core/test/scheduler_visualisation_test.py', line 62, in run\n raise Exception('Error Message')\nException: Error Message\n" 5 | } 6 | } -------------------------------------------------------------------------------- /RELEASE-PROCESS.rst: -------------------------------------------------------------------------------- 1 | For maintainers of luigi, who have push access to pypi. Here's how you upload 2 | luigi to pypi. 3 | 4 | 1. Update version number in setup.py, if needed. Commit and push. 5 | 2. pypi (Executing ``python setup.py sdist upload``) 6 | 3. Add tag on github (https://github.com/spotify/luigi/releases), including changelog 7 | 8 | If you know a better way, please say so! I'm (arash) not used to releasing code 9 | to pypi! 10 | 11 | Currently, luigi is not released on any particular schedule and it is not 12 | strictly abiding semantic versioning. 13 | -------------------------------------------------------------------------------- /luigi/static/visualiser/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Luigi Visualiser Tests 5 | 6 | 7 | 8 |
9 |
10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /luigi/templates/menu.html: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | {% extends "layout.html" %} 8 | 9 | 10 | 11 | {% block content %} 12 | 13 |
14 | {% if tasknames %} 15 |

[ Task History ]

16 | 23 | {% end %} 24 |
25 | 26 | {% end %} 27 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/module.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/subpackage/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/submodule_without_imports.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/submodule.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import os # NOQA 19 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/subpackage/submodule.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | import os # NOQA 19 | -------------------------------------------------------------------------------- /luigi/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | """ 18 | Package containing optional add-on functionality. 19 | """ 20 | -------------------------------------------------------------------------------- /luigi/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2016 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | from luigi.cmdline import luigi_run 18 | 19 | if __name__ == '__main__': 20 | luigi_run() 21 | -------------------------------------------------------------------------------- /test/create_packages_archive_root/package/submodule_with_absolute_import.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from __future__ import absolute_import 19 | 20 | import os # NOQA 21 | -------------------------------------------------------------------------------- /luigi/templates/recent.html: -------------------------------------------------------------------------------- 1 | {% include "header.html" %} 2 |

Luigi Task History

3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | {% for task in tasks %} 15 | 16 | 17 | 18 | 19 | 20 | 23 | 24 | {% end %} 25 | 26 |
NameHostLast ActionStatusParameters
{{task.name}}{{task.host}}{{task.events[0].ts}}{{task.events[0].event_name}}{% for (k, param) in task.parameters.items() %} 21 |
{{k}}{{param.value}}
22 | {% end %}
27 | -------------------------------------------------------------------------------- /luigi/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2014 Spotify AB 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | # use this file except in compliance with the License. You may obtain a copy of 6 | # the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | # License for the specific language governing permissions and limitations under 14 | # the License. 15 | 16 | """ 17 | Sort of a standard library for doing stuff with Tasks at a somewhat abstract level. 18 | 19 | Submodule introduced to stop growing util.py unstructured. 20 | """ 21 | -------------------------------------------------------------------------------- /doc/command_line.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: bash 2 | 3 | Running from the Command Line 4 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 5 | 6 | The preferred way to run luigi tasks is through the ``luigi`` command line tool 7 | that will be installed with the pip package. 8 | 9 | .. code-block:: python 10 | 11 | # my_module.py, available in your sys.path 12 | import luigi 13 | 14 | class MyTask(luigi.Task): 15 | x = luigi.IntParameter() 16 | y = luigi.IntParameter(default=45) 17 | 18 | def run(self): 19 | print self.x + self.y 20 | 21 | Should be run like this: 22 | 23 | .. code-block:: console 24 | 25 | $ luigi --module my_module MyTask --x 123 --y 456 --local-scheduler 26 | 27 | Or alternatively like this: 28 | 29 | .. 
code-block:: console 30 | 31 | $ python -m luigi --module my_module MyTask --x 100 --local-scheduler 32 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. Luigi documentation master file, created by 2 | sphinx-quickstart on Sat Feb 8 00:56:43 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | Table of Contents 9 | ----------------- 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | example_top_artists.rst 15 | workflows.rst 16 | tasks.rst 17 | parameters.rst 18 | command_line.rst 19 | central_scheduler.rst 20 | execution_model.rst 21 | luigi_patterns.rst 22 | configuration.rst 23 | design_and_limitations.rst 24 | 25 | API Reference 26 | ------------- 27 | 28 | .. autosummary:: 29 | :toctree: api 30 | 31 | luigi 32 | luigi.contrib 33 | luigi.tools 34 | 35 | 36 | Indices and tables 37 | ================== 38 | 39 | * :ref:`genindex` 40 | * :ref:`modindex` 41 | * :ref:`search` 42 | 43 | -------------------------------------------------------------------------------- /luigi/scalding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | """ 18 | luigi.scalding has moved to luigi.contrib.scalding 19 | """ 20 | 21 | import warnings 22 | 23 | from luigi.contrib.scalding import * # NOQA 24 | 25 | warnings.warn("luigi.scalding has now moved to luigi.contrib.scalding", DeprecationWarning, stacklevel=3) 26 | -------------------------------------------------------------------------------- /test/namespace_test_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import luigi 19 | 20 | luigi.namespace("mynamespace") 21 | 22 | 23 | class Foo(luigi.Task): 24 | p = luigi.Parameter() 25 | 26 | 27 | class Bar(Foo): 28 | task_namespace = "othernamespace" # namespace override 29 | 30 | luigi.namespace() 31 | -------------------------------------------------------------------------------- /luigi/webhdfs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | """ 18 | luigi.webhdfs has moved to luigi.contrib.webhdfs 19 | """ 20 | 21 | import warnings 22 | 23 | from luigi.contrib.webhdfs import * # NOQA 24 | 25 | warnings.warn("luigi.webhdfs module has been moved to luigi.contrib.webhdfs", 26 | DeprecationWarning) 27 | -------------------------------------------------------------------------------- /luigi/hive.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | """ 19 | The hive module has been moved to ``luigi.contrib.hive`` 20 | """ 21 | 22 | import warnings 23 | 24 | from luigi.contrib.hive import * # NOQA 25 | 26 | warnings.warn("luigi.hive module has been moved to luigi.contrib.hive", 27 | DeprecationWarning) 28 | -------------------------------------------------------------------------------- /test/other_module.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import luigi 19 | 20 | 21 | class OtherModuleTask(luigi.Task): 22 | p = luigi.Parameter() 23 | 24 | def output(self): 25 | return luigi.LocalTarget(self.p) 26 | 27 | def run(self): 28 | with self.output().open('w') as f: 29 | f.write('Done!') 30 | -------------------------------------------------------------------------------- /luigi/task_status.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | """ 18 | Possible values for a Task's status in the Scheduler 19 | """ 20 | 21 | PENDING = 'PENDING' 22 | FAILED = 'FAILED' 23 | DONE = 'DONE' 24 | RUNNING = 'RUNNING' 25 | SUSPENDED = 'SUSPENDED' # Only kept for backward compatibility with old clients 26 | UNKNOWN = 'UNKNOWN' 27 | DISABLED = 'DISABLED' 28 | -------------------------------------------------------------------------------- /luigi/hdfs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | """ 18 | luigi.hdfs has moved to :py:mod:`luigi.contrib.hdfs` 19 | """ 20 | # Delete this file any time after 28 July 2015 21 | 22 | import warnings 23 | 24 | from luigi.contrib.hdfs import * # NOQA 25 | 26 | warnings.warn("luigi.hdfs module has been moved to luigi.contrib.hdfs", 27 | DeprecationWarning) 28 | -------------------------------------------------------------------------------- /luigi/hadoop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | """ 18 | luigi.hadoop has moved to :py:mod:`luigi.contrib.hadoop` 19 | """ 20 | # Delete this file any time after 28 July 2015 21 | 22 | import warnings 23 | 24 | from luigi.contrib.hadoop import * # NOQA 25 | warnings.warn("luigi.hadoop module has been moved to luigi.contrib.hadoop", 26 | DeprecationWarning) 27 | -------------------------------------------------------------------------------- /luigi/hadoop_jar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | """ 18 | luigi.hadoop_jar has moved to :py:mod:`luigi.contrib.hadoop_jar` 19 | """ 20 | # Delete this file any time after 28 July 2015 21 | 22 | import warnings 23 | 24 | from luigi.contrib.hadoop_jar import * # NOQA 25 | 26 | warnings.warn("luigi.hadoop_jar module has been moved to luigi.contrib.hadoop_jar", 27 | DeprecationWarning) 28 | -------------------------------------------------------------------------------- /test/runtests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | import warnings 19 | 20 | import nose 21 | 22 | if __name__ == '__main__': 23 | with warnings.catch_warnings(): 24 | warnings.simplefilter("default") 25 | warnings.filterwarnings( 26 | "ignore", 27 | message='(.*)outputs has no custom(.*)', 28 | category=UserWarning 29 | ) 30 | nose.main() 31 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: luigi 2 | Section: libs 3 | Priority: extra 4 | Maintainer: Elias Freider 5 | Build-Depends: debhelper (>= 7), 6 | python-all (>= 2.7), 7 | python-psycopg2, 8 | python-pyparsing, 9 | python-setuptools, 10 | python-simplejson, 11 | python-tornado (>= 2.3), 12 | python-mock, 13 | python-daemon, 14 | openssh-client, 15 | python-sqlalchemy, 16 | python-unittest2, 17 | snakebite 18 | Standards-Version: 3.7.3 19 | X-Python-Version: >= 2.7 20 | 21 | Package: luigi 22 | Architecture: all 23 | Depends: ${shlibs:Depends}, 24 | ${misc:Depends}, 25 | ${python:Depends}, 26 | python-pyparsing, 27 | python-sqlalchemy, 28 | python-mechanize 29 | Description: Workflow mgmt + task scheduling + dependency resolution 30 | 31 | Package: luigi-server 32 | Architecture: all 33 | Depends: ${shlibs:Depends}, 34 | ${misc:Depends}, 35 | ${python:Depends}, 36 | python-daemon, 37 | python-pyparsing, 38 | python-sqlalchemy, 39 | python-tornado (>= 2.3) 40 | Description: Luigi central planner server 41 | -------------------------------------------------------------------------------- /test/most_common_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | from luigi.tools.range import most_common 21 | 22 | 23 | class MostCommonTest(unittest.TestCase): 24 | 25 | def setUp(self): 26 | self.runs = [ 27 | ([1], (1, 1)), 28 | ([1, 1], (1, 2)), 29 | ([1, 1, 2], (1, 2)), 30 | ([1, 1, 2, 2, 2], (2, 3)) 31 | ] 32 | 33 | def test_runs(self): 34 | for args, result in self.runs: 35 | actual = most_common(args) 36 | expected = result 37 | self.assertEqual(expected, actual) 38 | -------------------------------------------------------------------------------- /luigi/static/visualiser/mockdata/dep_graph: -------------------------------------------------------------------------------- 1 | { 2 | "response": { 3 | "FactorTask(product=12)": { 4 | "deps": [ 5 | "FactorTask(product=2)", 6 | "FactorTask(product=6)" 7 | ], 8 | "start_time": 1369300552.60482, 9 | "status": "PENDING", 10 | "workers": [ 11 | "worker-641996460" 12 | ] 13 | }, 14 | "FactorTask(product=2)": { 15 | "deps": [], 16 | "start_time": 1369300552.60741, 17 | "status": "FAILED", 18 | "workers": [ 19 | "worker-641996460" 20 | ] 21 | }, 22 | "FactorTask(product=3)": { 23 | "deps": [], 24 | "start_time": 1369300552.61154, 25 | "status": "PENDING", 26 | "workers": [ 27 | "worker-641996460" 28 | ] 29 | }, 30 | "FactorTask(product=6)": { 31 | "deps": [ 32 | "FactorTask(product=2)", 33 | "FactorTask(product=3)" 34 | ], 35 | "start_time": 1369300552.609396, 36 | "status": "DONE", 37 | "workers": [ 38 | "worker-641996460" 39 | ] 40 | } 41 | } 42 | } 43 | 
-------------------------------------------------------------------------------- /luigi/static/visualiser/mockdata/task_list: -------------------------------------------------------------------------------- 1 | { 2 | "response": { 3 | "FactorTask(product=12)": { 4 | "deps": [ 5 | "FactorTask(product=2)", 6 | "FactorTask(product=6)" 7 | ], 8 | "start_time": 1369300552.60482, 9 | "status": "PENDING", 10 | "workers": [ 11 | "worker-641996460" 12 | ] 13 | }, 14 | "FactorTask(product=2)": { 15 | "deps": [], 16 | "start_time": 1369300552.60741, 17 | "status": "FAILED", 18 | "workers": [ 19 | "worker-641996460" 20 | ] 21 | }, 22 | "FactorTask(product=3)": { 23 | "deps": [], 24 | "start_time": 1369300552.61154, 25 | "status": "PENDING", 26 | "workers": [ 27 | "worker-641996460" 28 | ] 29 | }, 30 | "FactorTask(product=6)": { 31 | "deps": [ 32 | "FactorTask(product=2)", 33 | "FactorTask(product=3)" 34 | ], 35 | "start_time": 1369300552.609396, 36 | "status": "DONE", 37 | "workers": [ 38 | "worker-641996460" 39 | ] 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /luigi/contrib/hdfs/error.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """ 19 | The implementations of the hdfs clients. 
The hadoop cli client and the 20 | snakebite client. 21 | """ 22 | 23 | 24 | class HDFSCliError(Exception): 25 | 26 | def __init__(self, command, returncode, stdout, stderr): 27 | self.returncode = returncode 28 | self.stdout = stdout 29 | self.stderr = stderr 30 | msg = ("Command %r failed [exit code %d]\n" 31 | "---stdout---\n" 32 | "%s\n" 33 | "---stderr---\n" 34 | "%s" 35 | "------------") % (command, returncode, stdout, stderr) 36 | super(HDFSCliError, self).__init__(msg) 37 | -------------------------------------------------------------------------------- /luigi/event.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | ''' Definitions needed for events. See :ref:`Events` for info on how to use it.''' 19 | 20 | 21 | class Event(object): 22 | # TODO nice descriptive subclasses of Event instead of strings? pass their instances to the callback instead of an undocumented arg list? 
23 | DEPENDENCY_DISCOVERED = "event.core.dependency.discovered" # triggered for every (task, upstream task) pair discovered in a jobflow 24 | DEPENDENCY_MISSING = "event.core.dependency.missing" 25 | DEPENDENCY_PRESENT = "event.core.dependency.present" 26 | BROKEN_TASK = "event.core.task.broken" 27 | START = "event.core.start" 28 | FAILURE = "event.core.failure" 29 | SUCCESS = "event.core.success" 30 | PROCESSING_TIME = "event.core.processing_time" 31 | -------------------------------------------------------------------------------- /test/set_task_name_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | import luigi 21 | 22 | 23 | def create_class(cls_name): 24 | class NewTask(luigi.WrapperTask): 25 | pass 26 | 27 | NewTask.__name__ = cls_name 28 | 29 | return NewTask 30 | 31 | 32 | create_class('MyNewTask') 33 | 34 | 35 | class SetTaskNameTest(unittest.TestCase): 36 | 37 | ''' I accidentally introduced an issue in this commit: 38 | https://github.com/spotify/luigi/commit/6330e9d0332e6152996292a39c42f752b9288c96 39 | 40 | This causes tasks not to get exposed if they change name later. Adding a unit test 41 | to resolve the issue. 
''' 42 | 43 | def test_set_task_name(self): 44 | luigi.run(['--local-scheduler', '--no-lock', 'MyNewTask']) 45 | 46 | 47 | if __name__ == '__main__': 48 | luigi.run() 49 | -------------------------------------------------------------------------------- /luigi/templates/show.html: -------------------------------------------------------------------------------- 1 | {% include "header.html" %} 2 |
3 |
4 |

Info

5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
Task Id{{task.id}}
Task Name{{task.name}}
Host{{task.host}}
MoreAll "{{task.name}}" runs
25 |
26 |
27 |

Parameters

28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | {% for (k, param) in task.parameters.items() %} 37 | 38 | 39 | 40 | 41 | {% end %} 42 | 43 |
NameValue
{{k}}{{param.value}}
44 |

Actions

45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | {% for event in task.events %} 54 | 55 | 56 | 57 | 58 | {% end %} 59 | 60 | 61 |
StatusAction Time
{{event.event_name}}{{event.ts}}
62 | -------------------------------------------------------------------------------- /sitecustomize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def patch_process_for_coverage(): 6 | # patch multiprocessing module to get coverage 7 | # https://bitbucket.org/ned/coveragepy/issue/117/enable-coverage-measurement-of-code-run-by 8 | from coverage.collector import Collector 9 | from coverage import coverage 10 | import multiprocessing 11 | # detect if coverage was running in forked process 12 | 13 | if sys.version_info >= (3, 4): 14 | klass = multiprocessing.process.BaseProcess 15 | else: 16 | klass = multiprocessing.Process 17 | 18 | if Collector._collectors: 19 | original = multiprocessing.Process._bootstrap 20 | 21 | class ProcessWithCoverage(multiprocessing.Process): 22 | def _bootstrap(self): 23 | cov = coverage( 24 | data_suffix=True, 25 | config_file=os.getenv('COVERAGE_PROCESS_START', True) 26 | ) 27 | cov.start() 28 | try: 29 | return original(self) 30 | finally: 31 | cov.stop() 32 | cov.save() 33 | 34 | if sys.version_info >= (3, 4): 35 | klass._bootstrap = ProcessWithCoverage._bootstrap 36 | else: 37 | multiprocessing.Process = ProcessWithCoverage 38 | 39 | 40 | if os.getenv('FULL_COVERAGE', 'false') == 'true': 41 | try: 42 | import coverage 43 | coverage.process_startup() 44 | patch_process_for_coverage() 45 | except ImportError: 46 | pass 47 | -------------------------------------------------------------------------------- /test/dynamic_import_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import LuigiTestCase 19 | 20 | import luigi 21 | import luigi.interface 22 | import tempfile 23 | import re 24 | 25 | _testing_glob_var = None 26 | 27 | 28 | class CmdlineTest(LuigiTestCase): 29 | 30 | def test_dynamic_loading(self): 31 | with tempfile.NamedTemporaryFile(dir='test/', prefix="_foo_module", suffix='.py') as temp_module_file: 32 | temp_module_file.file.write(b''' 33 | import luigi 34 | 35 | class FooTask(luigi.Task): 36 | x = luigi.IntParameter() 37 | 38 | def run(self): 39 | luigi._testing_glob_var = self.x 40 | ''') 41 | temp_module_file.file.flush() 42 | temp_module_path = temp_module_file.name 43 | temp_module_name = re.search(r'/(_foo_module.*).py', temp_module_path).group(1) 44 | luigi.interface.run(['--module', temp_module_name, 'FooTask', '--x', '123', '--local-scheduler', '--no-lock']) 45 | 46 | self.assertEqual(luigi._testing_glob_var, 123) 47 | -------------------------------------------------------------------------------- /test/factorial_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | import luigi 21 | 22 | 23 | class Factorial(luigi.Task): 24 | 25 | ''' This calculates factorials *online* and does not write its results anywhere 26 | 27 | Demonstrates the ability for dependencies between Tasks and not just between their output. 28 | ''' 29 | n = luigi.IntParameter(default=100) 30 | 31 | def requires(self): 32 | if self.n > 1: 33 | return Factorial(self.n - 1) 34 | 35 | def run(self): 36 | if self.n > 1: 37 | self.value = self.n * self.requires().value 38 | else: 39 | self.value = 1 40 | self.complete = lambda: True 41 | 42 | def complete(self): 43 | return False 44 | 45 | 46 | class FactorialTest(unittest.TestCase): 47 | 48 | def test_invoke(self): 49 | luigi.build([Factorial(100)], local_scheduler=True) 50 | self.assertEqual(Factorial(42).value, 1405006117752879898543142606244511569936384000000000) 51 | 52 | if __name__ == '__main__': 53 | luigi.run() 54 | -------------------------------------------------------------------------------- /test/remote_scheduler_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import os 19 | import tempfile 20 | import unittest 21 | 22 | import luigi.server 23 | import server_test 24 | 25 | tempdir = tempfile.mkdtemp() 26 | 27 | 28 | class DummyTask(luigi.Task): 29 | id = luigi.Parameter() 30 | 31 | def run(self): 32 | f = self.output().open('w') 33 | f.close() 34 | 35 | def output(self): 36 | return luigi.LocalTarget(os.path.join(tempdir, str(self.id))) 37 | 38 | 39 | class RemoteSchedulerTest(server_test.ServerTestBase): 40 | 41 | def _test_run(self, workers): 42 | tasks = [DummyTask(id) for id in range(20)] 43 | luigi.build(tasks, workers=workers, scheduler_port=self.get_http_port()) 44 | 45 | for t in tasks: 46 | self.assertEqual(t.complete(), True) 47 | self.assertTrue(os.path.exists(t.output().path)) 48 | 49 | def test_single_worker(self): 50 | self._test_run(workers=1) 51 | 52 | def test_multiple_workers(self): 53 | self._test_run(workers=10) 54 | 55 | 56 | if __name__ == '__main__': 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /test/subtask_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | import abc 19 | from helpers import unittest 20 | 21 | import luigi 22 | 23 | 24 | class AbstractTask(luigi.Task): 25 | k = luigi.IntParameter() 26 | 27 | @abc.abstractproperty 28 | def foo(self): 29 | raise NotImplementedError 30 | 31 | @abc.abstractmethod 32 | def helper_function(self): 33 | raise NotImplementedError 34 | 35 | def run(self): 36 | return ",".join([self.foo, self.helper_function()]) 37 | 38 | 39 | class Implementation(AbstractTask): 40 | 41 | @property 42 | def foo(self): 43 | return "bar" 44 | 45 | def helper_function(self): 46 | return "hello" * self.k 47 | 48 | 49 | class AbstractSubclassTest(unittest.TestCase): 50 | 51 | def test_instantiate_abstract(self): 52 | def try_instantiate(): 53 | AbstractTask(k=1) 54 | 55 | self.assertRaises(TypeError, try_instantiate) 56 | 57 | def test_instantiate(self): 58 | self.assertEqual("bar,hellohello", Implementation(k=2).run()) 59 | 60 | if __name__ == '__main__': 61 | luigi.run() 62 | -------------------------------------------------------------------------------- /examples/foo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from __future__ import print_function 19 | import os 20 | import shutil 21 | import time 22 | 23 | import luigi 24 | 25 | 26 | class Foo(luigi.WrapperTask): 27 | task_namespace = 'examples' 28 | 29 | def run(self): 30 | print("Running Foo") 31 | 32 | def requires(self): 33 | for i in range(10): 34 | yield Bar(i) 35 | 36 | 37 | class Bar(luigi.Task): 38 | task_namespace = 'examples' 39 | num = luigi.IntParameter() 40 | 41 | def run(self): 42 | time.sleep(1) 43 | self.output().open('w').close() 44 | 45 | def output(self): 46 | """ 47 | Returns the target output for this task. 48 | 49 | :return: the target output for this task. 50 | :rtype: object (:py:class:`~luigi.target.Target`) 51 | """ 52 | time.sleep(1) 53 | return luigi.LocalTarget('/tmp/bar/%d' % self.num) 54 | 55 | 56 | if __name__ == "__main__": 57 | if os.path.exists('/tmp/bar'): 58 | shutil.rmtree('/tmp/bar') 59 | 60 | luigi.run(['examples.Foo', '--workers', '2', '--local-scheduler']) 61 | -------------------------------------------------------------------------------- /luigi/contrib/pyspark_runner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright 2012-2015 Spotify AB 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | """ 20 | The pyspark program. 21 | 22 | This module will be run by spark-submit for PySparkTask jobs. 23 | 24 | The first argument is a path to the pickled instance of the PySparkTask, 25 | other arguments are the ones returned by PySparkTask.app_options() 26 | 27 | """ 28 | 29 | from __future__ import print_function 30 | 31 | try: 32 | import cPickle as pickle 33 | except ImportError: 34 | import pickle 35 | import logging 36 | import sys 37 | 38 | 39 | class PySparkRunner(object): 40 | 41 | def __init__(self, job, *args): 42 | with open(job, "rb") as fd: 43 | self.job = pickle.load(fd) 44 | self.args = args 45 | 46 | def run(self): 47 | from pyspark import SparkContext, SparkConf 48 | conf = SparkConf() 49 | self.job.setup(conf) 50 | with SparkContext(conf=conf) as sc: 51 | self.job.setup_remote(sc) 52 | self.job.main(sc, *self.args) 53 | 54 | 55 | if __name__ == '__main__': 56 | logging.basicConfig(level=logging.WARN) 57 | PySparkRunner(*sys.argv[1:]).run() 58 | -------------------------------------------------------------------------------- /test/recursion_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from __future__ import print_function 18 | 19 | import datetime 20 | from helpers import unittest 21 | 22 | import luigi 23 | import luigi.interface 24 | from luigi.mock import MockTarget 25 | 26 | 27 | class Popularity(luigi.Task): 28 | date = luigi.DateParameter(default=datetime.date.today() - datetime.timedelta(1)) 29 | 30 | def output(self): 31 | return MockTarget('/tmp/popularity/%s.txt' % self.date.strftime('%Y-%m-%d')) 32 | 33 | def requires(self): 34 | return Popularity(self.date - datetime.timedelta(1)) 35 | 36 | def run(self): 37 | f = self.output().open('w') 38 | for line in self.input().open('r'): 39 | print(int(line.strip()) + 1, file=f) 40 | 41 | f.close() 42 | 43 | 44 | class RecursionTest(unittest.TestCase): 45 | 46 | def setUp(self): 47 | MockTarget.fs.get_all_data()['/tmp/popularity/2009-01-01.txt'] = b'0\n' 48 | 49 | def test_invoke(self): 50 | luigi.build([Popularity(datetime.date(2009, 1, 5))], local_scheduler=True) 51 | 52 | self.assertEqual(MockTarget.fs.get_data('/tmp/popularity/2009-01-05.txt'), b'4\n') 53 | -------------------------------------------------------------------------------- /test/priority_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | import luigi 21 | import luigi.notifications 22 | 23 | luigi.notifications.DEBUG = True 24 | 25 | 26 | class PrioTask(luigi.Task): 27 | prio = luigi.Parameter() 28 | run_counter = 0 29 | 30 | @property 31 | def priority(self): 32 | return self.prio 33 | 34 | def requires(self): 35 | if self.prio > 10: 36 | return PrioTask(self.prio - 10) 37 | 38 | def run(self): 39 | self.t = PrioTask.run_counter 40 | PrioTask.run_counter += 1 41 | 42 | def complete(self): 43 | return hasattr(self, 't') 44 | 45 | 46 | class PriorityTest(unittest.TestCase): 47 | 48 | def test_priority(self): 49 | p, q, r = PrioTask(1), PrioTask(2), PrioTask(3) 50 | luigi.build([p, q, r], local_scheduler=True) 51 | self.assertTrue(r.t < q.t < p.t) 52 | 53 | def test_priority_w_dep(self): 54 | x, y, z = PrioTask(25), PrioTask(15), PrioTask(5) 55 | a, b, c = PrioTask(24), PrioTask(14), PrioTask(4) 56 | luigi.build([a, b, c, x, y, z], local_scheduler=True) 57 | self.assertTrue(z.t < y.t < x.t < c.t < b.t < a.t) 58 | -------------------------------------------------------------------------------- /luigi/deprecate_kwarg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import functools
import warnings


def deprecate_kwarg(old_name, new_name, kw_value):
    """
    Rename keyword arguments, but keep backwards compatibility.

    The decorated function itself still only knows ``old_name``. Callers may
    pass the value under either ``old_name`` (emits a warning) or
    ``new_name``; when both are given, ``new_name`` wins. When neither is
    given, ``kw_value`` is forwarded.

    Note that the wrapper always forwards the value as the keyword
    ``old_name``, so the argument must not also be passed positionally.

    :param old_name: keyword name the decorated function actually accepts.
    :param new_name: keyword name callers are encouraged to use instead.
    :param kw_value: value forwarded when neither keyword is supplied.
    :return: a decorator to apply to the function.

    Usage:

    .. code-block:: python

        >>> @deprecate_kwarg('old', 'new', 'defval')
        ... def some_func(old='defval'):
        ...     print(old)
        ...
        >>> some_func(new='yay')
        yay
        >>> some_func(old='yaay')
        yaay
        >>> some_func()
        defval

    """
    def real_decorator(function):
        # Preserve the wrapped function's name/docstring for introspection.
        @functools.wraps(function)
        def new_function(*args, **kwargs):
            value = kw_value
            if old_name in kwargs:
                # stacklevel=2 attributes the warning to the caller's line
                # rather than to this wrapper.
                warnings.warn('Keyword argument {0} is deprecated, use {1}'
                              .format(old_name, new_name), stacklevel=2)
                value = kwargs[old_name]
            if new_name in kwargs:
                # The new name takes precedence; the wrapped function does not
                # accept it, so it is removed before the call.
                value = kwargs.pop(new_name)
            kwargs[old_name] = value
            return function(*args, **kwargs)
        return new_function
    return real_decorator

# --------------------------------------------------------------------------------
# /test/contrib/hdfs/webhdfs_client_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2015 VNG Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from nose.plugins.attrib import attr 19 | 20 | from helpers import with_config 21 | from webhdfs_minicluster import WebHdfsMiniClusterTestCase 22 | from contrib.hdfs_test import HdfsTargetTestMixin 23 | from luigi.contrib.hdfs import WebHdfsClient 24 | 25 | 26 | @attr('minicluster') 27 | class WebHdfsTargetTest(WebHdfsMiniClusterTestCase, HdfsTargetTestMixin): 28 | 29 | def run(self, result=None): 30 | conf = {'hdfs': {'client': 'webhdfs'}, 31 | 'webhdfs': {'port': str(self.cluster.webhdfs_port)}, 32 | } 33 | with_config(conf)(super(WebHdfsTargetTest, self).run)(result) 34 | 35 | def test_actually_using_webhdfs(self): 36 | self.assertTrue(isinstance(self.create_target().fs, WebHdfsClient)) 37 | 38 | # Here is a bunch of tests that are currently failing. As should be 39 | # mentioned in the WebHdfsClient docs, it is not yet feature complete. 40 | test_slow_exists = None 41 | test_glob_exists = None 42 | test_with_close = None 43 | test_with_exception = None 44 | 45 | # This one fails when run together with the whole test suite 46 | test_write_cleanup_no_close = None 47 | -------------------------------------------------------------------------------- /test/namespace_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | import luigi 21 | import namespace_test_helper # declares another Foo in namespace mynamespace 22 | 23 | 24 | class Foo(luigi.Task): 25 | pass 26 | 27 | 28 | class FooSubclass(Foo): 29 | pass 30 | 31 | 32 | class TestNamespacing(unittest.TestCase): 33 | 34 | def test_vanilla(self): 35 | self.assertEqual(Foo.task_namespace, None) 36 | self.assertEqual(Foo.task_family, "Foo") 37 | self.assertEqual(str(Foo()), "Foo()") 38 | 39 | self.assertEqual(FooSubclass.task_namespace, None) 40 | self.assertEqual(FooSubclass.task_family, "FooSubclass") 41 | self.assertEqual(str(FooSubclass()), "FooSubclass()") 42 | 43 | def test_namespace(self): 44 | self.assertEqual(namespace_test_helper.Foo.task_namespace, "mynamespace") 45 | self.assertEqual(namespace_test_helper.Foo.task_family, "mynamespace.Foo") 46 | self.assertEqual(str(namespace_test_helper.Foo(1)), "mynamespace.Foo(p=1)") 47 | 48 | self.assertEqual(namespace_test_helper.Bar.task_namespace, "othernamespace") 49 | self.assertEqual(namespace_test_helper.Bar.task_family, "othernamespace.Bar") 50 | self.assertEqual(str(namespace_test_helper.Bar(1)), "othernamespace.Bar(p=1)") 51 | -------------------------------------------------------------------------------- /test/helpers_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2016 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use 
this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | import luigi 18 | import luigi.date_interval 19 | import luigi.interface 20 | import luigi.notifications 21 | from helpers import LuigiTestCase, RunOnceTask 22 | 23 | 24 | class LuigiTestCaseTest(LuigiTestCase): 25 | 26 | def test_1(self): 27 | class MyClass(luigi.Task): 28 | pass 29 | 30 | self.assertTrue(self.run_locally(['MyClass'])) 31 | 32 | def test_2(self): 33 | class MyClass(luigi.Task): 34 | pass 35 | 36 | self.assertTrue(self.run_locally(['MyClass'])) 37 | 38 | 39 | class RunOnceTaskTest(LuigiTestCase): 40 | 41 | def test_complete_behavior(self): 42 | """ 43 | Verify that RunOnceTask works as expected. 44 | 45 | This task will fail if it was a normal ``luigi.Task``, because 46 | RequiringTask wouldn't run because of a missing dependency at runtime.
47 | """ 48 | class MyTask(RunOnceTask): 49 | pass 50 | 51 | class RequiringTask(luigi.Task): 52 | counter = 0 53 | 54 | def requires(self): 55 | yield MyTask() 56 | 57 | def run(self): 58 | RequiringTask.counter += 1 59 | 60 | self.run_locally(['RequiringTask']) 61 | self.assertEqual(1, RequiringTask.counter) 62 | -------------------------------------------------------------------------------- /luigi/cmdline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | import sys 5 | 6 | from luigi.retcodes import run_with_retcodes 7 | 8 | 9 | def luigi_run(argv=sys.argv[1:]): 10 | run_with_retcodes(argv) 11 | 12 | 13 | def luigid(argv=sys.argv[1:]): 14 | import luigi.server 15 | import luigi.process 16 | import luigi.configuration 17 | parser = argparse.ArgumentParser(description=u'Central luigi server') 18 | parser.add_argument(u'--background', help=u'Run in background mode', action='store_true') 19 | parser.add_argument(u'--pidfile', help=u'Write pidfile') 20 | parser.add_argument(u'--logdir', help=u'log directory') 21 | parser.add_argument(u'--state-path', help=u'Pickled state file') 22 | parser.add_argument(u'--address', help=u'Listening interface') 23 | parser.add_argument(u'--unix-socket', help=u'Unix socket path') 24 | parser.add_argument(u'--port', default=8082, help=u'Listening port') 25 | 26 | opts = parser.parse_args(argv) 27 | 28 | if opts.state_path: 29 | config = luigi.configuration.get_config() 30 | config.set('scheduler', 'state_path', opts.state_path) 31 | 32 | if opts.background: 33 | # daemonize sets up logging to spooled log files 34 | logging.getLogger().setLevel(logging.INFO) 35 | luigi.process.daemonize(luigi.server.run, api_port=opts.port, 36 | address=opts.address, pidfile=opts.pidfile, 37 | logdir=opts.logdir, unix_socket=opts.unix_socket) 38 | else: 39 | if opts.logdir: 40 | logging.basicConfig(level=logging.INFO, format=luigi.process.get_log_format(), 41 | 
filename=os.path.join(opts.logdir, "luigi-server.log")) 42 | else: 43 | logging.basicConfig(level=logging.INFO, format=luigi.process.get_log_format()) 44 | luigi.server.run(api_port=opts.port, address=opts.address, unix_socket=opts.unix_socket) 45 | -------------------------------------------------------------------------------- /test/task_history_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | from helpers import LuigiTestCase 19 | 20 | import luigi 21 | import luigi.scheduler 22 | import luigi.task_history 23 | import luigi.worker 24 | 25 | luigi.notifications.DEBUG = True 26 | 27 | 28 | class SimpleTaskHistory(luigi.task_history.TaskHistory): 29 | 30 | def __init__(self): 31 | self.actions = [] 32 | 33 | def task_scheduled(self, task): 34 | self.actions.append(('scheduled', task.id)) 35 | 36 | def task_finished(self, task, successful): 37 | self.actions.append(('finished', task.id)) 38 | 39 | def task_started(self, task, worker_host): 40 | self.actions.append(('started', task.id)) 41 | 42 | 43 | class TaskHistoryTest(LuigiTestCase): 44 | 45 | def test_run(self): 46 | th = SimpleTaskHistory() 47 | sch = luigi.scheduler.CentralPlannerScheduler(task_history_impl=th) 48 | with luigi.worker.Worker(scheduler=sch) as w: 49 | class MyTask(luigi.Task): 50 | pass 51 | 52 | task = MyTask() 53 | w.add(task) 54 | w.run() 55 | 56 | self.assertEqual(th.actions, [ 57 | ('scheduled', task.task_id), 58 | ('started', task.task_id), 59 | ('finished', task.task_id) 60 | ]) 61 | -------------------------------------------------------------------------------- /test/_mysqldb_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | from helpers import unittest 19 | 20 | import mysql.connector 21 | from luigi.contrib.mysqldb import MySqlTarget 22 | 23 | host = 'localhost' 24 | port = 3306 25 | database = 'luigi_test' 26 | username = None 27 | password = None 28 | table_updates = 'table_updates' 29 | 30 | 31 | def _create_test_database(): 32 | con = mysql.connector.connect(user=username, 33 | password=password, 34 | host=host, 35 | port=port, 36 | autocommit=True) 37 | con.cursor().execute('CREATE DATABASE IF NOT EXISTS %s' % database) 38 | 39 | 40 | _create_test_database() 41 | target = MySqlTarget(host, database, username, password, '', 'update_id') 42 | 43 | 44 | class MySqlTargetTest(unittest.TestCase): 45 | 46 | def test_touch_and_exists(self): 47 | drop() 48 | self.assertFalse(target.exists(), 49 | 'Target should not exist before touching it') 50 | target.touch() 51 | self.assertTrue(target.exists(), 52 | 'Target should exist after touching it') 53 | 54 | 55 | def drop(): 56 | con = target.connect(autocommit=True) 57 | con.cursor().execute('DROP TABLE IF EXISTS %s' % table_updates) 58 | -------------------------------------------------------------------------------- /test/test_ssh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
#

import subprocess
from helpers import unittest

from luigi.contrib.ssh import RemoteContext


class TestMockedRemoteContext(unittest.TestCase):

    def test_subprocess_delegation(self):
        """ Test the ssh command line built by RemoteContext.Popen.

        ``subprocess.Popen`` is temporarily replaced by a stub that records
        the command it is given instead of spawning a process.
        """
        orig_Popen = subprocess.Popen
        self.last_test = None

        def Popen(cmd, **kwargs):
            # Record the command only; nothing is executed.
            self.last_test = cmd

        subprocess.Popen = Popen
        try:
            context = RemoteContext(
                "some_host",
                username="luigi",
                key_file="/some/key.pub"
            )
            context.Popen(["ls"])
        finally:
            # Always restore the real Popen, even if the code above raises,
            # so the stub cannot leak into other tests in the same process.
            subprocess.Popen = orig_Popen

        for expected in ("ssh", "-i", "/some/key.pub", "luigi@some_host", "ls"):
            self.assertIn(expected, self.last_test)

    def test_check_output_fail_connect(self):
        """ Test check_output to a non-existing host """
        context = RemoteContext("__NO_HOST_LIKE_THIS__", connect_timeout=1)
        self.assertRaises(
            subprocess.CalledProcessError,
            context.check_output, ["ls"]
        )

# --------------------------------------------------------------------------------
# /test/clone_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from helpers import unittest 19 | 20 | import luigi 21 | import luigi.notifications 22 | 23 | luigi.notifications.DEBUG = True 24 | 25 | 26 | class LinearSum(luigi.Task): 27 | lo = luigi.IntParameter() 28 | hi = luigi.IntParameter() 29 | 30 | def requires(self): 31 | if self.hi > self.lo: 32 | return self.clone(hi=self.hi - 1) 33 | 34 | def run(self): 35 | if self.hi > self.lo: 36 | self.s = self.requires().s + self.f(self.hi - 1) 37 | else: 38 | self.s = 0 39 | self.complete = lambda: True # workaround since we don't write any output 40 | 41 | def complete(self): 42 | return False 43 | 44 | def f(self, x): 45 | return x 46 | 47 | 48 | class PowerSum(LinearSum): 49 | p = luigi.IntParameter() 50 | 51 | def f(self, x): 52 | return x ** self.p 53 | 54 | 55 | class CloneTest(unittest.TestCase): 56 | 57 | def test_args(self): 58 | t = LinearSum(lo=42, hi=45) 59 | self.assertEqual(t.param_args, (42, 45)) 60 | self.assertEqual(t.param_kwargs, {'lo': 42, 'hi': 45}) 61 | 62 | def test_recursion(self): 63 | t = LinearSum(lo=42, hi=45) 64 | luigi.build([t], local_scheduler=True) 65 | self.assertEqual(t.s, 42 + 43 + 44) 66 | 67 | def test_inheritance(self): 68 | t = PowerSum(lo=42, hi=45, p=2) 69 | luigi.build([t], local_scheduler=True) 70 | self.assertEqual(t.s, 42 ** 2 + 43 ** 2 + 44 ** 2) 71 | -------------------------------------------------------------------------------- /examples/foo_complex.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
max_depth = 10
max_total_nodes = 50
current_nodes = 0


class Foo(luigi.Task):
    """Root example task: prints a message once its Bar dependencies are done."""
    task_namespace = 'examples'

    def run(self):
        print("Running Foo")

    def requires(self):
        """Yield ``30 // max_depth`` Bar tasks, counting each in ``current_nodes``."""
        global current_nodes
        # Floor division keeps the count an int on every Python version;
        # ``30 / max_depth`` is a float on Python 3 and range() rejects it.
        for i in range(30 // max_depth):
            current_nodes += 1
            yield Bar(i)


class Bar(luigi.Task):
    """Example task that touches ``/tmp/bar/<num>`` and fans out to more Bar tasks."""
    task_namespace = 'examples'

    num = luigi.IntParameter()

    def run(self):
        """Sleep for a second, then create the (empty) output file."""
        time.sleep(1)
        self.output().open('w').close()

    def requires(self):
        """
        Yield a random number of further Bar tasks.

        Nothing is yielded once ``current_nodes`` has reached
        ``max_total_nodes``; the check is made once, before the loop.
        """
        global current_nodes

        if max_total_nodes > current_nodes:
            valor = int(random.uniform(1, 30))
            # Floor division for the same reason as in Foo.requires().
            for i in range(valor // max_depth):
                current_nodes += 1
                yield Bar(current_nodes)

    def output(self):
        """
        Returns the target output for this task.

        :return: the target output for this task.
        :rtype: object (:py:class:`~luigi.target.Target`)
        """
        # The delay is part of the example: every call to output() takes a second.
        time.sleep(1)
        return luigi.LocalTarget('/tmp/bar/%d' % self.num)


if __name__ == "__main__":
    # Start from a clean slate so every Bar task has to run again.
    if os.path.exists('/tmp/bar'):
        shutil.rmtree('/tmp/bar')

    luigi.run(['examples.Foo', '--workers', '2', '--local-scheduler'])
33 | # https://github.com/travis-ci/travis-ci/issues/4794#issuecomment-143758799 34 | matrix: 35 | include: 36 | - python: 3.5 37 | env: TOXENV=py35-nonhdfs 38 | - python: 3.5 39 | env: TOXENV=py35-unixsocket 40 | - python: 3.5 41 | env: TOXENV=py35-cdh 42 | 43 | sudo: false 44 | 45 | cache: 46 | directories: 47 | - $HOME/.pip-cache 48 | 49 | install: 50 | - pip install 'tox<3.0' 51 | 52 | before_install: 53 | - openssl aes-256-cbc -K $encrypted_e05f6ccc270e_key -iv $encrypted_e05f6ccc270e_iv -in test/gcloud-credentials.json.enc -out test/gcloud-credentials.json -d 54 | || export DIDNT_CREATE_GCP_CREDS=1 55 | 56 | before_script: 57 | # TODO, only do this step for the postgres environment 58 | - psql -c 'create database spotify;' -U postgres 59 | 60 | # allow ssh loopback 61 | - ssh-keygen -t rsa -N '' -C '' -f ~/.ssh/id_rsa 62 | - cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 63 | - ssh -o StrictHostKeyChecking=no localhost true 64 | 65 | # Create mysql database if possible but fail silently if not available. 
66 | - mysql -e 'create database IF NOT EXISTS luigi_test;' -uroot || true 67 | 68 | script: 69 | - tox --version 70 | - ./scripts/ci/conditional_tox.sh 71 | 72 | branches: 73 | only: 74 | - master 75 | -------------------------------------------------------------------------------- /luigi/static/visualiser/css/tipsy.css: -------------------------------------------------------------------------------- 1 | .tipsy { font-size: 10px; position: absolute; padding: 5px; z-index: 100000; } 2 | .tipsy-inner { background-color: #000; color: #FFF; max-width: 200px; padding: 5px 8px 4px 8px; text-align: center; } 3 | 4 | /* Rounded corners */ 5 | .tipsy-inner { border-radius: 3px; -moz-border-radius: 3px; -webkit-border-radius: 3px; } 6 | 7 | /* Uncomment for shadow */ 8 | .tipsy-inner { box-shadow: 0 0 5px #000000; -webkit-box-shadow: 0 0 5px #000000; -moz-box-shadow: 0 0 5px #000000; } 9 | 10 | .tipsy-arrow { position: absolute; width: 0; height: 0; line-height: 0; border: 5px dashed #000; } 11 | 12 | /* Rules to colour arrows */ 13 | .tipsy-arrow-n { border-bottom-color: #000; } 14 | .tipsy-arrow-s { border-top-color: #000; } 15 | .tipsy-arrow-e { border-left-color: #000; } 16 | .tipsy-arrow-w { border-right-color: #000; } 17 | 18 | .tipsy-n .tipsy-arrow { top: 0px; left: 50%; margin-left: -5px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent; } 19 | .tipsy-nw .tipsy-arrow { top: 0; left: 10px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent;} 20 | .tipsy-ne .tipsy-arrow { top: 0; right: 10px; border-bottom-style: solid; border-top: none; border-left-color: transparent; border-right-color: transparent;} 21 | .tipsy-s .tipsy-arrow { bottom: 0; left: 50%; margin-left: -5px; border-top-style: solid; border-bottom: none; border-left-color: transparent; border-right-color: transparent; } 22 | .tipsy-sw .tipsy-arrow { bottom: 0; left: 10px; 
class SparkeyExportTask(luigi.Task):
    """
    A luigi task that writes to a local sparkey log file.

    Subclasses should implement the requires and output methods. The output
    must be a luigi.LocalTarget.

    The resulting sparkey log file will contain one entry for every line in
    the input, mapping from the first value to a tab-separated list of the
    rest of the line.

    To generate a simple key-value index, yield "key", "value" pairs from the input(s) to this task.
    """

    # the separator used to split input lines
    separator = '\t'

    def __init__(self, *args, **kwargs):
        super(SparkeyExportTask, self).__init__(*args, **kwargs)

    def run(self):
        self._write_sparkey_file()

    def _write_sparkey_file(self):
        """
        Convert the input into a sparkey log file at the output path.

        :raises TypeError: if ``self.output()`` is not a ``luigi.LocalTarget``.
        :raises ValueError: if an input line does not contain the separator
            (raised by the two-name unpacking of the split result).
        """
        # Imported lazily so that this module can be loaded without sparkey.
        import sparkey

        infile = self.input()
        outfile = self.output()
        if not isinstance(outfile, luigi.LocalTarget):
            raise TypeError("output must be a LocalTarget")

        # write job output to temporary sparkey file
        temp_output = luigi.LocalTarget(is_tmp=True)
        writer = sparkey.LogWriter(temp_output.path)
        try:
            reader = infile.open('r')
            try:
                for line in reader:
                    k, v = line.strip().split(self.separator, 1)
                    writer[k] = v
            finally:
                # Release the input handle even when a line fails to parse.
                reader.close()
        finally:
            # Always close the writer so a failed run does not leak it.
            writer.close()

        # move finished sparkey file to final destination
        # (only reached when every line was written successfully)
        temp_output.move(outfile.path)
code:: bash 21 | 22 | # These commands are pretty fast and will tell you if you've 23 | # broken something major: 24 | tox -e flake8 25 | tox -e py27-nonhdfs 26 | 27 | # You can also test particular files for even faster iterations 28 | tox -e py27-nonhdfs test/rpc_test.py 29 | 30 | # The visualiser tests require phantomjs to be installed on your path 31 | tox -e visualiser 32 | 33 | # And some of the others involve downloading and running Hadoop: 34 | tox -e py33-cdh 35 | tox -e py34-hdp 36 | 37 | Where ``flake8`` is the lint checking, ``py27`` is obviously Python 2.7. 38 | ``nonhdfs`` are tests not running on the Hadoop minicluster and ``cdh`` and 39 | ``hdp`` are two different hadoop distributions. For most local development it's 40 | usually enough to run the lint checking and a python version for ``nonhdfs`` 41 | and let Travis run for the whole matrix. 42 | 43 | For `cdh` and `hdp`, tox will download the hadoop distribution for you. However, 44 | you must have Java installed and the `JAVA_HOME` environment variable 45 | set. 46 | 47 | For more details, check out the ``.travis.yml`` and ``tox.ini`` files. 48 | 49 | Writing documentation 50 | ===================== 51 | 52 | All documentation for luigi is written in `reStructuredText/Sphinx markup 53 | `_ and lives both in the 54 | code as docstrings and in `.rst` files. Pull requests should come with documentation 55 | when appropriate. 56 | 57 | You can check the syntax for your documentation by running 58 | 59 | .. code:: bash 60 | 61 | tox -e docs 62 | -------------------------------------------------------------------------------- /test/contrib/_webhdfs_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License.
class TestWebHdfsTarget(unittest.TestCase):

    '''
    This test requires a running Hadoop cluster with WebHdfs enabled
    This test requires the luigi.cfg file to have a `hdfs` section
    with the namenode_host, namenode_port and user settings.
    '''

    def setUp(self):
        # Plain literal: the former ``.format()`` call had no placeholders to fill.
        self.testDir = "/tmp/luigi-test"
        self.path = os.path.join(self.testDir, 'out.txt')
        self.client = webhdfs.WebHdfsClient()
        self.target = webhdfs.WebHdfsTarget(self.path)

    def tearDown(self):
        # Remove whatever the test wrote so that runs stay independent.
        if self.client.exists(self.testDir):
            self.client.remove(self.testDir, recursive=True)

    def test_write(self):
        """Writing two lines and closing makes the path exist."""
        self.assertFalse(self.client.exists(self.path))
        output = self.target.open('w')
        output.write('this is line 1\n')
        output.write('this is line #2\n')
        output.close()
        self.assertTrue(self.client.exists(self.path))

    def test_read(self):
        """read() returns exactly what test_write() stored."""
        self.test_write()
        input_ = self.target.open('r')
        try:
            all_test = 'this is line 1\nthis is line #2\n'
            self.assertEqual(all_test, input_.read())
        finally:
            # Close the handle even when the assertion fails.
            input_.close()

    def test_read_lines(self):
        """readlines() yields the stored lines without trailing newlines."""
        self.test_write()
        input_ = self.target.open('r')
        try:
            lines = list(input_.readlines())
            self.assertEqual(lines[0], 'this is line 1')
            self.assertEqual(lines[1], 'this is line #2')
        finally:
            # Close the handle even when an assertion fails.
            input_.close()
class DictParameterTask(luigi.Task):
    """Minimal task carrying a single DictParameter."""
    param = luigi.DictParameter()


class DictParameterTest(unittest.TestCase):
    """Parsing and serialization checks for ``luigi.DictParameter``."""

    # Ordered so that the serialized JSON has a predictable key order.
    _dict = collections.OrderedDict([('username', 'me'), ('password', 'secret')])

    def test_parse(self):
        encoded = json.dumps(DictParameterTest._dict)
        parsed = luigi.DictParameter().parse(encoded)
        self.assertEqual(parsed, DictParameterTest._dict)

    def test_serialize(self):
        serialized = luigi.DictParameter().serialize(DictParameterTest._dict)
        self.assertEqual(serialized, '{"username": "me", "password": "secret"}')

    def test_parse_and_serialize(self):
        # Both key orders must survive a parse/serialize round trip unchanged.
        for json_input in ('{"username": "me", "password": "secret"}',
                           '{"password": "secret", "username": "me"}'):
            parsed = luigi.DictParameter().parse(json_input)
            self.assertEqual(json_input, luigi.DictParameter().serialize(parsed))

    def test_parse_interface(self):
        def check(task):
            self.assertEqual(task.param, DictParameterTest._dict)

        in_parse(["DictParameterTask", "--param", '{"username": "me", "password": "secret"}'],
                 check)

    def test_serialize_task(self):
        task = DictParameterTask(DictParameterTest._dict)
        self.assertEqual(str(task), 'DictParameterTask(param={"username": "me", "password": "secret"})')

    def test_parse_invalid_input(self):
        with self.assertRaises(ValueError):
            luigi.DictParameter().parse('{"invalid"}')
19 | """ 20 | 21 | from luigi import task 22 | from luigi.task import Task, Config, ExternalTask, WrapperTask, namespace 23 | 24 | from luigi import target 25 | from luigi.target import Target 26 | 27 | from luigi import file # wtf @ naming 28 | from luigi.file import File, LocalTarget 29 | 30 | from luigi import rpc 31 | from luigi.rpc import RemoteScheduler, RPCError 32 | from luigi import parameter 33 | from luigi.parameter import ( 34 | Parameter, 35 | DateParameter, MonthParameter, YearParameter, DateHourParameter, DateMinuteParameter, 36 | DateIntervalParameter, TimeDeltaParameter, 37 | IntParameter, FloatParameter, BooleanParameter, BoolParameter, 38 | TaskParameter, EnumParameter, DictParameter 39 | ) 40 | 41 | from luigi import configuration 42 | 43 | from luigi import interface 44 | from luigi.interface import run, build 45 | 46 | from luigi import event 47 | from luigi.event import Event 48 | 49 | from .tools import range # just makes the tool classes available from command line 50 | 51 | 52 | __all__ = [ 53 | 'task', 'Task', 'Config', 'ExternalTask', 'WrapperTask', 'namespace', 54 | 'target', 'Target', 'File', 'LocalTarget', 'rpc', 'RemoteScheduler', 55 | 'RPCError', 'parameter', 'Parameter', 'DateParameter', 'MonthParameter', 56 | 'YearParameter', 'DateHourParameter', 'DateMinuteParameter', 'range', 57 | 'DateIntervalParameter', 'TimeDeltaParameter', 'IntParameter', 58 | 'FloatParameter', 'BooleanParameter', 'BoolParameter', 'TaskParameter', 59 | 'EnumParameter', 'DictParameter', 'configuration', 'interface', 'file', 'run', 'build', 60 | 'event', 'Event' 61 | ] 62 | -------------------------------------------------------------------------------- /test/contrib/scalding_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance 
class MyScaldingTask(scalding.ScaldingJobTask):
    """Scalding job whose source file is supplied as a parameter."""
    scala_source = luigi.Parameter()

    def source(self):
        return self.scala_source


class ScaldingTest(unittest.TestCase):
    """Runs MyScaldingTask against a fake SCALDING_HOME with the job launch mocked."""

    def setUp(self):
        # mkdtemp picks a name that is guaranteed not to exist yet, unlike a
        # random number appended to a fixed prefix.
        self.scalding_home = tempfile.mkdtemp(prefix='scalding-')
        self.lib_dir = os.path.join(self.scalding_home, 'lib')
        os.mkdir(self.lib_dir)
        os.mkdir(os.path.join(self.scalding_home, 'provided'))
        os.mkdir(os.path.join(self.scalding_home, 'libjars'))
        # Empty placeholder file named like a scalding-core jar.
        with open(os.path.join(self.lib_dir, 'scalding-core-foo'), 'w'):
            pass

        self.scala_source = os.path.join(self.scalding_home, 'my_source.scala')
        with open(self.scala_source, 'w') as f:
            f.write('class foo extends Job')

        os.environ['SCALDING_HOME'] = self.scalding_home

    def tearDown(self):
        shutil.rmtree(self.scalding_home)

    # Patch decorators are applied bottom-up, so the mock for the decorator
    # closest to the function arrives as the first argument.
    @mock.patch('subprocess.check_call')
    @mock.patch('luigi.contrib.hadoop.run_and_track_hadoop_job')
    def test_scalding(self, track_job, check_call):
        success = luigi.run(['MyScaldingTask', '--scala-source', self.scala_source, '--local-scheduler', '--no-lock'])
        self.assertTrue(success)
        # TODO: check more stuff
'__main__': 65 | luigi.run() 66 | -------------------------------------------------------------------------------- /doc/design_and_limitations.rst: -------------------------------------------------------------------------------- 1 | Design and limitations 2 | ---------------------- 3 | 4 | Luigi is the successor to a couple of attempts that we weren't fully happy with. 5 | We learned a lot from our mistakes and some design decisions include: 6 | 7 | - Straightforward command-line integration. 8 | - As little boilerplate as possible. 9 | - Focus on job scheduling and dependency resolution, not a particular platform. 10 | In particular, this means no limitation to Hadoop. 11 | Though Hadoop/HDFS support is built-in and is easy to use, 12 | this is just one of many types of things you can run. 13 | - A file system abstraction where code doesn't have to care about where files are located. 14 | - Atomic file system operations through this abstraction. 15 | If a task crashes it won't lead to a broken state. 16 | - The dependencies are decentralized. 17 | No big config file in XML. 18 | Each task just specifies which inputs it needs and cross-module dependencies are trivial. 19 | - A web server that renders the dependency graph and does locking, etc. for free. 20 | - Trivial to extend with new file systems, file formats, and job types. 21 | You can easily write jobs that insert a Tokyo Cabinet into Cassandra. 22 | Adding support for new systems is generally not very hard. 23 | (Feel free to send us a patch when you're done!) 24 | - Date algebra included. 25 | - Lots of unit tests of the most basic stuff. 26 | 27 | It wouldn't be fair not to mention some limitations with the current design: 28 | 29 | - Its focus is on batch processing so 30 | it's probably less useful for near real-time pipelines or continuously running processes. 31 | - The assumption is that each task is a sizable chunk of work.
logger = logging.getLogger('luigi-interface')


class StoredTask(object):
    """
    Interface for methods on TaskHistory
    """

    # TODO : do we need this task as distinct from luigi.scheduler.Task?
    #        this only records host and record_id in addition to task parameters.

    def __init__(self, task, status, host=None):
        # record_id starts out unset; nothing in this class assigns it later.
        self.record_id = None
        self.host = host
        self.status = status
        self._task = task

    @property
    def task_family(self):
        """The ``family`` attribute of the wrapped task."""
        return self._task.family

    @property
    def parameters(self):
        """The ``params`` attribute of the wrapped task."""
        return self._task.params


@six.add_metaclass(abc.ABCMeta)
class TaskHistory(object):
    """
    Abstract Base Class for updating the run history of a task
    """

    @abc.abstractmethod
    def task_scheduled(self, task):
        """Hook taking the scheduled ``task``; subclasses must implement it."""

    @abc.abstractmethod
    def task_finished(self, task, successful):
        """Hook taking ``task`` and its ``successful`` flag; subclasses must implement it."""

    @abc.abstractmethod
    def task_started(self, task, worker_host):
        """Hook taking ``task`` and the ``worker_host``; subclasses must implement it."""

    # TODO(erikbern): should web method (find_latest_runs etc) be abstract?


class NopHistory(TaskHistory):
    """TaskHistory whose hooks all do nothing."""

    def task_scheduled(self, task):
        return None

    def task_finished(self, task, successful):
        return None

    def task_started(self, task, worker_host):
        return None
logger = logging.getLogger('luigi-interface')


def get_autoconfig_client():
    """
    Creates the client as specified in the `luigi.cfg` configuration.

    :return: a new client object for the configured client name.
    :raises Exception: if the configured client name is not one of
        ``webhdfs``, ``snakebite``, ``snakebite_with_hadoopcli_fallback``
        or ``hadoopcli``.
    """
    configured_client = hdfs_config.get_configured_hdfs_client()
    if configured_client == "webhdfs":
        return hdfs_webhdfs_client.WebHdfsClient()
    if configured_client == "snakebite":
        return hdfs_snakebite_client.SnakebiteHdfsClient()
    if configured_client == "snakebite_with_hadoopcli_fallback":
        return luigi.contrib.target.CascadingClient([hdfs_snakebite_client.SnakebiteHdfsClient(),
                                                     hdfs_hadoopcli_clients.create_hadoopcli_client()])
    if configured_client == "hadoopcli":
        return hdfs_hadoopcli_clients.create_hadoopcli_client()
    # %-formatting instead of ``+`` so that a non-string value (e.g. None) is
    # reported as an unknown client rather than failing with a TypeError.
    raise Exception("Unknown hdfs client %s" % (configured_client,))


def _with_ac(method_name):
    """
    Build a module-level function that forwards to the autoconfigured client.

    The client is created on every call of the returned function, not when
    the wrapper is built.

    :param method_name: name of the client method to forward to.
    :return: a function passing its arguments to that method.
    """
    def result(*args, **kwargs):
        return getattr(get_autoconfig_client(), method_name)(*args, **kwargs)
    # Name the wrapper after its target so tracebacks and introspection do
    # not show every forwarder as ``result``.
    result.__name__ = method_name
    return result


exists = _with_ac('exists')
rename = _with_ac('rename')
remove = _with_ac('remove')
mkdir = _with_ac('mkdir')
listdir = _with_ac('listdir')
HOST = 'localhost'
PORT = 6379
DB = 15
PASSWORD = None
SOCKET_TIMEOUT = None
MARKER_PREFIX = 'luigi_test'
EXPIRE = 5


class RedisTargetTest(unittest.TestCase):

    """ Test touch, exists and target expiration"""

    def test_touch_and_exists(self):
        marker = RedisTarget(HOST, PORT, DB, 'update_id', PASSWORD)
        marker.marker_prefix = MARKER_PREFIX
        flush()
        self.assertFalse(marker.exists(),
                         'Target should not exist before touching it')
        marker.touch()
        self.assertTrue(marker.exists(),
                        'Target should exist after touching it')
        flush()

    def test_expiration(self):
        marker = RedisTarget(
            HOST, PORT, DB, 'update_id', PASSWORD, None, EXPIRE)
        marker.marker_prefix = MARKER_PREFIX
        flush()
        marker.touch()
        self.assertTrue(marker.exists(),
                        'Target should exist after touching it and before expiring')
        # Wait out the expiry configured on the target.
        sleep(EXPIRE)
        self.assertFalse(marker.exists(),
                         'Target should not exist after expiring')
        flush()


def flush():
    """ Flush test DB"""
    connection_args = dict(host=HOST, port=PORT, db=DB, socket_timeout=SOCKET_TIMEOUT)
    redis.StrictRedis(**connection_args).flushdb()
class ImportTest(unittest.TestCase):

    def import_test(self):
        """Import every package and public module under the luigi source tree.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        luigidir = os.path.join(here, '..')
        packagedir = os.path.join(luigidir, 'luigi')

        for root, subdirs, files in os.walk(packagedir):
            # Turn the directory path relative to the repo root into a dotted name.
            package = os.path.relpath(root, luigidir).replace('/', '.')

            if '__init__.py' in files:
                __import__(package)

            # Files starting with an underscore are skipped.
            modules = [name[:-3] for name in files
                       if name.endswith('.py') and not name.startswith('_')]
            for module in modules:
                __import__(package + '.' + module)

    def import_luigi_test(self):
        """
        Check that the top-level luigi package imports and exposes its main names.
        """
        import luigi

        # Building this list is the actual check: a missing name raises
        # AttributeError before the assertion is reached.
        expected = [
            luigi.Event,
            luigi.Config,
            luigi.Task, luigi.ExternalTask, luigi.WrapperTask,
            luigi.Target, luigi.LocalTarget, luigi.File,
            luigi.namespace,
            luigi.RemoteScheduler,
            luigi.RPCError,
            luigi.run, luigi.build,
            luigi.Parameter,
            luigi.DateHourParameter, luigi.DateMinuteParameter, luigi.DateParameter,
            luigi.MonthParameter, luigi.YearParameter,
            luigi.DateIntervalParameter, luigi.TimeDeltaParameter,
            luigi.IntParameter, luigi.FloatParameter,
            luigi.BooleanParameter, luigi.BoolParameter,
        ]
        self.assertGreater(len(expected), 0)
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from helpers import unittest

import luigi
import luigi.interface
from luigi.mock import MockTarget

# Calculates Fibonacci numbers :)


class Fib(luigi.Task):
    """Task writing the n-th Fibonacci number to a MockTarget."""

    n = luigi.IntParameter(default=100)

    def requires(self):
        """Depend on the two preceding Fibonacci tasks (none for n < 2)."""
        if self.n >= 2:
            return [Fib(self.n - 1), Fib(self.n - 2)]
        else:
            return []

    def output(self):
        """Return the mock file that holds the result for this ``n``."""
        return MockTarget('/tmp/fib_%d' % self.n)

    def run(self):
        """Sum the values of the required tasks and write the total."""
        if self.n == 0:
            s = 0
        elif self.n == 1:
            s = 1
        else:
            s = 0
            for target in self.input():
                # Close every input handle instead of leaking it.
                with target.open('r') as infile:
                    for line in infile:
                        s += int(line.strip())

        # The context manager closes the target even if the write fails.
        with self.output().open('w') as f:
            f.write('%d\n' % s)


class FibTestBase(unittest.TestCase):
    """Base class that empties the mock file system before each test."""

    def setUp(self):
        MockTarget.fs.clear()


class FibTest(FibTestBase):
    """Run the Fib task through the different luigi entry points."""

    def test_invoke(self):
        luigi.build([Fib(100)], local_scheduler=True)
        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_10'), b'55\n')
        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_100'), b'354224848179261915075\n')

    def test_cmdline(self):
        luigi.run(['--local-scheduler', '--no-lock', 'Fib', '--n', '100'])

        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_10'), b'55\n')
        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_100'), b'354224848179261915075\n')

    def test_build_internal(self):
        luigi.build([Fib(100)], local_scheduler=True)

        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_10'), b'55\n')
        self.assertEqual(MockTarget.fs.get_data('/tmp/fib_100'), b'354224848179261915075\n')

if __name__ == '__main__':
    luigi.run()
// QUnit tests for the helpers that graph.js exposes via Graph.testableMethods.
module("graph.js");

// A task record becomes a node carrying a "#<taskId>" tracking URL and an
// initial depth of -1.
test("nodeFromTask", function() {
    var task = {
        deps: ["B","C"],
        taskId: "A",
        status: "DONE"
    };
    var expected = {
        taskId: "A",
        status: "DONE",
        trackingUrl: "#A",
        deps: ["B","C"],
        depth: -1
    };
    deepEqual(Graph.testableMethods.nodeFromTask(task), expected);
});

// Maps the value of the chosen property of each element to its array index.
test("uniqueIndexByProperty", function() {
    var input = [
        {a:"x", b:100},
        {a:"y", b:101},
        {a:"z", b:102}
    ];
    var expected = {
        "x": 0,
        "y": 1,
        "z": 2
    };
    deepEqual(Graph.testableMethods.uniqueIndexByProperty(input, "a"), expected);
});

// One {source, target} edge is expected per entry in each node's deps list.
test("createDependencyEdges", function() {
    var A = {taskId: "A", deps: ["B","C"]};
    var B = {taskId: "B", deps: ["D"]};
    var C = {taskId: "C", deps: []};
    var D = {taskId: "D", deps: []};
    var nodes = [A,B,C,D];
    var nodeIndex = {"A":0, "B":1, "C":2, "D":3};
    var edges = Graph.testableMethods.createDependencyEdges(nodes, nodeIndex);
    var expected = [
        {source: A, target: B},
        {source: A, target: C},
        {source: B, target: D}
    ];
    deepEqual(edges, expected);
});

// Depth is expected to be 0 for A and to grow by one per dependency hop.
test("computeDepth", function() {
    var A = {taskId: "A", deps: ["B","C"], depth:-1};
    var B = {taskId: "B", deps: ["D"], depth:-1};
    var C = {taskId: "C", deps: [], depth:-1};
    var D = {taskId: "D", deps: [], depth:-1};
    // NOTE(review): E reuses taskId "C" and has no entry in nodeIndex; it is
    // expected to keep depth -1. Confirm the duplicate id is intentional.
    var E = {taskId: "C", deps: [], depth:-1};
    var nodes = [A,B,C,D,E];
    var nodeIndex = {"A":0, "B":1, "C":2, "D":3};
    Graph.testableMethods.computeDepth(nodes, nodeIndex);
    equal(A.depth, 0);
    equal(B.depth, 1);
    equal(C.depth, 1);
    equal(D.depth, 2);
    equal(E.depth, -1);
});

// Five tasks go in, but only four nodes and three links are expected out:
// task E is not referenced by any other task's deps.
test("createGraph", function() {
    var tasks = [
        {taskId: "A", deps: ["B","C"], status: "PENDING"},
        {taskId: "B", deps: ["D"], status: "RUNNING"},
        {taskId: "C", deps: [], status: "DONE"},
        {taskId: "D", deps: [], status: "DONE"},
        {taskId: "E", deps: [], status: "DONE"}
    ];
    var graph = Graph.testableMethods.createGraph(tasks);
    equal(graph.nodes.length, 4);
    equal(graph.links.length, 3);
    // Every remaining node must have been given a non-zero x and y.
    $.each(graph.nodes, function() {
        notEqual(this.x, 0);
        notEqual(this.y, 0);
    });

    // TODO: more assertions
});
# -*- coding: utf-8 -*-
#
# Copyright 2012-2016 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from helpers import unittest
from luigi import Task
from luigi import Parameter
from luigi.task import MixinNaiveBulkComplete

COMPLETE_TASKS = ["A", "B", "C"]


class MockTask(MixinNaiveBulkComplete, Task):
    """Task that counts as complete when ``param_a`` is in COMPLETE_TASKS."""

    param_a = Parameter()
    param_b = Parameter(default="Not Mandatory")

    def complete(self):
        return self.param_a in COMPLETE_TASKS


class MixinNaiveBulkCompleteTest(unittest.TestCase):
    """
    Test that the MixinNaiveBulkComplete can handle
    input as
     - iterable of parameters (for single param tasks)
     - iterable of parameter tuples (for multi param tasks)
     - iterable of parameter dicts (for multi param tasks)
    """

    def test_single_arg_list(self):
        candidates = ["A", "B", "x"]
        wanted = {arg for arg in candidates if arg in COMPLETE_TASKS}
        self.assertEqual(wanted, set(MockTask.bulk_complete(candidates)))

    def test_multiple_arg_tuple(self):
        candidates = (("A", "1"), ("B", "2"), ("X", "3"), ("C", "2"))
        wanted = {args for args in candidates if args[0] in COMPLETE_TASKS}
        self.assertEqual(wanted, set(MockTask.bulk_complete(candidates)))

    def test_multiple_arg_dict(self):
        candidates = (
            {"param_a": "X", "param_b": "1"},
            {"param_a": "C", "param_b": "1"}
        )
        # Dicts are unhashable, so this case compares lists rather than sets.
        wanted = [kwargs for kwargs in candidates
                  if kwargs["param_a"] in COMPLETE_TASKS]
        self.assertEqual(wanted, MockTask.bulk_complete(candidates))
#!/usr/bin/env bash
#
# Download a Hadoop tarball for the selected distro and unpack it into
# $HADOOP_HOME. See --help for the available options.
#
# Environment:
#   HADOOP_HOME    target directory (required)
#   HADOOP_DISTRO  "hdp" (default) or "cdh"
#   ONLY_DOWNLOAD  "true" to stop after the download step
#   ONLY_EXTRACT   "true" to skip the download step

HADOOP_DISTRO=${HADOOP_DISTRO:-"hdp"}

ONLY_DOWNLOAD=${ONLY_DOWNLOAD:-false}
ONLY_EXTRACT=${ONLY_EXTRACT:-false}

while test $# -gt 0; do
    case "$1" in
        -h|--help)
            echo "Setup environment for snakebite tests"
            echo " "
            echo "options:"
            echo -e "\t-h, --help show brief help"
            echo -e "\t-o, --only-download just download hadoop tar(s)"
            echo -e "\t-e, --only-extract just extract hadoop tar(s)"
            echo -e "\t-d, --distro select distro (hdp|cdh)"
            exit 0
            ;;
        -o|--only-download)
            shift
            ONLY_DOWNLOAD=true
            ;;
        -e|--only-extract)
            shift
            ONLY_EXTRACT=true
            ;;
        -d|--distro)
            shift
            if test $# -gt 0; then
                HADOOP_DISTRO=$1
            else
                echo "No Hadoop distro specified - abort" >&2
                exit 1
            fi
            shift
            ;;
        *)
            echo "Unknown options: $1" >&2
            exit 1
            ;;
    esac
done

# Compare the flags as strings instead of executing their values as commands.
if [ "$ONLY_DOWNLOAD" = true ] && [ "$ONLY_EXTRACT" = true ]; then
    echo "Both only-download and only-extract specified - abort" >&2
    exit 1
fi

# Without HADOOP_HOME every path below would resolve relative to "/".
if [ -z "${HADOOP_HOME:-}" ]; then
    echo "HADOOP_HOME is not set - abort" >&2
    exit 1
fi

mkdir -p "$HADOOP_HOME"

if [ "$HADOOP_DISTRO" = "cdh" ]; then
    URL="http://archive.cloudera.com/cdh5/cdh/5/hadoop-latest.tar.gz"
elif [ "$HADOOP_DISTRO" = "hdp" ]; then
    # This site provides good URLs:
    # https://github.com/saltstack-formulas/hadoop-formula/blob/5034a2204da691eceb9c2d8cd8260f11d5cc06f3/hadoop/settings.sls
    URL="http://public-repo-1.hortonworks.com/HDP/centos6/2.x/updates/2.2.6.0/tars/hadoop-2.6.0.2.2.6.0-2800.tar.gz"
else
    echo "No/bad HADOOP_DISTRO='${HADOOP_DISTRO}' specified" >&2
    exit 1
fi

if [ "$ONLY_EXTRACT" != true ] && [ ! -e "${HADOOP_HOME}/hadoop.tar.gz" ] ; then
    echo "Downloading Hadoop from $URL to ${HADOOP_HOME}/hadoop.tar.gz"
    # --fail makes HTTP errors (e.g. 404) count as failures instead of
    # saving the error page as the tarball.
    if ! curl --fail -z "${HADOOP_HOME}/hadoop.tar.gz" -o "${HADOOP_HOME}/hadoop.tar.gz" -L "$URL"; then
        # Remove a partial download so the next run does not skip fetching.
        rm -f "${HADOOP_HOME}/hadoop.tar.gz"
        echo "Failed to download Hadoop from $URL - abort" >&2
        exit 1
    fi
fi

if [ "$ONLY_DOWNLOAD" = true ]; then
    exit 0
fi

echo "Extracting ${HADOOP_HOME}/hadoop.tar.gz into $HADOOP_HOME"
if ! tar zxf "${HADOOP_HOME}/hadoop.tar.gz" --strip-components 1 -C "$HADOOP_HOME"; then
    echo "Failed to extract Hadoop from ${HADOOP_HOME}/hadoop.tar.gz to ${HADOOP_HOME} - abort" >&2
    exit 1
fi
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function

from helpers import unittest

from luigi.mock import MockTarget, MockFileSystem


class MockFileTest(unittest.TestCase):
    """Round-trip writes and reads through MockTarget handles."""

    def test_1(self):
        # Explicit open/close on purpose; the ``with`` form is test_with.
        target = MockTarget('test')
        writer = target.open('w')
        print('test', file=writer)
        writer.close()

        reader = target.open('r')
        self.assertEqual(list(reader), ['test\n'])
        reader.close()

    def test_with(self):
        target = MockTarget("foo")
        with target.open('w') as handle:
            handle.write("bar")

        with target.open('r') as handle:
            self.assertEqual(list(handle), ['bar'])

    # That should work in python2 because of the autocast
    # That should work in python3 because the default format is Text
    def test_unicode(self):
        target = MockTarget("foo")
        with target.open('w') as handle:
            handle.write(u"bar")

        with target.open('r') as handle:
            self.assertEqual(handle.read(), u'bar')


class MockFileSystemTest(unittest.TestCase):
    """Exercise MockFileSystem against two files created under /tmp."""

    fs = MockFileSystem()

    def _touch(self, path):
        """Create an empty mock file at ``path``."""
        with MockTarget(path).open('w'):
            pass

    def setUp(self):
        self.fs.clear()
        self.path = "/tmp/foo"
        self.path2 = "/tmp/bar"
        self._touch(self.path)
        self._touch(self.path2)

    def test_exists(self):
        self.assertTrue(self.fs.exists(self.path))

    def test_remove(self):
        self.fs.remove(self.path)
        self.assertFalse(self.fs.exists(self.path))

    def test_remove_recursive(self):
        self.fs.remove("/tmp", recursive=True)
        self.assertFalse(self.fs.exists(self.path))
        self.assertFalse(self.fs.exists(self.path2))

    def test_listdir(self):
        wanted = sorted([self.path, self.path2])
        self.assertEqual(wanted, sorted(self.fs.listdir("/tmp")))


class TestImportMockFile(unittest.TestCase):
    """The MockFile name must still be importable from luigi.mock."""

    def test_mockfile(self):
        from luigi.mock import MockFile
        self.assertIsInstance(MockFile('foo'), MockTarget)
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from helpers import unittest

import luigi
import luigi.worker
import luigi.date_interval
import luigi.notifications

luigi.notifications.DEBUG = True


class InstanceTest(unittest.TestCase):
    """Check equality and identity of task instances built from parameters."""

    def test_simple(self):
        """Tasks compare equal exactly when their parameter values match."""
        class DummyTask(luigi.Task):
            x = luigi.Parameter()

        dummy_1 = DummyTask(1)
        dummy_2 = DummyTask(2)
        dummy_1b = DummyTask(1)

        self.assertNotEqual(dummy_1, dummy_2)
        self.assertEqual(dummy_1, dummy_1b)

    def test_dep(self):
        """State set on a dependency in run() is visible via requires()."""
        # Alias so the nested task classes can call the TestCase assertions.
        test = self

        class A(luigi.Task):

            def __init__(self):
                self.has_run = False
                super(A, self).__init__()

            def run(self):
                self.has_run = True

        class B(luigi.Task):
            x = luigi.Parameter()

            def requires(self):
                return A()  # This will end up referring to the same object

            def run(self):
                test.assertTrue(self.requires().has_run)

        luigi.build([B(1), B(2)], local_scheduler=True)

    def test_external_instance_cache(self):
        """Two classes sharing the task family "A" yield unequal instances."""
        class A(luigi.Task):
            pass

        class OtherA(luigi.ExternalTask):
            # Deliberately collides with the family name of class A above.
            task_family = "A"

        oa = OtherA()
        a = A()
        self.assertNotEqual(oa, a)

    def test_date(self):
        ''' Adding unit test because we had a problem with this '''
        class DummyTask(luigi.Task):
            x = luigi.DateIntervalParameter()

        dummy_1 = DummyTask(luigi.date_interval.Year(2012))
        dummy_2 = DummyTask(luigi.date_interval.Year(2013))
        dummy_1b = DummyTask(luigi.date_interval.Year(2012))

        self.assertNotEqual(dummy_1, dummy_2)
        self.assertEqual(dummy_1, dummy_1b)

    def test_unhashable_type(self):
        """Constructing a task with an unhashable value must not raise."""
        # See #857
        class DummyTask(luigi.Task):
            x = luigi.Parameter()

        dummy = DummyTask(x={})  # NOQA

if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
The SunGrid Engine runner

The main() function of this module will be executed on the
compute node by the submitted job. It accepts as a single
argument the shared temp folder containing the package archive
and pickled task to run, and carries out these steps:

- extract tarfile of package dependencies and place on the path
- unpickle SGETask instance created on the master node
- run SGETask.work()

On completion, SGETask on the master node will detect that
the job has left the queue, delete the temporary folder, and
return from SGETask.run()
"""

import os
import sys
try:
    import cPickle as pickle
except ImportError:
    import pickle
import logging
import tarfile


def _do_work_on_compute_node(work_dir):
    """Unpack dependencies, unpickle the job in ``work_dir`` and run it.

    The process is left with ``work_dir`` as its current directory, so
    ``job.work()`` runs from there.

    :param work_dir: directory containing ``job-instance.pickle`` and,
                     optionally, ``packages.tar``.
    """

    # Extract the necessary dependencies
    _extract_packages_archive(work_dir)

    # Open up the pickle file with the work to be done
    os.chdir(work_dir)
    # Pickles are binary data: text mode ("r") fails on Python 3.
    # NOTE: unpickling executes arbitrary code, so work_dir must be trusted.
    with open("job-instance.pickle", "rb") as f:
        job = pickle.load(f)

    # Do the work contained
    job.work()


def _extract_packages_archive(work_dir):
    """Extract ``packages.tar`` into ``work_dir`` and make it importable.

    Does nothing when the archive is absent. The current working directory
    is restored afterwards, even if extraction fails.

    :param work_dir: directory that may contain ``packages.tar``.
    """
    # Absolute path, so it stays valid after the chdir below even when
    # work_dir was given relative to the current directory.
    package_file = os.path.abspath(os.path.join(work_dir, "packages.tar"))
    if not os.path.exists(package_file):
        return

    curdir = os.path.abspath(os.curdir)

    os.chdir(work_dir)
    try:
        # NOTE: members are extracted as-is; the archive must be trusted.
        with tarfile.open(package_file) as tar:
            for tarinfo in tar:
                tar.extract(tarinfo)
        # '' on sys.path means "the current directory" at import time.
        if '' not in sys.path:
            sys.path.insert(0, '')
    finally:
        os.chdir(curdir)


def main(args=sys.argv):
    """Run the work() method from the class instance in the file "job-instance.pickle".

    :param args: argv-style list; ``args[1]`` is the work directory.
    :raises AssertionError: if the work directory does not exist.
    """
    try:
        # Set up logging.
        logging.basicConfig(level=logging.WARN)
        work_dir = args[1]
        assert os.path.exists(work_dir), "First argument to sge_runner.py must be a directory that exists"
        _do_work_on_compute_node(work_dir)
    except Exception as e:
        # Echo the error to stdout before re-raising it.
        print(e)
        raise

if __name__ == '__main__':
    main()
/* AdminLTE "skin-green" theme (minified): colours for the main header and navbar, logo, sidebar menu, treeview menu and sidebar search form. */
.skin-green .main-header .navbar{background-color:#00a65a}.skin-green .main-header .navbar .nav>li>a{color:#fff}.skin-green .main-header .navbar .nav>li>a:hover,.skin-green .main-header .navbar .nav>li>a:active,.skin-green .main-header .navbar .nav>li>a:focus,.skin-green .main-header .navbar .nav .open>a,.skin-green .main-header .navbar .nav .open>a:hover,.skin-green .main-header .navbar .nav .open>a:focus{background:rgba(0,0,0,0.1);color:#f6f6f6}.skin-green .main-header .navbar .sidebar-toggle{color:#fff}.skin-green .main-header .navbar .sidebar-toggle:hover{color:#f6f6f6;background:rgba(0,0,0,0.1)}.skin-green .main-header .navbar .sidebar-toggle{color:#fff}.skin-green .main-header .navbar .sidebar-toggle:hover{background-color:#008d4c}@media (max-width:767px){.skin-green .main-header .navbar .dropdown-menu li.divider{background-color:rgba(255,255,255,0.1)}.skin-green .main-header .navbar .dropdown-menu li a{color:#fff}.skin-green .main-header .navbar .dropdown-menu li a:hover{background:#008d4c}}.skin-green .main-header .logo{background-color:#008d4c;color:#fff;border-bottom:0 solid transparent}.skin-green .main-header .logo:hover{background-color:#008749}.skin-green .main-header li.user-header{background-color:#00a65a}.skin-green .content-header{background:transparent}.skin-green .wrapper,.skin-green .main-sidebar,.skin-green .left-side{background-color:#222d32}.skin-green .user-panel>.info,.skin-green .user-panel>.info>a{color:#fff}.skin-green .sidebar-menu>li.header{color:#4b646f;background:#1a2226}.skin-green .sidebar-menu>li>a{border-left:3px solid transparent}.skin-green .sidebar-menu>li:hover>a,.skin-green .sidebar-menu>li.active>a{color:#fff;background:#1e282c;border-left-color:#00a65a}.skin-green .sidebar-menu>li>.treeview-menu{margin:0 1px;background:#2c3b41}.skin-green .sidebar a{color:#b8c7ce}.skin-green .sidebar a:hover{text-decoration:none}.skin-green .treeview-menu>li>a{color:#8aa4af}.skin-green .treeview-menu>li.active>a,.skin-green .treeview-menu>li>a:hover{color:#fff}.skin-green .sidebar-form{border-radius:3px;border:1px solid #374850;margin:10px 10px}.skin-green .sidebar-form input[type="text"],.skin-green .sidebar-form .btn{box-shadow:none;background-color:#374850;border:1px solid transparent;height:35px;-webkit-transition:all .3s ease-in-out;-o-transition:all .3s ease-in-out;transition:all .3s ease-in-out}.skin-green .sidebar-form input[type="text"]{color:#666;border-top-left-radius:2px !important;border-top-right-radius:0 !important;border-bottom-right-radius:0 !important;border-bottom-left-radius:2px !important}.skin-green .sidebar-form input[type="text"]:focus,.skin-green .sidebar-form input[type="text"]:focus+.input-group-btn .btn{background-color:#fff;color:#666}.skin-green .sidebar-form input[type="text"]:focus+.input-group-btn .btn{border-left-color:#fff}.skin-green .sidebar-form .btn{color:#999;border-top-left-radius:0 !important;border-top-right-radius:2px !important;border-bottom-right-radius:2px !important;border-bottom-left-radius:0 !important}
# -*- coding: utf-8 -*-
#
# Copyright 2015 VNG Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from minicluster import MiniClusterTestCase
import unittest
import subprocess
import select
import re

try:
    from snakebite.minicluster import MiniCluster
except ImportError:
    raise unittest.SkipTest('To use minicluster, snakebite must be installed.')


class WebHdfsMiniCluster(MiniCluster):
    '''
    This is an unclean class overriding of the snakebite minicluster.

    But since it seemed pretty inflexible I had to override private methods
    here.
    '''
    @property
    def webhdfs_port(self):
        """Port of the WebHDFS endpoint (the value stored in ``self.port``)."""
        return self.port

    def _start_mini_cluster(self, nnport=None):
        """
        Copied in an ugly manner from snakebite source code.

        Starts the hadoop minicluster subprocess with WebHDFS enabled and
        stores the process handle in ``self.hdfs``.

        :param nnport: optional port passed to the cluster as ``-nnport``.
        :raises Exception: if no jobclient test jar can be located.
        """
        if self._jobclient_jar:
            hadoop_jar = self._jobclient_jar
        else:
            hadoop_jar = self._find_mini_cluster_jar(self._hadoop_home)
        if not hadoop_jar:
            raise Exception("No hadoop jobclient test jar found")
        cmd = [self._hadoop_cmd, 'jar', hadoop_jar,
               'minicluster', '-nomr', '-format']
        if nnport:
            cmd.extend(['-nnport', "%s" % nnport])
        # luigi webhdfs version
        cmd.extend(['-Ddfs.webhdfs.enabled=true'])
        self.hdfs = subprocess.Popen(cmd, bufsize=0, stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE, universal_newlines=True)

    def _get_namenode_port(self):
        """Scan the cluster output for the port Jetty bound for WebHDFS.

        Every line read is echoed to stdout. The port is taken from a
        "Jetty bound to port N" line that directly follows a line
        mentioning both "namenode" and "webhdfs".

        :return: the port as an int, or None if the process exits first.
        """
        # Raw strings: "\d" in a plain literal is an invalid escape sequence.
        jetty_port_re = re.compile(r".*Jetty bound to port (\d+).*")
        webhdfs_re = re.compile(r".*namenode.*webhdfs.*")

        just_seen_webhdfs = False
        while self.hdfs.poll() is None:
            rlist, wlist, xlist = select.select([self.hdfs.stderr, self.hdfs.stdout], [], [])
            for f in rlist:
                line = f.readline()
                print(line.rstrip())

                m = jetty_port_re.match(line)
                if just_seen_webhdfs and m:
                    return int(m.group(1))
                just_seen_webhdfs = webhdfs_re.match(line)


class WebHdfsMiniClusterTestCase(MiniClusterTestCase):
    """Test case base that runs against a WebHDFS-enabled minicluster."""

    @classmethod
    def instantiate_cluster(cls):
        return WebHdfsMiniCluster(None, nnport=50030)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
The hadoop runner.

This module contains the main() method which will be used to run the
mapper and reducer on the Hadoop nodes.
"""

from __future__ import print_function

try:
    import cPickle as pickle
except ImportError:
    import pickle
import binascii
import logging
import os
import sys
import tarfile
import traceback


class Runner(object):
    """
    Run the mapper or reducer on hadoop nodes.
    """

    def __init__(self, job=None):
        """Unpack ``packages.tar`` if present, then obtain the job.

        :param job: job instance to run; when falsy, the job is unpickled
                    from ``job-instance.pickle`` in the current directory.
        """
        self.extract_packages_archive()
        if not job:
            # NOTE: unpickling executes arbitrary code; the file must be
            # trusted. The context manager closes the handle after loading.
            with open("job-instance.pickle", "rb") as f:
                job = pickle.load(f)
        self.job = job
        self.job._setup_remote()

    def run(self, kind, stdin=sys.stdin, stdout=sys.stdout):
        """Dispatch to the job's mapper, combiner or reducer.

        :param kind: one of ``"map"``, ``"combiner"`` or ``"reduce"``.
        :raises Exception: for any other ``kind``.
        """
        if kind == "map":
            self.job.run_mapper(stdin, stdout)
        elif kind == "combiner":
            self.job.run_combiner(stdin, stdout)
        elif kind == "reduce":
            self.job.run_reducer(stdin, stdout)
        else:
            raise Exception('weird command: %s' % kind)

    def extract_packages_archive(self):
        """Extract ``packages.tar`` into the cwd and make it importable."""
        if not os.path.exists("packages.tar"):
            return

        # NOTE: members are extracted as-is; the archive must be trusted.
        with tarfile.open("packages.tar") as tar:
            for tarinfo in tar:
                tar.extract(tarinfo)
        # '' on sys.path means "the current directory" at import time.
        if '' not in sys.path:
            sys.path.insert(0, '')


def print_exception(exc):
    """Write the current traceback to stderr as ``luigi-exc-hex=<hex>``.

    Must be called while an exception is being handled. The traceback is
    hex-encoded so that it fits on a single line.
    """
    tb = traceback.format_exc()
    # str.encode('hex') exists only on Python 2; hexlify works on both.
    raw = tb if isinstance(tb, bytes) else tb.encode('utf-8')
    print('luigi-exc-hex=%s' % binascii.hexlify(raw).decode('ascii'), file=sys.stderr)


def main(args=None, stdin=sys.stdin, stdout=sys.stdout, print_exception=print_exception):
    """
    Run either the mapper or the reducer from the class instance in the file "job-instance.pickle".

    Arguments:

    kind -- is either map or reduce
    """
    try:
        # Set up logging.
        logging.basicConfig(level=logging.WARN)

        # Explicit None test: the old ``args and args[1] or sys.argv[1]``
        # silently fell back to sys.argv when args[1] was an empty string.
        argv = sys.argv if args is None else args
        kind = argv[1]
        Runner().run(kind, stdin=stdin, stdout=stdout)
    except Exception as exc:
        # Report the failure through the callback, then re-raise it.
        print_exception(exc)
        raise

if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
#
# Copyright 2015 Outlier Bio, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Integration test for the Luigi wrapper of EC2 Container Service (ECSTask)

Requires:

- boto3 package
- Amazon AWS credentials discoverable by boto3 (e.g., by using ``aws configure``
  from awscli_)
- A running ECS cluster (see `ECS Get Started`_)

Written and maintained by Jake Feala (@jfeala) for Outlier Bio (@outlierbio)

.. _awscli: https://aws.amazon.com/cli
.. _`ECS Get Started`: http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_GetStarted.html
"""

import unittest

import luigi
from luigi.contrib.ecs import ECSTask, _get_task_statuses

try:
    import boto3
    client = boto3.client('ecs')
except ImportError:
    raise unittest.SkipTest('boto3 is not installed. ECSTasks require boto3')

TEST_TASK_DEF = {
    'family': 'hello-world',
    'volumes': [],
    'containerDefinitions': [
        {
            'memory': 1,
            'essential': True,
            'name': 'hello-world',
            'image': 'ubuntu',
            'command': ['/bin/echo', 'hello world']
        }
    ]
}


class ECSTaskNoOutput(ECSTask):
    """ECSTask that is complete once all of its ECS tasks are STOPPED."""

    def complete(self):
        # No task ids recorded yet means the task cannot be complete.
        if not self.ecs_task_ids:
            return False
        statuses = _get_task_statuses(self.ecs_task_ids)
        return all(status == 'STOPPED' for status in statuses)


class ECSTaskOverrideCommand(ECSTaskNoOutput):
    """Variant that overrides the command of the hello-world container."""

    @property
    def command(self):
        return [{'name': 'hello-world', 'command': ['/bin/sleep', '10']}]


class TestECSTask(unittest.TestCase):
    """Build ECS tasks from a task definition dict and from an ARN."""

    def setUp(self):
        # Register the test task definition
        registration = client.register_task_definition(**TEST_TASK_DEF)
        self.arn = registration['taskDefinition']['taskDefinitionArn']

    def test_unregistered_task(self):
        task = ECSTaskNoOutput(task_def=TEST_TASK_DEF)
        luigi.build([task], local_scheduler=True)

    def test_registered_task(self):
        task = ECSTaskNoOutput(task_def_arn=self.arn)
        luigi.build([task], local_scheduler=True)

    def test_override_command(self):
        task = ECSTaskOverrideCommand(task_def_arn=self.arn)
        luigi.build([task], local_scheduler=True)
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import luigi
import luigi.contrib.hadoop
import luigi.contrib.hdfs


# To make this run, you probably want to edit /etc/luigi/client.cfg and add something like:
#
# [hadoop]
# jar: /usr/lib/hadoop-xyz/hadoop-streaming-xyz-123.jar


class InputText(luigi.ExternalTask):
    """
    This task is a :py:class:`luigi.task.ExternalTask`, which means it doesn't generate the
    :py:meth:`~.InputText.output` target on its own, instead relying on the execution of
    something outside of Luigi to produce it.
    """

    date = luigi.DateParameter()

    def output(self):
        """
        Returns the target output for this task.
        In this case, it expects a file to be present in HDFS.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.contrib.hdfs.HdfsTarget(self.date.strftime('/tmp/text/%Y-%m-%d.txt'))


class WordCount(luigi.contrib.hadoop.JobTask):
    """
    This task runs a :py:class:`luigi.contrib.hadoop.JobTask`
    over the target data returned by :py:meth:`~.InputText.output` and
    writes the result into its :py:meth:`~.WordCount.output` target.

    This class uses :py:meth:`luigi.contrib.hadoop.JobTask.run`.
    """

    date_interval = luigi.DateIntervalParameter()

    def requires(self):
        """
        This task's dependencies:

        * :py:class:`~.InputText`

        One :py:class:`~.InputText` is required for each date in ``date_interval``.

        :return: list of object (:py:class:`luigi.task.Task`)
        """
        return [InputText(date) for date in self.date_interval.dates()]

    def output(self):
        """
        Returns the target output for this task.
        In this case, a successful execution of this task will create a file in HDFS.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.contrib.hdfs.HdfsTarget('/tmp/text-count/%s' % self.date_interval)

    def mapper(self, line):
        """Yield a ``(word, 1)`` pair for every whitespace-separated word in ``line``."""
        for word in line.strip().split():
            yield word, 1

    def reducer(self, key, values):
        """Yield ``key`` together with the sum of ``values``."""
        yield key, sum(values)

if __name__ == '__main__':
    luigi.run()
#

import logging

from luigi import six

import luigi.target

logger = logging.getLogger('luigi-interface')


class CascadingClient(object):
    """
    A FilesystemClient that forwards each call to a list of clients in order,
    falling back to the next client whenever the current one fails.

    The clients (and optionally the forwarded method names) are fixed at
    construction time.
    """

    # This constant member is supposed to include all methods, feel free to add
    # methods here. If you want full control of which methods that should be
    # created, pass the kwarg to the constructor.
    ALL_METHOD_NAMES = ['exists', 'rename', 'remove', 'chmod', 'chown',
                        'count', 'copy', 'get', 'put', 'mkdir', 'list', 'listdir',
                        'getmerge',
                        'isdir',
                        'rename_dont_move',
                        'touchz',
                        ]

    def __init__(self, clients, method_names=None):
        """
        :param clients: clients to try, in priority order
        :param method_names: names of the methods to expose on this instance;
                             ``None`` selects :py:attr:`ALL_METHOD_NAMES`
        """
        self.clients = clients
        names = self.ALL_METHOD_NAMES if method_names is None else method_names

        # Every forwarded method is attached to the instance as a bound method.
        for name in names:
            forwarder = six.create_bound_method(self._make_method(name), self)
            setattr(self, name, forwarder)

    @classmethod
    def _make_method(cls, method_name):
        """Build a function that forwards ``method_name`` to ``_chained_call``."""
        def new_method(self, *args, **kwargs):
            return self._chained_call(method_name, *args, **kwargs)
        return new_method

    def _chained_call(self, method_name, *args, **kwargs):
        """
        Call ``method_name`` on each client in turn and return the first result.

        :raises luigi.target.FileSystemException: re-raised immediately, without
                trying the remaining clients
        :raises BaseException: whatever the last client raised, if all of them fail
        """
        last_index = len(self.clients) - 1
        for index, client in enumerate(self.clients):
            try:
                return getattr(client, method_name)(*args, **kwargs)
            except luigi.target.FileSystemException:
                # For exceptions that are semantical, we must throw along
                raise
            except BaseException:
                # Nothing left to fall back to: surface the failure of the last client.
                if index >= last_index:
                    raise
                logger.warning('The %s failed to %s, using fallback class %s',
                               client.__class__.__name__, method_name, self.clients[index + 1].__class__.__name__)
# --------------------------------------------------------------------------------
# /luigi/tools/luigi_grep.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

import argparse
import json
from collections import defaultdict
from contextlib import closing

from luigi import six
from luigi.six.moves.urllib.request import urlopen


class LuigiGrep(object):
    """
    Searches the jobs returned by a luigi scheduler's ``/api/graph`` JSON endpoint.
    """

    def __init__(self, host, port):
        """
        :param host: hostname of the luigi scheduler
        :param port: port of the luigi scheduler
        """
        self._host = host
        self._port = port

    @property
    def graph_url(self):
        """URL of the scheduler's graph endpoint, built from host and port."""
        return "http://{0}:{1}/api/graph".format(self._host, self._port)

    def _fetch_json(self):
        """Returns the json representation of the dep graph"""
        print("Fetching from url: " + self.graph_url)
        # closing() releases the HTTP response even when read() raises, and it
        # also works where the urlopen() result is not itself a context manager.
        with closing(urlopen(self.graph_url)) as resp:
            payload = resp.read()
        return json.loads(payload.decode('utf-8'))

    def _build_results(self, jobs, job):
        """
        Summarise one job.

        :param jobs: mapping of job name to job info (needs 'deps' and 'status' keys)
        :param job: name of the job to summarise
        :return: dict with the job's name, its status and its dependencies
                 grouped by their status ('UNKNOWN' for deps missing from ``jobs``)
        """
        job_info = jobs[job]
        deps = job_info['deps']
        deps_status = defaultdict(list)
        for j in deps:
            if j in jobs:
                deps_status[jobs[j]['status']].append(j)
            else:
                deps_status['UNKNOWN'].append(j)
        return {"name": job, "status": job_info['status'], "deps_by_status": deps_status}

    def prefix_search(self, job_name_prefix):
        """searches for jobs matching the given job_name_prefix."""
        # Bound to ``graph`` rather than ``json`` so the json module is not shadowed.
        graph = self._fetch_json()
        jobs = graph['response']
        for job in jobs:
            if job.startswith(job_name_prefix):
                yield self._build_results(jobs, job)

    def status_search(self, status):
        """searches for jobs matching the given status (case-insensitive)"""
        graph = self._fetch_json()
        jobs = graph['response']
        wanted = status.lower()
        for job in jobs:
            if jobs[job]['status'].lower() == wanted:
                yield self._build_results(jobs, job)


def main():
    """Command-line entry point: parse the arguments, run the search, print the matches."""
    # The text must be passed as ``description``: the first positional parameter
    # of ArgumentParser is ``prog``, which would put the sentence in the usage line.
    parser = argparse.ArgumentParser(
        description="luigi-grep is used to search for workflows using the luigi scheduler's json api")
    parser.add_argument(
        "--scheduler-host", default="localhost", help="hostname of the luigi scheduler")
    parser.add_argument(
        "--scheduler-port", default="8082", help="port of the luigi scheduler")
    parser.add_argument("--prefix", help="prefix of a task query to search for", default=None)
    parser.add_argument("--status", help="search for jobs with the given status", default=None)

    args = parser.parse_args()
    grep = LuigiGrep(args.scheduler_host, args.scheduler_port)

    # --prefix wins over --status; with neither option nothing is searched.
    results = []
    if args.prefix:
        results = grep.prefix_search(args.prefix)
    elif args.status:
        results = grep.status_search(args.status)

    for job in results:
        print("{name}: {status}, Dependencies:".format(name=job['name'], status=job['status']))
        for (status, dep_names) in six.iteritems(job['deps_by_status']):
            print(" status={status}".format(status=status))
            # Distinct loop name: the outer ``job`` must not be rebound here.
            for dep_name in dep_names:
                print(" {job}".format(job=dep_name))


if __name__ == '__main__':
    main()
# --------------------------------------------------------------------------------
# /test/contrib/cascading_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from helpers import unittest

import luigi.target
from luigi.contrib.target import CascadingClient


class CascadingClientTest(unittest.TestCase):
    """Exercises CascadingClient with two stub clients that only implement ``exists``."""

    def setUp(self):
        class FirstClient:

            def exists(self, pos_arg, kw_arg='first'):
                # Succeeds below 20; above that it fails, semantically on request.
                if pos_arg < 10:
                    return pos_arg
                if pos_arg < 20:
                    return kw_arg
                if kw_arg == 'raise_fae':
                    raise luigi.target.FileAlreadyExists('oh noes!')
                raise Exception()

        class SecondClient:

            def exists(self, pos_arg, other_kw_arg='second',
                       kw_arg='for-backwards-compatibility'):
                # Succeeds below 40 and fails otherwise.
                if pos_arg < 30:
                    return -pos_arg
                if pos_arg < 40:
                    return other_kw_arg
                raise Exception()

        self.clients = [FirstClient(), SecondClient()]
        self.client = CascadingClient(self.clients)

    def test_successes(self):
        """Calls the first client can answer never reach the second one."""
        self.assertEqual(5, self.client.exists(5))
        self.assertEqual('yay', self.client.exists(15, kw_arg='yay'))

    def test_fallbacking(self):
        """A failure in the first client is answered by the second client."""
        self.assertEqual(-25, self.client.exists(25))
        self.assertEqual('lol', self.client.exists(35, kw_arg='yay',
                                                   other_kw_arg='lol'))
        # Note: the first client's method doesn't accept the other keyword argument
        self.assertEqual(-15, self.client.exists(15, kw_arg='yay',
                                                 other_kw_arg='lol'))

    def test_failings(self):
        """When every client fails, the error propagates to the caller."""
        with self.assertRaises(Exception):
            self.client.exists(45)
        with self.assertRaises(AttributeError):
            self.client.mkdir()

    def test_FileAlreadyExists_propagation(self):
        """FileAlreadyExists from the first client is raised instead of falling back."""
        with self.assertRaises(luigi.target.FileAlreadyExists):
            self.client.exists(25, kw_arg='raise_fae')

    def test_method_names_kwarg(self):
        """Only the names listed in ``method_names`` become methods of the client."""
        self.client = CascadingClient(self.clients, method_names=[])
        with self.assertRaises(AttributeError):
            self.client.exists()
        self.client = CascadingClient(self.clients, method_names=['exists'])
        self.assertEqual(5, self.client.exists(5))
| -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,33,34,35}-{cdh,hdp,nonhdfs,gcloud,postgres,unixsocket}, visualiser, pypy-scheduler, docs, flake8 3 | skipsdist = True 4 | 5 | [testenv] 6 | usedevelop = True 7 | install_command = pip install {opts} {packages} 8 | deps= 9 | mock<2.0 10 | moto<1.0 11 | HTTPretty==0.8.10 12 | nose<2.0 13 | unittest2<2.0 14 | boto<3.0 15 | sqlalchemy<2.0 16 | elasticsearch<2.0.0 17 | psutil<4.0 18 | enum34>1.1.0 19 | cdh,hdp: snakebite>=2.5.2,<2.6.0 20 | cdh,hdp: hdfs>=2.0.4,<3.0.0 # The webhdfs library 21 | postgres: psycopg2<3.0 22 | gcloud: google-api-python-client>=1.4.0,<2.0 23 | coverage>=3.6,<3.999 24 | codecov>=1.4.0 25 | requests<3.0 26 | unixsocket: requests-unixsocket<1.0 27 | pygments 28 | hypothesis[datetime] 29 | passenv = 30 | USER JAVA_HOME POSTGRES_USER GCS_TEST_PROJECT_ID GCS_TEST_BUCKET GOOGLE_APPLICATION_CREDENTIALS TRAVIS_BUILD_ID TRAVIS TRAVIS_BRANCH TRAVIS_JOB_NUMBER TRAVIS_PULL_REQUEST TRAVIS_JOB_ID TRAVIS_REPO_SLUG TRAVIS_COMMIT CI 31 | setenv = 32 | LC_ALL = en_US.utf-8 33 | cdh: HADOOP_DISTRO=cdh 34 | cdh: HADOOP_HOME={toxinidir}/.tox/hadoop-cdh 35 | hdp: HADOOP_DISTRO=hdp 36 | hdp: HADOOP_HOME={toxinidir}/.tox/hadoop-hdp 37 | postgres: NOSE_ATTR=postgres 38 | scheduler: NOSE_ATTR=scheduler 39 | cdh,hdp: NOSE_ATTR=minicluster 40 | gcloud: NOSE_ATTR=gcloud 41 | nonhdfs: NOSE_EVAL_ATTR=not minicluster and not gcloud and not postgres and not unixsocket 42 | unixsocket: NOSE_ATTR=unixsocket 43 | LUIGI_CONFIG_PATH={toxinidir}/test/testconfig/luigi.cfg 44 | COVERAGE_PROCESS_START={toxinidir}/.coveragerc 45 | FULL_COVERAGE=true 46 | nonhdfs: NOSE_WITH_DOCTEST=1 47 | commands = 48 | cdh,hdp: {toxinidir}/scripts/ci/setup_hadoop_env.sh 49 | python --version 50 | coverage run test/runtests.py -v --ignore-files='(^\.|^_|^setup\.py$)' \ 51 | 
--ignore-files=not_imported.py \ 52 | {posargs:} 53 | coverage combine 54 | codecov -e TOXENV 55 | 56 | [testenv:visualiser] 57 | usedevelop = True 58 | deps = 59 | mock<2.0 60 | nose<2.0 61 | unittest2<2.0 62 | passenv = {[testenv]passenv} 63 | setenv = 64 | LC_ALL = en_US.utf-8 65 | NOSE_EVAL_ATTR=not minicluster and not gcloud and not postgres and not unixsocket 66 | LUIGI_CONFIG_PATH={toxinidir}/test/testconfig/luigi.cfg 67 | TEST_VISUALISER=1 68 | commands = 69 | python --version 70 | nosetests -v --tests=test/visualiser 71 | 72 | [testenv:flake8] 73 | deps = flake8 74 | commands = flake8 --max-line-length=160 --exclude=doc,luigi/six.py 75 | flake8 --max-line-length=100 --ignore=E265 doc 76 | 77 | [testenv:autopep8] 78 | deps = autopep8 79 | commands = autopep8 --ignore E309,E501 -a -i -r luigi test examples bin 80 | 81 | [testenv:isort] 82 | deps = isort 83 | commands = isort -w 120 -rc luigi test examples bin 84 | 85 | [testenv:docs] 86 | # Build documentation using sphinx. 87 | # Call this using `tox -e docs`. 
88 | deps = 89 | sqlalchemy 90 | Sphinx>=1.3b1 91 | sphinx_rtd_theme 92 | commands = 93 | # build API docs 94 | sphinx-apidoc -o doc/api -T luigi --separate 95 | 96 | # build HTML docs 97 | sphinx-build -W -b html -d {envtmpdir}/doctrees doc doc/_build/html 98 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .coverage.* 2 | doc/api/*.rst 3 | test/gcloud-credentials.json 4 | .hypothesis/ 5 | 6 | .nicesetup 7 | 8 | client.cfg 9 | luigi.cfg 10 | 11 | hadoop_test.py 12 | minicluster.py 13 | mrrunner.py 14 | pig_property_file 15 | 16 | packages.tar 17 | 18 | test/data 19 | 20 | Vagrantfile 21 | 22 | *.pickle 23 | *.rej 24 | *.orig 25 | 26 | 27 | # Created by https://www.gitignore.io 28 | 29 | ### Python ### 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | 34 | # C extensions 35 | *.so 36 | 37 | # Distribution / packaging 38 | .Python 39 | env/ 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | # NOTE : lib/ prevents inclusion of static/visualiser/lib 46 | #lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | *.egg-info/ 52 | .installed.cfg 53 | *.egg 54 | 55 | # PyInstaller 56 | # Usually these files are written by a python script from a template 57 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
58 | *.manifest 59 | *.spec 60 | 61 | # Installer logs 62 | pip-log.txt 63 | pip-delete-this-directory.txt 64 | 65 | # Unit test / coverage reports 66 | htmlcov/ 67 | .tox/ 68 | .coverage 69 | .coverage.* 70 | .cache 71 | nosetests.xml 72 | coverage.xml 73 | my_dir 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | 82 | # Sphinx documentation 83 | doc/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | 89 | ### Vim ### 90 | [._]*.s[a-w][a-z] 91 | [._]s[a-w][a-z] 92 | *.un~ 93 | Session.vim 94 | .netrwhist 95 | *~ 96 | 97 | 98 | ### PyCharm ### 99 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm 100 | 101 | *.iml 102 | 103 | ## Directory-based project format: 104 | .idea/ 105 | # if you remove the above rule, at least ignore the following: 106 | 107 | # User-specific stuff: 108 | # .idea/workspace.xml 109 | # .idea/tasks.xml 110 | # .idea/dictionaries 111 | 112 | # Sensitive or high-churn files: 113 | # .idea/dataSources.ids 114 | # .idea/dataSources.xml 115 | # .idea/sqlDataSources.xml 116 | # .idea/dynamic.xml 117 | # .idea/uiDesigner.xml 118 | 119 | # Gradle: 120 | # .idea/gradle.xml 121 | # .idea/libraries 122 | 123 | # Mongo Explorer plugin: 124 | # .idea/mongoSettings.xml 125 | 126 | ## File-based project format: 127 | *.ipr 128 | *.iws 129 | 130 | ## Plugin-specific files: 131 | 132 | # IntelliJ 133 | out/ 134 | 135 | # mpeltonen/sbt-idea plugin 136 | .idea_modules/ 137 | 138 | # JIRA plugin 139 | atlassian-ide-plugin.xml 140 | 141 | # Crashlytics plugin (for Android Studio and IntelliJ) 142 | com_crashlytics_export_strings.xml 143 | crashlytics.properties 144 | crashlytics-build.properties 145 | 146 | 147 | ### Vagrant ### 148 | .vagrant/ 149 | 150 | 151 | ### OSX ### 152 | .DS_Store 153 | .AppleDouble 154 | .LSOverride 155 | 156 | # Icon must end with two \r 157 | Icon 158 | 159 | 160 | # Thumbnails 161 | ._* 162 | 163 | # Files that might appear on external disk 164 | .Spotlight-V100 165 | 
.Trashes 166 | 167 | # Directories potentially created on remote AFP share 168 | .AppleDB 169 | .AppleDesktop 170 | Network Trash Folder 171 | Temporary Items 172 | .apdisk 173 | -------------------------------------------------------------------------------- /test/test_sigpipe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
#

import os
from helpers import unittest

from luigi.format import InputPipeProcessWrapper


# The trap leaves a marker file behind whenever the script receives SIGPIPE.
BASH_SCRIPT = """
#!/bin/bash

trap "touch /tmp/luigi_sigpipe.marker; exit 141" SIGPIPE


for i in {1..3}
do
sleep 0.1
echo "Welcome $i times"
done
"""

# Same script, but it always finishes with a non-zero exit status.
FAIL_SCRIPT = BASH_SCRIPT + """
exit 1
"""


class TestSigpipe(unittest.TestCase):
    """Reads the output of BASH_SCRIPT through InputPipeProcessWrapper."""

    def setUp(self):
        with open("/tmp/luigi_test_sigpipe.sh", "w") as fp:
            fp.write(BASH_SCRIPT)

    def tearDown(self):
        os.remove("/tmp/luigi_test_sigpipe.sh")
        if os.path.exists("/tmp/luigi_sigpipe.marker"):
            os.remove("/tmp/luigi_sigpipe.marker")

    def test_partial_read(self):
        """Closing after one line must leave the marker written by the SIGPIPE trap."""
        p1 = InputPipeProcessWrapper(["bash", "/tmp/luigi_test_sigpipe.sh"])
        self.assertEqual(p1.readline().decode('utf8'), "Welcome 1 times\n")
        p1.close()
        self.assertTrue(os.path.exists("/tmp/luigi_sigpipe.marker"))

    def test_full_read(self):
        """Reading every line must not leave the SIGPIPE marker behind."""
        p1 = InputPipeProcessWrapper(["bash", "/tmp/luigi_test_sigpipe.sh"])
        counter = 1
        for line in p1:
            self.assertEqual(line.decode('utf8'), "Welcome %i times\n" % counter)
            counter += 1
        p1.close()
        self.assertFalse(os.path.exists("/tmp/luigi_sigpipe.marker"))


class TestSubprocessException(unittest.TestCase):
    """Same reads as TestSigpipe, against the script that ends with ``exit 1``."""

    def setUp(self):
        with open("/tmp/luigi_test_sigpipe.sh", "w") as fp:
            fp.write(FAIL_SCRIPT)

    def tearDown(self):
        os.remove("/tmp/luigi_test_sigpipe.sh")
        if os.path.exists("/tmp/luigi_sigpipe.marker"):
            os.remove("/tmp/luigi_sigpipe.marker")

    def test_partial_read(self):
        """Closing after one line must still leave the SIGPIPE marker."""
        p1 = InputPipeProcessWrapper(["bash", "/tmp/luigi_test_sigpipe.sh"])
        self.assertEqual(p1.readline().decode('utf8'), "Welcome 1 times\n")
        p1.close()
        self.assertTrue(os.path.exists("/tmp/luigi_sigpipe.marker"))

    def test_full_read(self):
        """Reading the failing script to the end and closing must raise RuntimeError."""
        def run():
            p1 = InputPipeProcessWrapper(["bash", "/tmp/luigi_test_sigpipe.sh"])
            counter = 1
            for line in p1:
                self.assertEqual(line.decode('utf8'), "Welcome %i times\n" % counter)
                counter += 1
            p1.close()

        self.assertRaises(RuntimeError, run)
# --------------------------------------------------------------------------------
# /examples/dynamic_requirements.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import random as rnd
import time

import luigi


class Config(luigi.Task):
    """Writes a comma-separated sample of numbers derived from ``seed``."""

    seed = luigi.IntParameter()

    def output(self):
        """
        Returns the target output for this task.
        In this case, a successful execution of this task will create a file on the local filesystem.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.LocalTarget('/tmp/Config_%d.txt' % self.seed)

    def run(self):
        time.sleep(5)
        # Seeding first makes the sample a function of the ``seed`` parameter.
        rnd.seed(self.seed)

        result = ','.join(
            [str(x) for x in rnd.sample(list(range(300)), rnd.randint(7, 25))])
        with self.output().open('w') as f:
            f.write(result)


class Data(luigi.Task):
    """Writes its ``magic_number`` parameter to a file."""

    magic_number = luigi.IntParameter()

    def output(self):
        """
        Returns the target output for this task.
        In this case, a successful execution of this task will create a file on the local filesystem.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.LocalTarget('/tmp/Data_%d.txt' % self.magic_number)

    def run(self):
        time.sleep(1)
        with self.output().open('w') as f:
            f.write('%s' % self.magic_number)


class Dynamic(luigi.Task):
    """Yields its dependencies from ``run`` once the Config output can be read."""

    seed = luigi.IntParameter(default=1)

    def output(self):
        """
        Returns the target output for this task.
        In this case, a successful execution of this task will create a file on the local filesystem.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.LocalTarget('/tmp/Dynamic_%d.txt' % self.seed)

    def run(self):
        # This could be done using regular requires method
        config = self.clone(Config)
        yield config

        with config.output().open() as f:
            data = [int(x) for x in f.read().split(',')]

        # ... but not this
        data_dependent_deps = [Data(magic_number=x) for x in data]
        yield data_dependent_deps

        with self.output().open('w') as f:
            f.write('Tada!')


if __name__ == '__main__':
    luigi.run()
# --------------------------------------------------------------------------------
# /examples/wordcount.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from luigi import six

import luigi


class InputText(luigi.ExternalTask):
    """
    This class represents something that was created elsewhere by an external process,
    so all we want to do is to implement the output method.
    """
    date = luigi.DateParameter()

    def output(self):
        """
        Returns the target output for this task.
        In this case, it expects a file to be present in the local file system.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.LocalTarget(self.date.strftime('/var/tmp/text/%Y-%m-%d.txt'))


class WordCount(luigi.Task):
    """Counts the words found in the InputText targets of a date interval."""

    date_interval = luigi.DateIntervalParameter()

    def requires(self):
        """
        This task's dependencies:

        * :py:class:`~.InputText`

        :return: list of object (:py:class:`luigi.task.Task`)
        """
        return [InputText(date) for date in self.date_interval.dates()]

    def output(self):
        """
        Returns the target output for this task.
        In this case, a successful execution of this task will create a file on the local filesystem.

        :return: the target output for this task.
        :rtype: object (:py:class:`luigi.target.Target`)
        """
        return luigi.LocalTarget('/var/tmp/text-count/%s' % self.date_interval)

    def run(self):
        """
        1. count the words for each of the :py:meth:`~.InputText.output` targets created by :py:class:`~.InputText`
        2. write the count into the :py:meth:`~.WordCount.output` target
        """
        word_counts = {}

        # NOTE: self.input() actually returns an element for the InputText.output() target
        for target in self.input():  # The input() method is a wrapper around requires() that returns Target objects
            # Target objects are a file system/format abstraction; open() returns
            # a file stream object, closed here as soon as it has been read.
            with target.open('r') as infile:
                for line in infile:
                    for word in line.strip().split():
                        word_counts[word] = word_counts.get(word, 0) + 1

        # output data
        # WARNING: file system operations are atomic therefore if you don't close
        # the file you lose all data; the with-block guarantees the close.
        with self.output().open('w') as outfile:
            # ``occurrences`` keeps the dict from being rebound while it is iterated.
            for word, occurrences in six.iteritems(word_counts):
                outfile.write("%s\t%d\n" % (word, occurrences))
# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2012 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

import os

from setuptools import setup


def get_static_files(path):
    """
    Build the ``package_data`` glob patterns for every directory below ``path``.

    :param path: directory to walk, e.g. ``"luigi/static"``
    :return: one pattern per (directory, extension) pair, with ``"luigi/"``
             removed from the directory part
    """
    return [os.path.join(dirpath.replace("luigi/", ""), ext)
            for (dirpath, dirnames, filenames) in os.walk(path)
            for ext in ["*.html", "*.js", "*.css", "*.png",
                        "*.eot", "*.svg", "*.ttf", "*.woff", "*.woff2"]]


# Concatenate the pattern lists of both asset directories into one flat list.
luigi_package_data = sum(map(get_static_files, ["luigi/static", "luigi/templates"]), [])

# Prepended to the README text. The directive body must be indented and the
# hyperlink needs an explicit <target>, otherwise the reST markup is invalid.
readme_note = """\
.. note::

   For the latest source, discussion, etc, please visit the
   `GitHub repository <https://github.com/spotify/luigi>`_\n\n
"""

with open('README.rst') as fobj:
    long_description = readme_note + fobj.read()

install_requires = [
    'tornado>=4.0,<5',
    'python-daemon<3.0',
]

if os.environ.get('READTHEDOCS', None) == 'True':
    # So that we can build documentation for luigi.db_task_history and luigi.contrib.sqla
    install_requires.append('sqlalchemy')
    # readthedocs don't like python-daemon, see #1342
    install_requires.remove('python-daemon<3.0')

setup(
    name='luigi',
    version='2.0.1',
    description='Workflow mgmgt + task scheduling + dependency resolution',
    long_description=long_description,
    author='Erik Bernhardsson',
    url='https://github.com/spotify/luigi',
    license='Apache License 2.0',
    packages=[
        'luigi',
        'luigi.contrib',
        'luigi.contrib.hdfs',
        'luigi.tools'
    ],
    package_data={
        'luigi': luigi_package_data
    },
    entry_points={
        'console_scripts': [
            'luigi = luigi.cmdline:luigi_run',
            'luigid = luigi.cmdline:luigid',
            'luigi-grep = luigi.tools.luigi_grep:main',
            'luigi-deps = luigi.tools.deps:main',
            'luigi-migrate = luigi.tools.migrate:main'
        ]
    },
    install_requires=install_requires,
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Topic :: System :: Monitoring',
    ],
)
# --------------------------------------------------------------------------------
# /test/lock_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import subprocess
import tempfile
import mock
from helpers import unittest

import luigi
import luigi.lock
import luigi.notifications

luigi.notifications.DEBUG = True


class TestCmd(unittest.TestCase):

    def test_getpcmd(self):
        """getpcmd must report the command line of a running ``sleep 1``."""
        p = subprocess.Popen(["sleep", "1"])
        try:
            self.assertTrue(
                luigi.lock.getpcmd(p.pid) in ["sleep 1", '[sleep]']
            )
        finally:
            # Kill and reap the helper even when the assertion fails.
            p.kill()
            p.wait()


class LockTest(unittest.TestCase):
    """Tests luigi.lock against a pid file kept in a fresh temporary directory."""

    def setUp(self):
        self.pid_dir = tempfile.mkdtemp()
        self.pid, self.cmd, self.pid_file = luigi.lock.get_info(self.pid_dir)

    def tearDown(self):
        if os.path.exists(self.pid_file):
            os.remove(self.pid_file)
        os.rmdir(self.pid_dir)

    def test_get_info(self):
        """get_info must return the non-ASCII command line of another process intact."""
        p = subprocess.Popen(["yes", "à我ф"], stdout=subprocess.PIPE)
        try:
            pid, cmd, pid_file = luigi.lock.get_info(self.pid_dir, p.pid)
        finally:
            # Stop the endless ``yes``, close our end of its pipe and reap it,
            # even if get_info raised.
            p.kill()
            p.stdout.close()
            p.wait()
        self.assertEqual(cmd, 'yes à我ф')

    def test_acquiring_free_lock(self):
        """With no pid file present the lock is acquired."""
        acquired = luigi.lock.acquire_for(self.pid_dir)
        self.assertTrue(acquired)

    def test_acquiring_taken_lock(self):
        """A pid file naming this (live) process blocks a second acquisition."""
        with open(self.pid_file, 'w') as f:
            f.write('%d\n' % (self.pid, ))

        acquired = luigi.lock.acquire_for(self.pid_dir)
        self.assertFalse(acquired)

    def test_acquiring_partially_taken_lock(self):
        """Passing 2 as second argument still acquires with one pid already recorded."""
        with open(self.pid_file, 'w') as f:
            f.write('%d\n' % (self.pid, ))

        acquired = luigi.lock.acquire_for(self.pid_dir, 2)
        self.assertTrue(acquired)

        # The pid file must end up with mode 0o777.
        s = os.stat(self.pid_file)
        self.assertEqual(s.st_mode & 0o777, 0o777)

    def test_acquiring_lock_from_missing_process(self):
        """A pid file that names a process which is not running does not block."""
        fake_pid = 99999
        with open(self.pid_file, 'w') as f:
            f.write('%d\n' % (fake_pid, ))

        acquired = luigi.lock.acquire_for(self.pid_dir)
        self.assertTrue(acquired)

        s = os.stat(self.pid_file)
        self.assertEqual(s.st_mode & 0o777, 0o777)

    @mock.patch('os.kill')
    def test_take_lock_with_kill(self, kill_fn):
        """With ``kill_signal`` set, the recorded pid is signalled and the lock is taken."""
        with open(self.pid_file, 'w') as f:
            f.write('%d\n' % (self.pid,))

        kill_signal = 77777
        acquired = luigi.lock.acquire_for(self.pid_dir, kill_signal=kill_signal)
        self.assertTrue(acquired)
        kill_fn.assert_called_once_with(self.pid, kill_signal)
# --------------------------------------------------------------------------------
# /luigi/contrib/hdfs/abstract_client.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Module containing abstract class about hdfs clients.
"""

import abc
from luigi import six
import luigi.target


@six.add_metaclass(abc.ABCMeta)
class HdfsFileSystem(luigi.target.FileSystem):
    """
    This client uses Apache 2.x syntax for file system commands, which also matched CDH4.
    """

    def rename(self, path, dest):
        """
        Rename or move a file.

        In hdfs land, "mv" is often called rename. So we add an alias for
        ``move()`` called ``rename()``. This is also to keep backward
        compatibility since ``move()`` became standardized in luigi's
        filesystem interface.
        """
        return self.move(path, dest)

    def rename_dont_move(self, path, dest):
        """
        Override this method with an implementation that uses rename2,
        which is a rename operation that never moves.

        rename2 -
        https://github.com/apache/hadoop/blob/ae91b13/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
        (lines 483-523)
        """
        # We only override this method to be able to provide a more specific
        # docstring.
        return super(HdfsFileSystem, self).rename_dont_move(path, dest)

    # The methods below are abstract: concrete clients must implement them.

    @abc.abstractmethod
    def remove(self, path, recursive=True, skip_trash=False):
        pass

    @abc.abstractmethod
    def chmod(self, path, permissions, recursive=False):
        pass

    @abc.abstractmethod
    def chown(self, path, owner, group, recursive=False):
        pass

    @abc.abstractmethod
    def count(self, path):
        """
        Count contents in a directory
        """
        pass

    @abc.abstractmethod
    def copy(self, path, destination):
        pass

    @abc.abstractmethod
    def put(self, local_path, destination):
        pass

    @abc.abstractmethod
    def get(self, path, local_destination):
        pass

    @abc.abstractmethod
    def mkdir(self, path, parents=True, raise_if_exists=False):
        pass

    @abc.abstractmethod
    def listdir(self, path, ignore_directories=False, ignore_files=False,
                include_size=False, include_type=False, include_time=False, recursive=False):
        pass

    @abc.abstractmethod
    def touchz(self, path):
        pass
# --------------------------------------------------------------------------------
# /luigi/contrib/redis_store.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this
# --- luigi/contrib/redis_store.py ---
import datetime
import logging

from luigi.target import Target
from luigi.parameter import Parameter

logger = logging.getLogger('luigi-interface')

try:
    import redis

except ImportError:
    logger.warning("Loading redis_store module without redis installed. "
                   "Will crash at runtime if redis_store functionality is used.")


class RedisTarget(Target):

    """ Target for a resource in Redis."""

    # NOTE(review): this is declared as a luigi Parameter on a Target rather
    # than on a Task - confirm that it resolves to the configured string
    # when read through ``self.marker_prefix`` in marker_key().
    marker_prefix = Parameter(default='luigi',
                              config_path=dict(section='redis', name='marker-prefix'))

    def __init__(self, host, port, db, update_id, password=None,
                 socket_timeout=None, expire=None):
        """
        :param host: Redis server host
        :type host: str
        :param port: Redis server port
        :type port: int
        :param db: database index
        :type db: int
        :param update_id: an identifier for this data hash
        :type update_id: str
        :param password: a password to connect to the redis server
        :type password: str
        :param socket_timeout: client socket timeout
        :type socket_timeout: int
        :param expire: timeout before the target is deleted
        :type expire: int

        """
        self.host = host
        self.port = port
        self.db = db
        self.password = password
        self.socket_timeout = socket_timeout
        self.update_id = update_id
        self.expire = expire

        self.redis_client = redis.StrictRedis(
            host=self.host,
            port=self.port,
            db=self.db,
            # The password used to be stored on the instance but never handed
            # to the client, so password-protected servers rejected every
            # command.
            password=self.password,
            socket_timeout=self.socket_timeout,
        )

    def marker_key(self):
        """
        Generate a key for the indicator hash.
        """
        return '%s:%s' % (self.marker_prefix, self.update_id)

    def touch(self):
        """
        Mark this update as complete.

        We index the parameters `update_id` and `date`.
        """
        marker_key = self.marker_key()
        self.redis_client.hset(marker_key, 'update_id', self.update_id)
        # Store the timestamp as text: newer redis-py releases only accept
        # bytes, strings and numbers as values, while older ones applied
        # str() themselves, so the stored value is unchanged.
        self.redis_client.hset(marker_key, 'date', str(datetime.datetime.now()))

        if self.expire is not None:
            self.redis_client.expire(marker_key, self.expire)

    def exists(self):
        """
        Test, if this task has been run.
        """
        return self.redis_client.exists(self.marker_key()) == 1
# --- luigi/retcodes.py ---
"""
Module containing the logic for exit codes for the luigi binary. It's useful
when you in a programmatic way need to know if luigi actually finished the
given task, and if not why.
"""

import luigi
import sys
import logging
from luigi import IntParameter


class retcode(luigi.Config):
    """
    See the :ref:`return codes configuration section <retcode-config>`.
    """
    # Only unhandled exceptions produce a non-zero exit code by default;
    # every other condition defaults to 0.
    unhandled_exception = IntParameter(
        default=4,
        description='For scheduling errors or internal luigi errors.',
    )
    missing_data = IntParameter(
        default=0,
        description="For when there are incomplete ExternalTask dependencies.",
    )
    task_failed = IntParameter(
        default=0,
        description='''For when a task's run() method fails.''',
    )
    already_running = IntParameter(
        default=0,
        description='For both local --lock and luigid "lock"',
    )


def run_with_retcodes(argv):
    """
    Run luigi with command line parsing, but raise ``SystemExit`` with the configured exit code.

    The exit code is the largest configured code among the conditions that
    occurred (missing external data, failed tasks, tasks run by another
    worker), or 0 when none did.

    Note: Usually you use the luigi binary directly and don't call this function yourself.

    :param argv: Should (conceptually) be ``sys.argv[1:]``
    """
    logger = logging.getLogger('luigi-interface')

    # The configured codes are read while the command line is installed as
    # the global parser instance.
    with luigi.cmdline_parser.CmdlineParser.global_instance(argv):
        retcodes = retcode()

    try:
        worker = luigi.interface._run(argv)['worker']
    except luigi.interface.PidLockAlreadyTakenExit:
        sys.exit(retcodes.already_running)
    except Exception:
        # Some errors occur before logging is set up, we set it up now
        luigi.interface.setup_interface_logging()
        logger.exception("Uncaught exception in luigi")
        sys.exit(retcodes.unhandled_exception)

    summary = luigi.execution_summary._summary_dict(worker)
    occurred = set(status for status, tasks in summary.items() if tasks)

    def has(status):
        assert status in luigi.execution_summary._ORDERED_STATUSES
        return status in occurred

    # Evaluate every condition up front, then keep the codes that apply.
    checks = [
        (retcodes.missing_data, has('still_pending_ext')),
        (retcodes.task_failed, has('failed')),
        (retcodes.already_running, has('run_by_other_worker')),
    ]
    exit_code = max((code if hit else 0) for code, hit in checks)
    sys.exit(exit_code)
# --- luigi/contrib/hdfs/__init__.py ---
"""
Provides access to HDFS using the :py:class:`HdfsTarget`, a subclass of :py:class:`~luigi.target.Target`.
You can configure what client by setting the "client" config under the "hdfs" section in the configuration, or using the ``--hdfs-client`` command line option.
"hadoopcli" is the slowest, but should work out of the box. "snakebite" is the fastest, but requires Snakebite to be installed.

Since the hdfs functionality is quite big in luigi, it's split into smaller
files under ``luigi/contrib/hdfs/*.py``. But for the sake of convenience and
API stability, everything is reexported under :py:mod:`luigi.contrib.hdfs`.
"""

# Submodules, imported under ``hdfs_*`` aliases so the short names stay free
# for the re-exports below.
from luigi.contrib.hdfs import config as hdfs_config
from luigi.contrib.hdfs import clients as hdfs_clients
from luigi.contrib.hdfs import error as hdfs_error
from luigi.contrib.hdfs import snakebite_client as hdfs_snakebite_client
from luigi.contrib.hdfs import hadoopcli_clients as hdfs_hadoopcli_clients
from luigi.contrib.hdfs import webhdfs_client as hdfs_webhdfs_client
from luigi.contrib.hdfs import format as hdfs_format
from luigi.contrib.hdfs import target as hdfs_target


# Re-exports from config.py
hdfs = hdfs_config.hdfs
load_hadoop_cmd = hdfs_config.load_hadoop_cmd
get_configured_hadoop_version = hdfs_config.get_configured_hadoop_version
get_configured_hdfs_client = hdfs_config.get_configured_hdfs_client
tmppath = hdfs_config.tmppath


# Re-exports of the client classes and their error type
HDFSCliError = hdfs_error.HDFSCliError
HdfsClient = hdfs_hadoopcli_clients.HdfsClient
HdfsClientCdh3 = hdfs_hadoopcli_clients.HdfsClientCdh3
HdfsClientApache1 = hdfs_hadoopcli_clients.HdfsClientApache1
create_hadoopcli_client = hdfs_hadoopcli_clients.create_hadoopcli_client
SnakebiteHdfsClient = hdfs_snakebite_client.SnakebiteHdfsClient
WebHdfsClient = hdfs_webhdfs_client.WebHdfsClient

# Two client methods are also exposed directly at package level.
call_check = HdfsClient.call_check
list_path = SnakebiteHdfsClient.list_path

# Module-level helpers from clients.py
get_autoconfig_client = hdfs_clients.get_autoconfig_client
exists = hdfs_clients.exists
rename = hdfs_clients.rename
remove = hdfs_clients.remove
mkdir = hdfs_clients.mkdir
listdir = hdfs_clients.listdir


# Re-exports from format.py
HdfsReadPipe = hdfs_format.HdfsReadPipe
HdfsAtomicWritePipe = hdfs_format.HdfsAtomicWritePipe
HdfsAtomicWriteDirPipe = hdfs_format.HdfsAtomicWriteDirPipe
PlainFormat = hdfs_format.PlainFormat
PlainDirFormat = hdfs_format.PlainDirFormat
Plain = hdfs_format.Plain
PlainDir = hdfs_format.PlainDir
CompatibleHdfsFormat = hdfs_format.CompatibleHdfsFormat


# Re-exports from target.py
HdfsTarget = hdfs_target.HdfsTarget
16 | # 17 | from __future__ import print_function 18 | 19 | import datetime 20 | from helpers import unittest 21 | 22 | import luigi 23 | import luigi.notifications 24 | from luigi.mock import MockTarget 25 | from luigi.util import inherits 26 | 27 | luigi.notifications.DEBUG = True 28 | 29 | 30 | class A(luigi.Task): 31 | 32 | def output(self): 33 | return MockTarget('/tmp/a.txt') 34 | 35 | def run(self): 36 | f = self.output().open('w') 37 | print('hello, world', file=f) 38 | f.close() 39 | 40 | 41 | class B(luigi.Task): 42 | date = luigi.DateParameter() 43 | 44 | def output(self): 45 | return MockTarget(self.date.strftime('/tmp/b-%Y-%m-%d.txt')) 46 | 47 | def run(self): 48 | f = self.output().open('w') 49 | print('goodbye, space', file=f) 50 | f.close() 51 | 52 | 53 | def XMLWrapper(cls): 54 | @inherits(cls) 55 | class XMLWrapperCls(luigi.Task): 56 | 57 | def requires(self): 58 | return self.clone_parent() 59 | 60 | def run(self): 61 | f = self.input().open('r') 62 | g = self.output().open('w') 63 | print('', file=g) 64 | for line in f: 65 | print('' + line.strip() + '', file=g) 66 | g.close() 67 | 68 | return XMLWrapperCls 69 | 70 | 71 | class AXML(XMLWrapper(A)): 72 | 73 | def output(self): 74 | return MockTarget('/tmp/a.xml') 75 | 76 | 77 | class BXML(XMLWrapper(B)): 78 | 79 | def output(self): 80 | return MockTarget(self.date.strftime('/tmp/b-%Y-%m-%d.xml')) 81 | 82 | 83 | class WrapperTest(unittest.TestCase): 84 | 85 | ''' This test illustrates how a task class can wrap another task class by modifying its behavior. 86 | 87 | See instance_wrap_test.py for an example of how instances can wrap each other. 
''' 88 | workers = 1 89 | 90 | def setUp(self): 91 | MockTarget.fs.clear() 92 | 93 | def test_a(self): 94 | luigi.build([AXML()], local_scheduler=True, no_lock=True, workers=self.workers) 95 | self.assertEqual(MockTarget.fs.get_data('/tmp/a.xml'), b'\nhello, world\n') 96 | 97 | def test_b(self): 98 | luigi.build([BXML(datetime.date(2012, 1, 1))], local_scheduler=True, no_lock=True, workers=self.workers) 99 | self.assertEqual(MockTarget.fs.get_data('/tmp/b-2012-01-01.xml'), b'\ngoodbye, space\n') 100 | 101 | 102 | class WrapperWithMultipleWorkersTest(WrapperTest): 103 | workers = 7 104 | 105 | 106 | if __name__ == '__main__': 107 | luigi.run() 108 | -------------------------------------------------------------------------------- /test/simulate_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
# --- test/simulate_test.py ---
from helpers import unittest
import luigi
from luigi.contrib.simulate import RunAnywayTarget

from multiprocessing import Process
import os
import tempfile


def temp_dir():
    """Return the scratch directory for these tests, below the system temp dir."""
    return os.path.join(tempfile.gettempdir(), 'luigi-simulate')


def is_writable():
    """
    Return a decorator that skips a test when the scratch directory is unusable.

    The check creates the directory if needed, then creates and removes a
    probe file in it. Only filesystem errors (``OSError``) count as "not
    writable"; anything else, including ``KeyboardInterrupt``, propagates
    instead of being swallowed.
    """
    d = temp_dir()
    fn = os.path.join(d, 'luigi-simulate-write-test')
    try:
        os.makedirs(d, exist_ok=True)
        with open(fn, 'w'):
            pass
        os.remove(fn)
    except OSError:
        writable = False
    else:
        writable = True

    return unittest.skipIf(not writable, 'Can\'t write to temporary directory')


class TaskA(luigi.Task):
    """Appends ``<class name>=<i>`` to a shared log file each time it runs."""

    i = luigi.IntParameter(default=0)

    def output(self):
        return RunAnywayTarget(self)

    def run(self):
        fn = os.path.join(temp_dir(), 'luigi-simulate-test.tmp')
        os.makedirs(os.path.dirname(fn), exist_ok=True)

        # Append mode: every task in the pipeline adds its own line.
        with open(fn, 'a') as f:
            f.write('{0}={1}\n'.format(self.__class__.__name__, self.i))

        self.output().done()


class TaskB(TaskA):
    def requires(self):
        return TaskA(i=10)


class TaskC(TaskA):
    def requires(self):
        return TaskA(i=5)


class TaskD(TaskA):
    def requires(self):
        return [TaskB(), TaskC(), TaskA(i=20)]


class TaskWrap(luigi.WrapperTask):
    def requires(self):
        return [TaskA(), TaskD()]


def reset():
    # Force tasks to be executed again (because multiple pipelines are executed inside of the same process)
    t = TaskA().output()
    with t.unique.get_lock():
        t.unique.value = 0


class RunAnywayTargetTest(unittest.TestCase):
    """Runs the TaskWrap pipeline and checks which tasks logged a line."""

    @is_writable()
    def test_output(self):
        reset()

        fn = os.path.join(temp_dir(), 'luigi-simulate-test.tmp')

        luigi.build([TaskWrap()], local_scheduler=True)
        with open(fn, 'r') as f:
            data = f.read().strip().split('\n')

        data.sort()
        reference = ['TaskA=0', 'TaskA=10', 'TaskA=20', 'TaskA=5', 'TaskB=0', 'TaskC=0', 'TaskD=0']
        reference.sort()

        # Remove the log before asserting so a failure leaves nothing behind
        # for the next run to append to.
        os.remove(fn)
        self.assertEqual(data, reference)

    @is_writable()
    def test_output_again(self):
        # Running the test in another process because the PID is used to determine if the target exists
        # NOTE(review): the child's exit code is not inspected, so a failure
        # inside the child does not fail this test - confirm whether that is
        # intended.
        p = Process(target=self.test_output)
        p.start()
        p.join()
43 | :rtype: object (:py:class:`~luigi.target.Target`) 44 | """ 45 | return RemoteTarget( 46 | "/tmp/stuff", 47 | SSH_HOST 48 | ) 49 | 50 | def run(self): 51 | remote = RemoteContext(SSH_HOST) 52 | print(remote.check_output([ 53 | "ps aux > {0}".format(self.output().path) 54 | ])) 55 | 56 | 57 | class ProcessRemoteData(luigi.Task): 58 | """ 59 | Create a toplist of users based on how many running processes they have on a remote machine. 60 | 61 | In this example the processed data is stored in a MockFile. 62 | """ 63 | 64 | def requires(self): 65 | """ 66 | This task's dependencies: 67 | 68 | * :py:class:`~.CreateRemoteData` 69 | 70 | :return: object (:py:class:`luigi.task.Task`) 71 | """ 72 | return CreateRemoteData() 73 | 74 | def run(self): 75 | processes_per_user = defaultdict(int) 76 | with self.input().open('r') as infile: 77 | for line in infile: 78 | username = line.split()[0] 79 | processes_per_user[username] += 1 80 | 81 | toplist = sorted( 82 | six.iteritems(processes_per_user), 83 | key=lambda x: x[1], 84 | reverse=True 85 | ) 86 | 87 | with self.output().open('w') as outfile: 88 | for user, n_processes in toplist: 89 | print(n_processes, user, file=outfile) 90 | 91 | def output(self): 92 | """ 93 | Returns the target output for this task. 94 | In this case, a successful execution of this task will simulate the creation of a file in a filesystem. 95 | 96 | :return: the target output for this task. 
97 | :rtype: object (:py:class:`~luigi.target.Target`) 98 | """ 99 | return MockFile("output", mirror_on_stderr=True) 100 | 101 | 102 | if __name__ == "__main__": 103 | luigi.run() 104 | -------------------------------------------------------------------------------- /test/task_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2012-2015 Spotify AB 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
# --- test/task_test.py ---
import doctest
from helpers import unittest
from datetime import datetime, timedelta

import luigi
import luigi.task
from luigi.task_register import load_task


class DummyTask(luigi.Task):
    """Task with one parameter of each kind used by the round-trip test."""

    param = luigi.Parameter()
    bool_param = luigi.BoolParameter()
    int_param = luigi.IntParameter()
    float_param = luigi.FloatParameter()
    date_param = luigi.DateParameter()
    datehour_param = luigi.DateHourParameter()
    timedelta_param = luigi.TimeDeltaParameter()
    insignificant_param = luigi.Parameter(significant=False)


class DefaultInsignificantParamTask(luigi.Task):
    """Two insignificant parameters: one with a default, one without."""

    insignificant_param = luigi.Parameter(significant=False, default='value')
    necessary_param = luigi.Parameter(significant=False)


class TaskTest(unittest.TestCase):

    def test_tasks_doctest(self):
        # testmod() only reports failures through its return value; check it
        # so a broken doctest in luigi.task actually fails this test.
        results = doctest.testmod(luigi.task)
        self.assertEqual(results.failed, 0)

    def test_task_to_str_to_task(self):
        """A task survives to_str_params() followed by from_str_params()."""
        params = dict(
            param='test',
            bool_param=True,
            int_param=666,
            float_param=123.456,
            date_param=datetime(2014, 9, 13).date(),
            datehour_param=datetime(2014, 9, 13, 9),
            timedelta_param=timedelta(44),  # doesn't support seconds
            insignificant_param='test')

        original = DummyTask(**params)
        other = DummyTask.from_str_params(original.to_str_params())
        self.assertEqual(original, other)

    def test_task_from_str_insignificant(self):
        """from_str_params() fills in the defaulted insignificant parameter."""
        params = {'necessary_param': 'needed'}
        original = DefaultInsignificantParamTask(**params)
        other = DefaultInsignificantParamTask.from_str_params(params)
        self.assertEqual(original, other)

    def test_task_missing_necessary_param(self):
        """An insignificant parameter without a default is still required."""
        with self.assertRaises(luigi.parameter.MissingParameterException):
            DefaultInsignificantParamTask.from_str_params({})

    def test_external_tasks_loadable(self):
        task = load_task("luigi", "ExternalTask", {})
        # assertIsInstance is not stripped under ``python -O`` and gives a
        # useful message, unlike a bare assert.
        self.assertIsInstance(task, luigi.ExternalTask)

    def test_flatten(self):
        flatten = luigi.task.flatten
        self.assertEqual(sorted(flatten({'a': 'foo', 'b': 'bar'})), ['bar', 'foo'])
        self.assertEqual(sorted(flatten(['foo', ['bar', 'troll']])), ['bar', 'foo', 'troll'])
        self.assertEqual(flatten('foo'), ['foo'])
        self.assertEqual(flatten(42), [42])
        self.assertEqual(flatten((len(i) for i in ["foo", "troll"])), [3, 5])
        # len(None) raises TypeError while the generator is being consumed.
        self.assertRaises(TypeError, flatten, (len(i) for i in ["foo", "troll", None]))


if __name__ == '__main__':
    unittest.main()
# --- test/instance_wrap_test.py ---
import datetime
import decimal
from helpers import unittest

import luigi
import luigi.notifications
from luigi.mock import MockTarget

luigi.notifications.DEBUG = True


class Report(luigi.Task):
    """Writes a fixed three-line ``<amount> <currency>`` report for a date."""

    date = luigi.DateParameter()

    def run(self):
        # Context-managed so the handle is not left open when a write fails.
        with self.output().open('w') as f:
            f.write('10.0 USD\n')
            f.write('4.0 EUR\n')
            f.write('3.0 USD\n')

    def output(self):
        return MockTarget(self.date.strftime('/tmp/report-%Y-%m-%d'))


class ReportReader(luigi.Task):
    """Loads the lines of a ``Report`` into memory and parses them on demand."""

    date = luigi.DateParameter()

    def requires(self):
        return Report(self.date)

    def run(self):
        # Previously the input handle was opened inline and never closed.
        with self.input().open('r') as f:
            self.lines = f.readlines()

    def get_line(self, line):
        """Return ``(Decimal amount, currency)`` for the report line at index ``line``."""
        amount, currency = self.lines[line].strip().split()
        return decimal.Decimal(amount), currency

    def complete(self):
        # Never complete: there is no output target and the parsed lines only
        # exist on this instance after run().
        return False


class CurrencyExchanger(luigi.Task):
    """Wraps another task instance and converts the amounts it returns."""

    task = luigi.Parameter()
    currency_to = luigi.Parameter()

    # Multipliers keyed by (currency_from, currency_to).
    exchange_rates = {('USD', 'USD'): decimal.Decimal(1),
                      ('EUR', 'USD'): decimal.Decimal('1.25')}

    def requires(self):
        return self.task  # Note that you still need to state this explicitly

    def get_line(self, line):
        """Return the wrapped task's line converted into ``currency_to``."""
        amount, currency_from = self.task.get_line(line)
        return amount * self.exchange_rates[(currency_from, self.currency_to)], self.currency_to

    def complete(self):
        return False


class InstanceWrapperTest(unittest.TestCase):

    ''' This test illustrates that tasks can have tasks as parameters

    This is a more complicated variant of factorial_test.py which is an example of
    tasks communicating directly with other tasks. In this case, a task takes another
    task as a parameter and wraps it.

    Also see wrap_test.py for an example of a task class wrapping another task class.

    Not the most useful pattern, but there's actually been a few cases where it was
    pretty handy to be able to do that. I'm adding it as a unit test to make sure that
    new code doesn't break the expected behavior.
    '''

    def test(self):
        d = datetime.date(2012, 1, 1)
        r = ReportReader(d)
        ex = CurrencyExchanger(r, 'USD')

        luigi.build([ex], local_scheduler=True)
        self.assertEqual(ex.get_line(0), (decimal.Decimal('10.0'), 'USD'))
        # 4.0 EUR at the 1.25 rate.
        self.assertEqual(ex.get_line(1), (decimal.Decimal('5.0'), 'USD'))
94 | 95 | 96 | 97 |
98 |
99 |
100 | {% block content %}{% end %} 101 |
102 |
103 |
104 |
105 |
106 | 107 | 108 | --------------------------------------------------------------------------------