├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENCE.txt ├── README.md ├── appveyor └── appveyor.yml ├── bin ├── tuttle └── tuttle-extend-workflow ├── ci ├── odbc.ini ├── test_with_coverage.bat ├── touch-x.tuttle └── tuttlefile ├── debian ├── changelog ├── compat ├── control ├── rules ├── tuttle.links └── tuttle.triggers ├── docker └── Dockerfile ├── dockerfile4deb ├── docs ├── examples │ ├── musketeers │ │ ├── Les_trois_mousquetaires.zip │ │ └── tuttlefile │ └── musketeers_tuttle_dir │ │ ├── __init__.py │ │ ├── html_report_assets │ │ ├── bootstrap.min.css │ │ ├── bootstrap.min.js │ │ ├── jquery.min.js │ │ └── viz.js │ │ ├── last_workflow.pickle │ │ ├── processes │ │ ├── logs │ │ │ ├── tuttlefile_18_err.txt │ │ │ ├── tuttlefile_18_stdout.txt │ │ │ ├── tuttlefile_1_err.txt │ │ │ ├── tuttlefile_1_stdout.txt │ │ │ ├── tuttlefile_24_err.txt │ │ │ ├── tuttlefile_24_stdout.txt │ │ │ ├── tuttlefile_5_err.txt │ │ │ └── tuttlefile_5_stdout.txt │ │ ├── tuttlefile_1 │ │ ├── tuttlefile_18 │ │ ├── tuttlefile_24 │ │ └── tuttlefile_5 │ │ │ └── tuttlefile_5.py │ │ └── report.html ├── how_to_make_a_release.md ├── index.md ├── reference │ ├── index.md │ ├── processors.md │ ├── resources_and_urls.md │ ├── resources_authentication.md │ └── tuttlefile_syntax.md ├── screenshot_report.png ├── tuto_Makefile.MD ├── tuto_parametrized_workflow │ ├── dep_graph_3_imgs.png │ ├── new_img.png │ └── tuto_parametrized_workflow.MD └── tutorial_musketeers │ ├── Les_trois_mousquetaires.zip │ ├── question_mark.jpg │ ├── screenshot_report_step2.png │ ├── screenshot_report_step7.5.png │ ├── screenshot_report_step7.png │ ├── tutorial.md │ └── tutorial_steps │ ├── step10 │ ├── myplot.gnuplot │ └── tuttlefile │ ├── step11 │ ├── myplot.gnuplot │ └── tuttlefile │ ├── step2 │ └── tuttlefile │ ├── step3 │ └── tuttlefile │ ├── step4 │ ├── characters_count.png │ └── tuttlefile │ ├── step5 │ └── tuttlefile │ ├── step6 │ └── tuttlefile │ ├── step7 │ └── tuttlefile │ ├── step8 │ └── tuttlefile │ └── step9 │ 
├── characters_count.png │ └── tuttlefile ├── mkdocs.yml ├── package4win.py ├── project ├── TODO next.txt └── ideas.txt ├── requirements-dev.txt ├── setup.py ├── tests ├── A ├── __init__.py ├── functional_tests │ ├── A │ ├── B │ ├── __init__.py │ ├── b-produces-x.tuttle │ ├── everything-produces-result.tuttle │ ├── test_errors │ │ ├── A │ │ ├── test_error_in_process.py │ │ ├── test_error_in_workflow.py │ │ ├── test_keep_going.py │ │ └── test_no_tuttlefile.py │ ├── test_extend_workflow.py │ ├── test_invalidate_command.py │ ├── test_invalidate_resource.py │ ├── test_keyboardinterrupt.py │ ├── test_preprocessors.py │ ├── test_resources │ │ ├── A │ │ └── test_file_resource.py │ ├── test_standard_behaviour.py │ ├── test_threshold.py │ └── tuttlefile ├── included_project.tuttle ├── test_addons │ ├── .tuttlepass │ ├── A │ ├── a_lib.py │ ├── bad_csv.csv │ ├── ftp │ │ └── ftp_resource │ ├── s3server.py │ ├── test.csv │ ├── test_csv_addon.py │ ├── test_ftp.py │ ├── test_hdfs.py │ ├── test_net_addon.py │ ├── test_odbc.py │ ├── test_postgres.py │ ├── test_pycurl.py │ ├── test_python.py │ ├── test_s3_addon.py │ ├── test_sqlite.py │ ├── tests.sqlite │ └── utf8.csv ├── test_authentication.py ├── test_figures_formating.py ├── test_file.py ├── test_log_follower.py ├── test_processors │ ├── test_bat_processor.py │ └── test_shell_processor.py ├── test_project_parser.py ├── test_report.py ├── test_run_parallel.py ├── test_workflow.py ├── test_workflow_builder.py └── utf8_file.txt └── tuttle ├── VERSION ├── __init__.py ├── addons ├── __init__.py ├── csv_addon.py ├── ftp.py ├── hdfs.py ├── net.py ├── netutils.py ├── odbc.py ├── postgres.py ├── python.py ├── s3.py ├── sqlite.py └── utf8.csv ├── cli_tuttle.py ├── cli_tuttle_extend_workflow.py ├── commands.py ├── error.py ├── extend_workflow.py ├── figures_formating.py ├── invalidation.py ├── log_follower.py ├── process.py ├── processors └── __init__.py ├── project_parser.py ├── report ├── __init__.py ├── dot_repport.py ├── 
html_report_assets │ ├── bootstrap.min.css │ ├── bootstrap.min.js │ ├── jquery.min.js │ └── viz.js ├── html_repport.py └── report_template.html ├── resource.py ├── tuttle_directories.py ├── utils.py ├── version.py ├── workflow.py ├── workflow_builder.py └── workflow_runner.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python and ecosystem files 2 | *.pyc 3 | .coverage 4 | env_tuttle 5 | env 6 | env3 7 | cover 8 | build 9 | dist 10 | tuttle.egg-info 11 | 12 | # Files generated by executing tuttle projects 13 | .tuttle 14 | 15 | #Editors 16 | .idea 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | addons: 3 | postgresql: "9.2" 4 | 5 | services: 6 | - docker 7 | - postgresql 8 | 9 | language: python 10 | 11 | python: 12 | - '2.7' 13 | 14 | before_install: 15 | - sudo add-apt-repository ppa:dh-virtualenv/stable -y 16 | - sudo apt-get update -qq 17 | - sudo apt-get install -y --force-yes python python-pip python-virtualenv dh-virtualenv build-essential fakeroot devscripts 18 | debhelper 19 | # Install odbc 20 | - sudo apt-get install unixodbc-bin unixodbc odbc-postgresql unixodbc-dev 21 | - sudo odbcinst -i -d -f /usr/share/psqlodbc/odbcinst.ini.template 22 | - sudo odbcinst -i -s -l -n tuttle_test_db -f ci/odbc.ini 23 | 24 | 25 | install: 26 | - pip install -r requirements-dev.txt 27 | - psql -c 'CREATE DATABASE tuttle_test_db;' -U postgres 28 | 29 | 30 | script: 31 | # Running tests 32 | - pip install . 33 | - python tests/test_addons/test_hdfs.py install 34 | - . 
tests/hadoop/vars.sh 35 | - nosetests -v 36 | # Making .deb package 37 | - dpkg-buildpackage -us -uc 38 | - mkdir -p dist/debian 39 | - mv -v ../tuttle_* dist/debian/ 40 | - sudo dpkg -i dist/debian/tuttle_0.6-1_amd64.deb 41 | # Be sure the .deb package works 42 | - cd ci 43 | - tuttle run -j 3 44 | - cp world-cities.csv workflow_execution_success 45 | - cp from-python workflow_execution_success2 46 | - cp from-shell-or-batch workflow_execution_success3 47 | - cd .. 48 | # building docker image 49 | - cd docker 50 | - docker login -u "tuttle" -p "$DOCKER_PASS" 51 | - docker build -t tuttle/tuttle:master . 52 | # - docker tag tuttle/tuttle:master tuttle/tuttle:latest 53 | - docker tag tuttle/tuttle:master tuttle/tuttle:v0.6-dev2 54 | - docker push tuttle/tuttle 55 | - docker logout 56 | - cd .. 57 | 58 | deploy: 59 | skip_cleanup: true 60 | provider: releases 61 | api_key: 62 | secure: BmKAsspBpvzC1aSlwPHwDkuPYKEAirfFL9yWCoveMefH5c2TU2xurI5W1Nig2beW8JV54rcgvPAIYOMUHaEnMcFBix6G3ze7kbhIw8sFLNP2c7OiQrd6u7QY6BpsxyEq7Z2Ef6aajm1Nyj5H2AEUR63t2VFCilwWj2Wdzr0Rh/8= 63 | file: dist/debian/tuttle_0.6-1_amd64.deb 64 | skip_cleanup: true 65 | on: 66 | repo: lexman/tuttle 67 | tags: true 68 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | New on Version 0.6 2 | === 3 | 4 | ## Authentication 5 | * Access your protected resources by setting passwords in a .tuttlepass file 6 | 7 | ## Report 8 | * Dependency graph from left to right is easier to read than top to bottom 9 | * Logs can be accessed even if the process is not complete yet 10 | * Link to find definition of process that creates a resource 11 | * Nicer durations in hours, minutes, seconds 12 | 13 | ## Resources and processors 14 | * odbc resources and processor for handling any SQL database 15 | * ftp resources. 
Available for download processor 16 | * Download processor uses curl witch makes it more robust for long downloads 17 | * Download processor can have multiple inputs, in order to ensure downloading in a subdirectory 18 | * hdfs resources 19 | 20 | New on Version 0.5 21 | === 22 | 23 | ## Parallelism 24 | * Tuttle can now run several processes in parallel in respect to dependency order. For example, ``tuttle run --jobs 2`` will run your workflow with two workers. Default is still 1. 25 | * Live logs : you don't need anymore to wait until a process is complete to see the logs anymore. As soon as a line is complete it is displayed. 26 | * With ``--keep-going`` option, ``tuttle run`` doesn't stop at first error but tries to process as much as it can. Thus multiple failures *can* occur. Also running a failing process with ``keep-going`` will try to run all remaining processes 27 | 28 | ## Other 29 | * New ``check-integrity`` option validates that no resource have changed since tuttle have produced them 30 | * Two processes without outputs can't have exactly the same inputs because we can't make the difference between them 31 | * Error message in report for failing (pre-)processes 32 | * Version in report and in dumps so we can remember with which tuttle the data was crafted 33 | * Interrupting tuttle with ^C will set running processes in error as **aborted** 34 | 35 | ## Internals 36 | * Major refactoring of the invalidation system in order to make it easier to reason about 37 | * Only one call to ``exists()`` per resource and per run, because checking if an external piece of exist can be long. 
Also ``signature()`` is called maximum once because it can be *very* long 38 | * Be sure to terminate every process that might have been created by processors after running the workflow 39 | 40 | ## Bug fixes 41 | * Invalidation is now coherent for a processes without outputs : once it have succeeded, it won't run again 42 | * Fixed persistence of logs in the ``.tuttle`` directory when a process id changes (ie : when its position change in the tuttlefile) 43 | * ``--threshold`` now take into account duration of processes that don't create outputs 44 | 45 | ## Resources and processors 46 | * Running tuttle with a postgresql resource will fail with an explicit error message if it can't connect to the database instead of saying that resources don't exists 47 | 48 | New on Version 0.4 49 | === 50 | 51 | ## Parametric processes 52 | ... To describe a workflow according to a configuration file or a the content of a directory : 53 | * 'preprocesses' are run before the workflow is executed 54 | * you can add processes to a workflow with the new command ``tuttle-extend-workflow`` from a preprocesses 55 | * a new tutorial explains how it works in detail 56 | 57 | ## Other 58 | * coma is DEPRECATED to separate resources in dependency definitions. You should now use space instead 59 | * [docker images](https://hub.docker.com/r/tuttle/tuttle/) are available to use tuttle 60 | 61 | ## Bug fixes 62 | * escape process ids in the report 63 | * ``file://`` is not a valid resource 64 | * ``!shell`` does not stand for processor ``hell`` 65 | 66 | 67 | New on Version 0.3 68 | === 69 | 70 | ## New "include" statement 71 | ... 
To split a tuttle project in several files 72 | 73 | ## More documentation 74 | the reference lists all the resources and processors available 75 | 76 | ## New resources and processors : 77 | * PostgreSQL tables, views, functions and index resources 78 | * PostgreSQL Processor 79 | * https resources 80 | * AWS s3 resources (experimental) 81 | 82 | ## Better tests 83 | Part of tuttle's job is to connect to third party tools. Integration tests must cover these tools, like Postgresql or a web server... Two methods have been developed : 84 | * mock the third party tool with some python code (web server, s3 server) 85 | * use the third party tool if it is installed on the machine (postgresql) 86 | 87 | ## A few bug fixes 88 | * bug on install that required jinja2 before installing dependencies 89 | 90 | New on Version 0.2 91 | === 92 | 93 | ## New resources and processors : 94 | * SQLite tables, views, triggers and index resources 95 | * SQLite Processor 96 | * http resources 97 | * download processor 98 | * Pyton Processor 99 | 100 | ## A few bug fixes 101 | 102 | ## And a tutorial as the first step to the doc ! 103 | 104 | 105 | V0.1 : first official release 106 | === 107 | The goal of 0.1 is to show the intended usage of tuttle, in term of command line workflow. 
-------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | MIT License (MIT) 2 | 3 | Copyright (c) 2015 Alexandre Bonnasseau aka Lexman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # News 2 | April 7th : Version 0.6 Alpha 2 is [available](https://github.com/lexman/tuttle/releases/tag/v0.6-dev2) ! 3 | 4 | # Tuttle : Make for data 5 | 6 | 7 | This tool has been designed to help you create data as a team in an industrial environment, with reliability in mind. 8 | 9 | Whether you change the scripts, merge your work with teammate's, checkout another branch of code, Tuttle will re-compute the data for you, but only the part that changed. 
10 | Most of all, Tuttle GUARANTIES the result you expect from your source files, every time you run it, on every plateform. 11 | 12 | 13 | # Syntax 14 | 15 | Here's an example of the syntax of tuttle : this projects aims a finding the importance of 16 | each musketeer in the novel *The Three Musketeers*. The text has to be extracted from a zip file, 17 | and the whole workflow should produce a png bar graph and a csv file you can import in our favorite 18 | spreadsheet software : 19 | 20 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 21 | # extracts the text of the novel from the archive 22 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 23 | 24 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt ! python 25 | # reads the text and counts the occurrences of each musketeer (comparisons 26 | # are made in lower case to avoid surprises !) 27 | # -*- coding: utf8 -*- 28 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan"] 29 | with open('characters_count.dat', 'w') as f_out: 30 | with open('Les_trois_mousquetaires.txt') as f_in: 31 | content_low = f_in.read().lower() 32 | for name in names: 33 | name_low = name.lower() 34 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 35 | 36 | file://characters_count.csv <- file://characters_count.dat 37 | # Creates a file readable by a spreadsheet software : 38 | # * add quotes around the name of the character 39 | # * add Windows style new lines 40 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 41 | 42 | file://characters_count.png <- file://characters_count.dat 43 | # Plot the data with gnuplot. 
You need to have gnuplot installed 44 | gnuplot <<$script$ 45 | set terminal png 46 | set output "characters_count.png" 47 | plot "characters_count.dat" using 2: xtic(1) with histogram 48 | $script$ 49 | 50 | 51 | When you run this project, you get a [report](http://lexman.github.io/tuttle/docs/examples/musketeers_tuttle_dir/report.html) of every 52 | thing that has been run, when, whether it succeeded, an access to the logs, and... A nice dependency graph ! 53 | 54 | ![Dependency graph](docs/screenshot_report.png) 55 | 56 | You'll find details on this workflow on the dedicated [tutorial](docs/tutorial_musketeers/tutorial.md). 57 | 58 | Please note that Tuttle is at a very early stage of development and must be considered as alpha, therefore syntax as 59 | well as command line options are likely to change. 60 | 61 | 62 | # Install 63 | You can find [download tuttle](https://github.com/lexman/tuttle/releases) and install it on your system : 64 | 65 | * on Windows, download the .msi installer 66 | * on debian and ubuntu a .deb is provided 67 | * on other systems, you need to install [python 2.7](https://www.python.org/downloads/release) and install tuttle from the sources : 68 | ``` 69 | git clone https://github.com/lexman/tuttle 70 | cd tuttle 71 | python setup.py install 72 | ``` 73 | 74 | # Hacking 75 | 76 | 77 | [![AppVeyor Windows build status](https://ci.appveyor.com/api/projects/status/github/lexman/tuttle)](https://ci.appveyor.com/project/lexman/tuttle) 78 | [![Travis Linux build status](https://travis-ci.org/lexman/tuttle.png)](https://travis-ci.org/lexman/tuttle) 79 | 80 | Tuttle is a python project you can download and install : 81 | 82 | git clone https://github.com/lexman/tuttle 83 | cd tuttle 84 | python setup.py install 85 | 86 | 87 | 88 | Contributions are very welcome through pull request. You can contribute to : 89 | * documentation : formal doc, tutorials 90 | * code : improve tuttle kernel, add new extensions : spreadshits, mongodb, hdfs, etc. 
Code have to come with tests and documentation 91 | * tests : use Tuttle for your projects and report bugs 92 | * syntax : help define the perfect way to describe workflows 93 | * design : please help improve the look of the [report](http://lexman.github.io/tuttle/docs/sales_assets/tuttle_report.html) to ease readability ! 94 | -------------------------------------------------------------------------------- /appveyor/appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | 3 | matrix: 4 | - PYTHON: "C:\\Python27" 5 | PYTHON_VERSION: "2.7.9" 6 | PYTHON_ARCH: "32" 7 | PSYCOPG2: "psycopg2-2.6.1.win32-py2.7-pg9.4.4-release.exe" 8 | 9 | - PYTHON: "C:\\Python27-x64" 10 | PYTHON_VERSION: "2.7.9" 11 | PYTHON_ARCH: "64" 12 | PSYCOPG2: "psycopg2-2.6.1.win-amd64-py2.7-pg9.4.4-release.exe" 13 | 14 | install: 15 | 16 | # Install the build dependencies of the project. If some dependencies contain 17 | # compiled extensions and are not provided as pre-built wheel packages, 18 | # pip will build them from source using the MSVC compiler matching the 19 | # target Python version and architecture 20 | - python -m pip install -r requirements-dev.txt" 21 | - python --version 22 | - python -m pip freeze 23 | 24 | # Install windows version of psycopg2 from http://www.stickpeople.com/projects/python/win-psycopg/ 25 | - ps: "Invoke-WebRequest http://www.stickpeople.com/projects/python/win-psycopg/2.6.1/$env:PSYCOPG2 -OutFile $env:PSYCOPG2" 26 | - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% 27 | - easy_install %PSYCOPG2% 28 | 29 | build: false # Not a C# project, build stuff at the test step instead. 30 | 31 | test_script: 32 | 33 | # Install the dependencies for the project 34 | - python -m pip install -r requirements-dev.txt" 35 | - python -m pip install ." 
36 | - python -m pip freeze 37 | # Build the compiled extension and run the project tests 38 | - nosetests 39 | 40 | # If tests are successful, create an installable package for the project. 41 | - python package4win.py bdist_msi 42 | 43 | # cleanup 44 | - python -m pip uninstall -y tuttle 45 | 46 | # ... And check that tuttle works 47 | # Ensure we know exactly the name of the installer, on every platform (amd64 and x86) 48 | # http://stackoverflow.com/questions/14704333/using-wildard-with-dos-copy-command-corrupts-destination-file 49 | - echo f | xcopy dist\tuttle-*.msi dist\tuttle-installer.msi 50 | - cd dist 51 | - msiexec /i tuttle-installer.msi /qn /norestart /log C:\projects\install_tuttle.log 52 | - "PATH=%PATH%;\"C:\\Program Files (x86)\\tuttle\";\"C:\\Program Files\\tuttle\"" 53 | - cmd: more C:\projects\install_tuttle.log 54 | - cmd: cd C:\projects\tuttle\ci 55 | - cmd: tuttle -h 56 | - cmd: tuttle run -j 3 57 | - cmd: copy world-cities.csv workflow_execution_success.html 58 | - cmd: copy from-python workflow_execution_success2.html 59 | - cmd: copy from-shell-or-batch workflow_execution_success3.html 60 | 61 | artifacts: 62 | # Archive the generated package in the ci.appveyor.com build report. 
63 | - path: dist\* 64 | 65 | #on_success: 66 | # - TODO: upload the content of dist/*.msi to github releases 67 | deploy: 68 | description: 'Version $(appveyor_repo_tag_name)' 69 | provider: GitHub 70 | auth_token: 71 | secure: zoaeXycWHS3rNbTzM9C5TshvVEp6hzXDPrqHK1aBdxiXr5aJd8gOHfJUz5XYrLgu 72 | artifact: /tuttle-0.*\.msi/ 73 | draft: true 74 | prerelease: true 75 | force_update: true 76 | on: 77 | appveyor_repo_tag: true # deploy on tag push only -------------------------------------------------------------------------------- /bin/tuttle: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import sys 5 | from multiprocessing import freeze_support 6 | from os.path import abspath, dirname, join 7 | 8 | if getattr(sys, 'frozen', False): 9 | # frozen 10 | tuttle_module = join(dirname(abspath(sys.executable)), '..', '..', 'tuttle') 11 | else: 12 | # unfrozen 13 | tuttle_module = join(dirname(abspath(__file__)), '..', '..', 'tuttle') 14 | sys.path.insert(0, tuttle_module) 15 | 16 | from tuttle.cli_tuttle import tuttle_main 17 | 18 | if __name__ == '__main__': 19 | freeze_support() 20 | sys.exit(tuttle_main()) -------------------------------------------------------------------------------- /bin/tuttle-extend-workflow: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import sys 5 | from os.path import abspath, dirname, join 6 | 7 | if getattr(sys, 'frozen', False): 8 | # frozen 9 | tuttle_module = join(dirname(abspath(sys.executable)), '..', '..', 'tuttle') 10 | else: 11 | # unfrozen 12 | tuttle_module = join(dirname(abspath(__file__)), '..', '..', 'tuttle') 13 | sys.path.insert(0, tuttle_module) 14 | 15 | from tuttle.cli_tuttle_extend_workflow import tuttle_extend_workflow_main 16 | 17 | if __name__ == '__main__': 18 | sys.exit(tuttle_extend_workflow_main()) 
-------------------------------------------------------------------------------- /ci/odbc.ini: -------------------------------------------------------------------------------- 1 | [tuttle_test_db] 2 | Description = PostgreSQL 3 | Driver = PostgreSQL ANSI 4 | Trace = No 5 | TraceFile = /tmp/psqlodbc.log 6 | Database = tuttle_test_db 7 | Servername = localhost 8 | UserName = 9 | Password = 10 | Port = 11 | ReadOnly = Yes 12 | RowVersioning = No 13 | ShowSystemTables = No 14 | ShowOidColumn = No 15 | FakeOidIndex = No 16 | ConnSettings = 17 | -------------------------------------------------------------------------------- /ci/test_with_coverage.bat: -------------------------------------------------------------------------------- 1 | nosetests --with-coverage --cover-html --cover-package=tuttle 2 | -------------------------------------------------------------------------------- /ci/touch-x.tuttle: -------------------------------------------------------------------------------- 1 | file://{{ x }} <- 2 | echo {{ x }} > {{ x }} 3 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | tuttle (0.6-1) UNRELEASED; urgency=medium 2 | 3 | * Access your protected resources by setting passwords in a .tuttlepass file for authentication 4 | * Dependency graph from left to right is easier to read than top to bottom 5 | * Logs can be accessed from report even if the process is not complete yet 6 | * Link in report to find definition of process that creates a resource 7 | * Nicer durations in hours, minutes, seconds 8 | * odbc resources and processor for handling any SQL database 9 | * ftp resources. 
Available for download processor 10 | * Download processor uses curl witch makes it more robusts for long downloads 11 | * Download processor can have multiple inputs, in order to ensure downloading in a subdirectory 12 | 13 | -- Lexman Sun, 01 Apr 2018 17:00:00 +0200 14 | 15 | tuttle (0.5-1) UNRELEASED; urgency=medium 16 | 17 | * TBD... 18 | * Run several processes in parallel with --jobs option. 19 | * Live logs : as soon as a line is complete it is displayed. 20 | * Don't stop at first error with --keep-going 21 | * --check-integrity option validates that no resource have changed since tuttle have produced them 22 | * Two processes without outputs can't have exactly the same inputs because we can't make the difference between them 23 | * Error message in report for failing (pre-)processes 24 | * Version in report and in dumps so we can remember with which tuttle the data was crafted 25 | * Interrupting tuttle with ^C will set running processes in error as aborted 26 | * Major refactoring of the invalidation system in order to make it easier to reason about 27 | * Only one call to exists() per resource and per run and no more than one call to signature(), because those calls can be long 28 | * Be sure to terminate every sub-process after running the workflow 29 | * Invalidation is now coherent for a processes without outputs : once it have succeeded, it won't run again 30 | * Fixed persistence of logs in the ``.tuttle`` directory when a process id changes (ie : when its position change in the tuttlefile) 31 | * --threshold now take into account duration of processes that don't create outputs 32 | * Running tuttle with a postgresql resource will fail with an explicit error message if it can't connect to the database instead of saying that resources don't exists 33 | 34 | -- Lexman Sun, 06 Jul 2017 17:00:00 +0200 35 | 36 | 37 | tuttle (0.4-1) UNRELEASED; urgency=medium 38 | 39 | * Preprocesses are run before the workflow is executed 40 | * Parametric processes can 
be added to a workflow with the new command ``tuttle-extend-workflow`` from a preprocesses 41 | * A new tutorial explains how to declare parametric processes in detail 42 | * Coma is DEPRECATED in favor of space to separate resources in dependency definitions 43 | * Docker image 44 | * Bugfixes 45 | 46 | -- Lexman Thu, 01 Jun 2016 17:00:00 -0500 47 | 48 | 49 | tuttle (0.3-1) UNRELEASED; urgency=medium 50 | 51 | * New "include" statement in order to split a tuttle project in several files 52 | * New PostgreSQL resource covering tables, views, functions and index 53 | * New PostgreSQL Processor 54 | * New https resources 55 | * New AWS s3 resources (experimental) 56 | * Better integration tests with external tool 57 | * A complete documentation referencing all available processors and resources 58 | * Fixed bug on install that required jinja2 even before running setup.py 59 | 60 | 61 | -- Lexman Mon, 30 Nov 2015 09:04:00 +0700 62 | 63 | tuttle (0.2-1) UNRELEASED; urgency=medium 64 | 65 | * File resources 66 | * SQLite tables, views, triggers and index resources 67 | * http resources 68 | * Pyton Processor 69 | * SQLite Processor 70 | * Tutorial 71 | 72 | -- Lexman Wed, 19 Aug 2015 15:09:48 +0200 73 | 74 | tuttle (0.1-1) unstable; urgency=low 75 | 76 | * Initial release 77 | * Demonstration of development flow with tuttle commands 78 | 79 | -- Lexman Fri, 22 May 2015 17:00:00 +0200 80 | 81 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 9 -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: tuttle 2 | Section: python 3 | Priority: extra 4 | Maintainer: Lexman 5 | Build-Depends: debhelper (>= 9), python 6 | Standards-Version: 3.9.5 7 | 8 | Package: tuttle 9 | Architecture: any 10 | Pre-Depends: dpkg 
(>= 1.16.1), python2.7, ${misc:Pre-Depends} 11 | Depends: ${python:Depends}, ${misc:Depends} 12 | Description: Make for data 13 | Reliably create data from different sources. Work as a team in an industrial environment... 14 | A tool for continuous data processing -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | override_dh_virtualenv: 4 | dh_virtualenv --extra-pip-arg='--no-binary' --extra-pip-arg=':all:' 5 | %: 6 | dh $@ --with python-virtualenv -------------------------------------------------------------------------------- /debian/tuttle.links: -------------------------------------------------------------------------------- 1 | /usr/share/python/tuttle/bin/tuttle /usr/bin/tuttle -------------------------------------------------------------------------------- /debian/tuttle.triggers: -------------------------------------------------------------------------------- 1 | # Register interest in Python interpreter changes (Python 2 for now); and 2 | # don't make the Python package dependent on the virtualenv package 3 | # processing (noawait) 4 | interest-noawait /usr/bin/python2.7 5 | 6 | # Also provide a symbolic trigger for all dh-virtualenv packages 7 | interest dh-virtualenv-interpreter-update 8 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:xenial 2 | MAINTAINER Lexman 3 | RUN apt-get update && apt-get install -y python python-psycopg2 postgresql-client python-pip libcurl4-openssl-dev unixodbc-dev libssl-dev 4 | RUN pip install --upgrade pip 5 | RUN pip install https://github.com/lexman/tuttle/archive/master.zip 6 | RUN chmod +x /usr/local/bin/tuttle* 7 | VOLUME ["/project"] 8 | WORKDIR /project 
-------------------------------------------------------------------------------- /dockerfile4deb: -------------------------------------------------------------------------------- 1 | FROM debian:jessie 2 | 3 | LABEL Description="This image is used to build a debian package of tuttle" Version="0.1" 4 | 5 | RUN mkdir project_tuttle mkdir project_tuttle/tuttle && apt-get update && apt-get install -y python python-pip python-virtualenv dh-virtualenv debhelper 6 | 7 | ADD tuttle project_tuttle/tuttle/tuttle/ 8 | ADD bin project_tuttle/tuttle/bin/ 9 | ADD debian project_tuttle/tuttle/debian/ 10 | ADD setup.py requirements.txt project_tuttle/tuttle/ 11 | 12 | WORKDIR project_tuttle/tuttle/ 13 | 14 | RUN virtualenv env_tuttle && . env_tuttle/bin/activate && pip install -r requirements.txt 15 | 16 | VOLUME ["/result"] 17 | 18 | CMD . env_tuttle/bin/activate && dpkg-buildpackage -us -uc && ls && ls .. && cp ../* /result -------------------------------------------------------------------------------- /docs/examples/musketeers/Les_trois_mousquetaires.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers/Les_trois_mousquetaires.zip -------------------------------------------------------------------------------- /docs/examples/musketeers/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | # extracts the text of the novel from the archive 3 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 4 | 5 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 6 | # reads the text and counts the occurrences of each musketeer (comparaisons 7 | # are made in lower case to avoid surprises !) 
8 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan"] 9 | with open('characters_count.dat', 'w') as f_out: 10 | with open('Les_trois_mousquetaires.txt') as f_in: 11 | content_low = f_in.read().lower() 12 | print("{} chars in the novel".format(len(content_low))) 13 | for name in names: 14 | name_low = name.lower() 15 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 16 | print("{} - done".format(name)) 17 | 18 | file://characters_count.csv <- file://characters_count.dat 19 | # Creates a file readable by a spreadsheet software : 20 | # * add quotes around the name of the character 21 | # * add Windows style new lines 22 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 23 | 24 | file://characters_count.png <- file://characters_count.dat 25 | # Plot the data with gnuplot. You need to have gnuplot installed 26 | gnuplot <<$script$ 27 | set terminal png 28 | set output "characters_count.png" 29 | plot "characters_count.dat" using 2: xtic(1) with histogram 30 | $script$ 31 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lexman_2' 2 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/last_workflow.pickle: -------------------------------------------------------------------------------- 1 | (ituttle.workflow 2 | Workflow 3 | p0 4 | (dp1 5 | S'_resources' 6 | p2 7 | (dp3 8 | Vfile://characters_count.dat 9 | p4 10 | ccopy_reg 11 | _reconstructor 12 | p5 13 | (ctuttle.resources 14 | FileResource 15 | p6 16 | c__builtin__ 17 | object 18 | p7 19 | Ntp8 20 | Rp9 21 | (dp10 22 | S'url' 23 | p11 24 | g4 25 | sS'creator_process' 26 | p12 27 | (ituttle.process 28 | Process 29 | p13 30 | (dp14 31 | S'log_stderr' 32 | p15 33 | S'.tuttle/processes/logs/tuttlefile_5_err.txt' 34 | p16 35 | 
sS'success' 36 | p17 37 | I01 38 | sS'_outputs' 39 | p18 40 | (lp19 41 | g9 42 | asS'_filename' 43 | p20 44 | S'tuttlefile' 45 | p21 46 | sS'_id' 47 | p22 48 | S'tuttlefile_5' 49 | p23 50 | sS'log_stdout' 51 | p24 52 | S'.tuttle/processes/logs/tuttlefile_5_stdout.txt' 53 | p25 54 | sS'_processor' 55 | p26 56 | (ituttle.extensions.python 57 | PythonProcessor 58 | p27 59 | (dp28 60 | bsS'_code' 61 | p29 62 | Vnames = ["Athos", "Porthos", "Aramis", "d'Artagnan"]\u000awith open('characters_count.dat', 'w') as f_out:\u000a with open('Les_trois_mousquetaires.txt') as f_in:\u000a content_low = f_in.read().lower()\u000a print("{} chars in the novel".format(len(content_low)))\u000a for name in names:\u000a name_low = name.lower()\u000a f_out.write("{}\u005ct{}\u005cn".format(name, content_low.count(name_low)))\u000a print("{} - done".format(name))\u000a 63 | p30 64 | sS'_end' 65 | p31 66 | F1439306962.595049 67 | sS'_line_num' 68 | p32 69 | I5 70 | sS'_inputs' 71 | p33 72 | (lp34 73 | g5 74 | (g6 75 | g7 76 | Ntp35 77 | Rp36 78 | (dp37 79 | g11 80 | Vfile://Les_trois_mousquetaires.txt 81 | p38 82 | sg12 83 | (ituttle.process 84 | Process 85 | p39 86 | (dp40 87 | g15 88 | S'.tuttle/processes/logs/tuttlefile_1_err.txt' 89 | p41 90 | sg17 91 | I01 92 | sg18 93 | (lp42 94 | g36 95 | asg20 96 | g21 97 | sg22 98 | S'tuttlefile_1' 99 | p43 100 | sg24 101 | S'.tuttle/processes/logs/tuttlefile_1_stdout.txt' 102 | p44 103 | sg26 104 | (ituttle.processors 105 | ShellProcessor 106 | p45 107 | (dp46 108 | bsg29 109 | Vunzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt\u000a 110 | p47 111 | sg31 112 | F1439306962.01609 113 | sg32 114 | I1 115 | sg33 116 | (lp48 117 | g5 118 | (g6 119 | g7 120 | Ntp49 121 | Rp50 122 | (dp51 123 | g11 124 | Vfile://Les_trois_mousquetaires.zip 125 | p52 126 | sg12 127 | NsS'_path' 128 | p53 129 | V/media/sf_shared/musketeers/Les_trois_mousquetaires.zip 130 | p54 131 | sbasS'_start' 132 | p55 133 | F1439306961.805367 134 | sbsg53 135 | 
V/media/sf_shared/musketeers/Les_trois_mousquetaires.txt 136 | p56 137 | sbasg55 138 | F1439306962.190533 139 | sbsg53 140 | V/media/sf_shared/musketeers/characters_count.dat 141 | p57 142 | sbsVfile://characters_count.csv 143 | p58 144 | g5 145 | (g6 146 | g7 147 | Ntp59 148 | Rp60 149 | (dp61 150 | g11 151 | g58 152 | sg12 153 | (ituttle.process 154 | Process 155 | p62 156 | (dp63 157 | g15 158 | S'.tuttle/processes/logs/tuttlefile_18_err.txt' 159 | p64 160 | sg17 161 | I01 162 | sg18 163 | (lp65 164 | g60 165 | asg20 166 | g21 167 | sg22 168 | S'tuttlefile_18' 169 | p66 170 | sg24 171 | S'.tuttle/processes/logs/tuttlefile_18_stdout.txt' 172 | p67 173 | sg26 174 | g45 175 | sg29 176 | Vawk '{print "\u005c""$1"\u005c","$2"\u005cr"}' characters_count.dat > characters_count.csv\u000a 177 | p68 178 | sg31 179 | F1439306962.805264 180 | sg32 181 | I18 182 | sg33 183 | (lp69 184 | g9 185 | asg55 186 | F1439306962.742407 187 | sbsg53 188 | V/media/sf_shared/musketeers/characters_count.csv 189 | p70 190 | sbsg38 191 | g36 192 | sg52 193 | g50 194 | sVfile://characters_count.png 195 | p71 196 | g5 197 | (g6 198 | g7 199 | Ntp72 200 | Rp73 201 | (dp74 202 | g11 203 | g71 204 | sg12 205 | (ituttle.process 206 | Process 207 | p75 208 | (dp76 209 | g15 210 | S'.tuttle/processes/logs/tuttlefile_24_err.txt' 211 | p77 212 | sg17 213 | I01 214 | sg18 215 | (lp78 216 | g73 217 | asg20 218 | g21 219 | sg22 220 | S'tuttlefile_24' 221 | p79 222 | sg24 223 | S'.tuttle/processes/logs/tuttlefile_24_stdout.txt' 224 | p80 225 | sg26 226 | g45 227 | sg29 228 | Vgnuplot <<$script$\u000aset terminal png\u000aset output "characters_count.png"\u000aplot "characters_count.dat" using 2: xtic(1) with histogram\u000a$script$\u000a 229 | p81 230 | sg31 231 | F1439306963.7995 232 | sg32 233 | I24 234 | sg33 235 | (lp82 236 | g9 237 | asg55 238 | F1439306962.945624 239 | sbsg53 240 | V/media/sf_shared/musketeers/characters_count.png 241 | p83 242 | sbssS'_processes' 243 | p84 244 | (lp85 245 | g39 
246 | ag13 247 | ag62 248 | ag75 249 | asS'_resources_signatures' 250 | p86 251 | (dp87 252 | g4 253 | S'sha1:75b6ca99c53b6c151e9cd79ec4a6d4eeb7c66ab6' 254 | p88 255 | sg58 256 | S'sha1:61d837e200557de8ea11c4d5a745c5aca4cf54a0' 257 | p89 258 | sg38 259 | S'sha1:bcb47f4f2b104bc8a1bee9e4a274f47fa3bf8c81' 260 | p90 261 | sg52 262 | S'sha1:f01049d489c01d071498d5bd2468fcab88cf3ff8' 263 | p91 264 | sg71 265 | S'sha1:79bf3009505e207ce525b5a50cb41eca01c49a04' 266 | p92 267 | ssb. -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_18_err.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_18_err.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_18_stdout.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_18_stdout.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_1_err.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_1_err.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_1_stdout.txt: -------------------------------------------------------------------------------- 1 | Archive: Les_trois_mousquetaires.zip 2 | inflating: 
Les_trois_mousquetaires.txt 3 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_24_err.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_24_err.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_24_stdout.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_24_stdout.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_5_err.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_5_err.txt -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/logs/tuttlefile_5_stdout.txt: -------------------------------------------------------------------------------- 1 | 1389543 chars in the novel 2 | Athos - done 3 | Porthos - done 4 | Aramis - done 5 | d'Artagnan - done 6 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/tuttlefile_1: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 4 | -------------------------------------------------------------------------------- 
/docs/examples/musketeers_tuttle_dir/processes/tuttlefile_18: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 4 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/tuttlefile_24: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | gnuplot <<$script$ 4 | set terminal png 5 | set output "characters_count.png" 6 | plot "characters_count.dat" using 2: xtic(1) with histogram 7 | $script$ 8 | -------------------------------------------------------------------------------- /docs/examples/musketeers_tuttle_dir/processes/tuttlefile_5/tuttlefile_5.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from os import getcwd as __get_current_dir__ 3 | from sys import path as __python__path__ 4 | __python__path__.append(__get_current_dir__()) 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | -------------------------------------------------------------------------------- /docs/how_to_make_a_release.md: -------------------------------------------------------------------------------- 1 | # How to make a release of tuttle 2 | 3 | 4 | 1. Make sure the changelog (CHANGELOG.MD) is up to date 5 | 1. 
Make sure the ```Build version format``` in [Appveyor](https://ci.appveyor.com/project/lexman/tuttle/settings) is up to date with the intended release version, eg ```0.3-{build}``` 6 | 1. Change the version number in file ```tuttle/VERSION```, eg ```0.3-rc0``` 7 | 1. Change the version number of docker image in file ``.travis.yml``, eg ``- docker tag tuttle/tuttle:master tuttle/tuttle:v0.4`` 8 | 1. Add ``- docker tag tuttle/tuttle:master tuttle/tuttle:latest`` to the ``.travis.yml`` file 9 | 1. Update the debian version of the package : 10 | * Add the new version and the changelog in the file debian/changelog 11 | * Update travis.yml to follow the new version number in the package name. Package name is in 12 | - the name of the file to push to github release (line ```file: dist/debian/tuttle_0.3-1_amd64.deb```) 13 | - the command line to deploy the package to test (line ```- sudo dpkg -i dist/debian/tuttle_0.3-1_amd64.deb```) 14 | 1. Make sure everything has been pushed and all the tests pass on both [Appveyor](https://ci.appveyor.com/project/lexman/tuttle) (windows) and [Travis](https://travis-ci.org/lexman/tuttle) 15 | 1. Create a new RC tag for the intended version, eg ```v0.3-rc0``` 16 | 1. Push the tag : 17 | * Github will create a new release 18 | * Travis will make the deb64 package and push it to github release 19 | * Appveyor will create the win32 and win64 packages and push them to github release 20 | 1. Finish the release on github : 21 | * Compile a deb32 package on a debian environment and upload it to Github release 22 | * Copy and Paste the release changelog to Github release 23 | 24 | 1. Make new RCs until ready... And eventually make a final version. 25 | 1. Make the version in ```tuttle/VERSION``` ready for next release, eg ```0.4-pre``` 26 | 1. Make ```Build version format``` in [Appveyor](https://ci.appveyor.com/project/lexman/tuttle/settings) ready for next release, eg ```0.4-{build}``` 27 | 1.
Remove ``- docker tag tuttle/tuttle:master tuttle/tuttle:latest`` from the ``.travis.yml`` file 28 | 1. Change the version number of docker image in file ``.travis.yml``, eg ``- docker tag tuttle/tuttle:master tuttle/tuttle:v0.4-pre`` 29 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 4 | The best way to discover tuttle is the [main tutorial](tutorial_musketeers/tutorial.md) : counting the importance of each musketeer in the novel 5 | 'The Three Musketeers' : python + awk + gnuplot. It explains in deep detail how to use tuttle to work smoothly. 6 | 7 | If you're familiar with `make`, the article [Make vs tuttle](http://okfnlabs.org/blog/2016/03/25/make-vs-tuttle.html) on OKFN labs is also a good introduction. 8 | 9 | Once you're familiar with tuttle's workflow, you can find the details of processors and url schemes in the 10 | [tuttlefile reference](reference/tuttlefile_syntax.md) 11 | 12 | Some tuttle projects of interest : 13 | * https://github.com/datasets/world-cities/blob/master/scripts/tuttlefile : example of ``sqlite://`` resources and ``sqlite`` processor 14 | * https://github.com/lexman/carte-de-mon-departement : example of parametrized workflow 15 | 16 | If you're stuck because you don't know all your inputs at the time of writing your tuttlefile, for example because you want to process all the files 17 | from a directory, you can learn how to use [parametrized workflows](tuto_parametrized_workflow/tuto_parametrized_workflow.MD).
-------------------------------------------------------------------------------- /docs/reference/index.md: -------------------------------------------------------------------------------- 1 | # tuttlefile reference 2 | -------------------------------------------------------------------------------- /docs/reference/processors.md: -------------------------------------------------------------------------------- 1 | ## Processors 2 | 3 | ### shell 4 | ``shell`` is the default processor on *nix systems (e.g. Linux). The code is interpreted as a shell script which stops at 5 | the first error. 6 | 7 | ### bat 8 | ``bat`` is the default processor on windows. The code is interpreted as a batch script which stops at the first error. 9 | 10 | ### python 11 | The ``python`` processor runs the code as a python 2.7 script 12 | 13 | ### SQLite 14 | The ``sqlite`` processor is valid only if all input and output resources are ``sqlite://`` resources from the same 15 | database file. The processor will run the sql code inside that database. 16 | 17 | ### PostgreSQL 18 | The ``postgresql`` processor is valid only if all input and output resources are ``pg://`` resources from the same 19 | database. The processor will run the sql code inside that database. 20 | 21 | ### download 22 | The ``download`` processor is valid only if it has one ``http://``, ``https://`` or ``ftp://`` resource as input and one ``file://`` 23 | resource as output. The processor will download the resource and save it in the file. 24 | 25 | ### csv2sqlite 26 | The ``csv2sqlite`` processor is valid only if it has one ``file://`` resource as input and ``sqlite://`` resource as 27 | output. If the file is a valid CSV file, the processor will load it inside the output table, using the first line of 28 | the csv file as column names.
29 | 30 | ### Future plans 31 | The official list of requested processors is available as [github issues](https://github.com/lexman/tuttle/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor) 32 | 33 | NB : A lot of other magic transfer processors, like download and csv2sqlite, are planned for the future 34 | 35 | Writing your own processor is easy if you know the python language. So consider contributing... Pull requests are 36 | welcome ! 37 | -------------------------------------------------------------------------------- /docs/reference/resources_and_urls.md: -------------------------------------------------------------------------------- 1 | # Resources and urls 2 | tuttle allows you to create and access a wide variety of data, not only files, but resources over the internet or in your cluster, as long as you can describe it with a URL. 3 | tuttle implements several common types of resources : 4 | 5 | ## file 6 | ``file://`` urls reference either files relative to the tuttle file ``file://relative/path/to/file`` or an absolute path to 7 | the file reachable from the local system ``file:///absolute/path/to/file``. Path can be either standard files or 8 | directories. 9 | 10 | ## http - https 11 | Any [valid http url](https://en.wikipedia.org/wiki/Web_resource), like http://github.com . Note that http resources can't be removed by tuttle, therefore invalidation of an http 12 | resource will issue a warning. https:// is also supported. 13 | 14 | ## ftp 15 | Any ftp file or directory, like ftp://ftp.debian.org/debian/README. Like every other resource, you can [set authentication](resources_authentication.md) to 16 | the ftp server. They can be downloaded (not uploaded) with the download processor. 17 | 18 | ## sqlite 19 | A table, a view, an index or a trigger in an SQLite database.
For example, a table called ``mytable``, in an SQLite 20 | database in the file relative/path/to/sqlite_file (path is relative to the tuttlefile) has the url : 21 | ``` 22 | sqlite://relative/path/to/sqlite_file/mytable 23 | ``` 24 | 25 | Note that when tuttle removes the last table, view, index or trigger in the database, it removes the SQLite file. 26 | 27 | ## postgresql - pg: 28 | A Postgresql resource can either be : 29 | * a table 30 | * a view 31 | * a function 32 | 33 | Url structure is : 34 | ``` 35 | pg://hostname:port/database_name/schema_name/view_or_table_name 36 | ``` 37 | where schema is optional. For example, this is a valid url from the functional tests : 38 | ``` 39 | pg://localhost:5432/tuttle_test_db/test_table 40 | ``` 41 | 42 | You can also target the schema itself : 43 | ``` 44 | pg://localhost:5432/tuttle_test_db/schema_name/ 45 | ``` 46 | 47 | You can't include authentication in the url, on purpose, so that your password will never 48 | be visible in your version control system (eg git). When running tuttle, your system user must have write access to the 49 | database. You can either [use a ``.pgpass`` file in your user's home directory](http://www.postgresql.org/docs/9.4/static/libpq-pgpass.html) 50 | or [set PGUSER and PGPASSWORD environment variables](http://www.postgresql.org/docs/9.4/static/libpq-envars.html). 51 | 52 | ## Amazon S3 and compatible (experimental) 53 | An [S3 object from AWS](https://aws.amazon.com/s3/) or compatible service. Urls are in the form : 54 | ``` 55 | s3://service_endpoint/bucket_name/key_name 56 | ``` 57 | Where ``service_endpoint`` is the server address of the service provider. The standard address for AWS is ``s3.amazonaws.com`` but 58 | it can vary depending on [which datacenter your data is stored](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). For example, 59 | if your data is stored in Frankfurt, ``service_endpoint`` should be ``s3-website.eu-central-1.amazonaws.com``.
60 | 61 | 62 | There are several ways to specify credentials to your account, including setting AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment 63 | variables or creating a ``~/.aws/credentials`` configuration file. You can see the [full credential documentation](https://blogs.aws.amazon.com/security/post/Tx3D6U6WSFGOK2H/A-New-and-Standardized-Way-to-Manage-Credentials-in-the-AWS-SDKs) 64 | for further details. Remember, you should not commit credentials to your project version control system (eg git), but have a separate way to manage your configuration. 65 | 66 | This functionality should be considered experimental because it hasn't been properly tested on a real AWS account. Any feedback or improvement is welcome ! 67 | 68 | ## odbc 69 | Any table or partition from a table in a database with an odbc connector. Assuming your table is called ``my_table``, and the database is available with Data 70 | Source Name ``datasource_name``, the url would be : 71 | ```` 72 | odbc://datasource_name/my_table 73 | ```` 74 | ODBC resources have experimental support for partitioning, which means you write chunks of data according to a filter on columns. If your column is "my_col", this is a valid url : 75 | ```` 76 | odbc://datasource_name/my_table?my_col=a_value 77 | ```` 78 | Only one set of filters is allowed for the same table. 79 | 80 | ## hdfs 81 | Any file or directory in an hdfs storage. eg ``hdfs://myserver/path/to/my/file`` 82 | 83 | ## Future plans 84 | The official list of requested url schemes is available as [github issues](https://github.com/lexman/tuttle/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor) 85 | 86 | Writing your own resources is easy if you know the python language. So consider contributing... Pull requests are 87 | welcome !
88 | -------------------------------------------------------------------------------- /docs/reference/resources_authentication.md: -------------------------------------------------------------------------------- 1 | # Resources Authentication 2 | 3 | Some resources, like ftp files, need authentication to get access to them. That's why tuttle provides a *.tuttlepass* file 4 | to set a username and password to resources that need it. 5 | 6 | ## Warning 7 | Passwords are not in the tuttlefile because **you should never commit your password nor authentication into your source repository**. 8 | 9 | ## .tuttlepass structure 10 | 11 | You can set user and password to resources according to regular expressions on resources : 12 | 13 | ftp://ftp\.mysite\.com/lexman/.* lexman password 14 | http://download\.mysite\.com/protected/data.csv myaccount mypassword 15 | ftp://ftp\..* me mypassword 16 | 17 | Any regular expression, user name and password are separated with tabulations. 18 | The order matters, so the first regular expression that captures the resource defines the username and password. 19 | 20 | 21 | ## Location of .tuttlepass file 22 | 23 | On Linux and other unix systems, the .tuttlepass file is at the root of the user directory (ie ~/.tuttlepass) 24 | 25 | On Windows, the file is located at XXX 26 | 27 | It's always possible to override the .tuttlepass location by setting the environment variable TUTTLEPASSFILE. -------------------------------------------------------------------------------- /docs/reference/tuttlefile_syntax.md: -------------------------------------------------------------------------------- 1 | # Tuttlefile Syntax 2 | 3 | A tuttlefile is made with several sections which can be one of the following. 4 | 5 | ## Processes 6 | The main elements of a tuttlefile file are ``processes`` 7 | 8 | Each process describes how to get ``output resources`` from ``input resources`` with some ``code``.
A block of ``code`` 9 | is identified because every line starts with the same indentation. Code is interpreted by tuttle 10 | according to the ``processor`` chosen for the process. 11 | 12 | For example a process producing two outputs from three inputs according to my_processor can look like that 13 | 14 | ``` 15 | scheme://output/resource/1 scheme://output/resource/2 <- scheme://input/resource/1 scheme://input/resource/2 scheme://input/resource/3 ! my_processor 16 | first line of code 17 | second line of code 18 | ``` 19 | 20 | Input and output resources are both space separated lists of urls. An arrow ``<-`` separates inputs from outputs, and empty lists on either side 21 | are valid. 22 | 23 | Then comes an optional exclamation mark ``!`` with the processor name. By default, ``processor`` is ``shell`` under Linux 24 | (and ``batch`` under windows) 25 | 26 | Inputs, outputs and processor must all be on the same line (for the moment). 27 | 28 | ## Includes 29 | Also, tuttle projects can be split in several files with the ``include`` statement : 30 | 31 | ``` 32 | include another_tuttlefile.tuttle 33 | ``` 34 | 35 | ## Preprocesses 36 | If the usual syntax is not powerful enough to let you describe your workflow, maybe because there is yesterday's date in the name of input files, or 37 | you must apply the same processing to a list of images in a directory, you can use *preprocesses* to add parts to your workflow with code. 38 | 39 | All preprocesses are run early, even before tuttle checks that the workflow is valid. They shouldn't have side effects (like creating files) because 40 | tuttle can't clean after a preprocess has run. 41 | 42 | Preprocesses look like : 43 | 44 | |<< ! shell 45 | tuttle-extend-workflow img.tpl.tuttlefile img=IMG_001.jpg 46 | 47 | Preprocesses are an advanced feature of tuttle you can learn in the [parametrized workflow tutorial](tuto_parametrized_workflow/tuto_parametrized_workflow.MD).
48 | -------------------------------------------------------------------------------- /docs/screenshot_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/screenshot_report.png -------------------------------------------------------------------------------- /docs/tuto_parametrized_workflow/dep_graph_3_imgs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tuto_parametrized_workflow/dep_graph_3_imgs.png -------------------------------------------------------------------------------- /docs/tuto_parametrized_workflow/new_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tuto_parametrized_workflow/new_img.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/Les_trois_mousquetaires.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/Les_trois_mousquetaires.zip -------------------------------------------------------------------------------- /docs/tutorial_musketeers/question_mark.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/question_mark.jpg -------------------------------------------------------------------------------- /docs/tutorial_musketeers/screenshot_report_step2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/screenshot_report_step2.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/screenshot_report_step7.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/screenshot_report_step7.5.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/screenshot_report_step7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/screenshot_report_step7.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step10/myplot.gnuplot: -------------------------------------------------------------------------------- 1 | set terminal png 2 | set output "characters_count.png" 3 | plot "characters_count.dat" using 2: xtic(1) with histogram linecolor "green" 4 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step10/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 
| name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat, file://myplot.gnuplot 19 | gnuplot myplot.gnuplot 20 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step11/myplot.gnuplot: -------------------------------------------------------------------------------- 1 | set terminal png 2 | set output "characters_count.png" 3 | plot "characters_count.dat" using 2: xtic(1) with histogram linecolor "green" 4 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step11/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.zip <- http://lexman.github.io/tuttle/docs/musketeers_assets/Les_trois_mousquetaires.zip ! 
download 2 | 3 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 4 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 5 | 6 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 7 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 8 | with open('characters_count.dat', 'w') as f_out: 9 | with open('Les_trois_mousquetaires.txt') as f_in: 10 | content_low = f_in.read().lower() 11 | print("{} chars in the novel".format(len(content_low))) 12 | for name in names: 13 | name_low = name.lower() 14 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 15 | print("{} - done".format(name)) 16 | 17 | file://characters_count.csv <- file://characters_count.dat 18 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 19 | 20 | file://characters_count.png <- file://characters_count.dat, file://myplot.gnuplot 21 | gnuplot myplot.gnuplot 22 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step2/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "D'Artagnan"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content = f_in.read() 9 | print("{} chars in the novel".format(len(content))) 10 | for name in names: 11 | f_out.write("{}\t{}\n".format(name, content.count(name))) 12 | print("{} - done".format(name)) 13 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step3/tuttlefile: 
-------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "D'Artagnan"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content = f_in.read() 9 | print("{} chars in the novel".format(len(content))) 10 | for name in names: 11 | f_out.write("{}\t{}\n".format(name, content.count(name))) 12 | print("{} - done".format(name)) 13 | 14 | file://characters_count.csv <- file://characters_count.dat 15 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 16 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step4/characters_count.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/tutorial_steps/step4/characters_count.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step4/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "D'Artagnan"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content = f_in.read() 9 | print("{} chars in the novel".format(len(content))) 10 | for name in names: 11 | f_out.write("{}\t{}\n".format(name, 
content.count(name))) 12 | print("{} - done".format(name)) 13 | 14 | file://characters_count.csv <- file://characters_count.dat 15 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 16 | 17 | file://characters_count.png <- file://characters_count.dat 18 | gnuplot <<$script$ 19 | set terminal png 20 | set output "characters_count.png" 21 | plot "characters_count.dat" using 2: xtic(1) with histogram 22 | $script$ 23 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step5/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat 19 | gnuplot <<$script$ 20 | set terminal png 21 | set output "characters_count.png" 22 | plot "characters_count.dat" using 2: xtic(1) with histogram 23 | $script$ 24 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step6/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- 
file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat 19 | gnuplot <<$script$ 20 | set terminal png 21 | set output "characters_count.png" 22 | plot "characters_count.dat" using 2: xtic(1) with histogram 23 | $script$ 24 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step7/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk 
'{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat 19 | gnuplot <<$script$ 20 | set terminal png 21 | set output "characters_count.png" 22 | plot "characters_count.dat" using 2: xtic(1) with histogram 23 | linecolor "green" 24 | $script$ 25 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step8/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat 19 | gnuplot <<$script$ 20 | set terminal png 21 | set output "characters_count.png" 22 | linecolor "green" 23 | plot "characters_count.dat" using 2: xtic(1) with histogram 24 | $script$ 25 | -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step9/characters_count.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/docs/tutorial_musketeers/tutorial_steps/step9/characters_count.png -------------------------------------------------------------------------------- /docs/tutorial_musketeers/tutorial_steps/step9/tuttlefile: -------------------------------------------------------------------------------- 1 | file://Les_trois_mousquetaires.txt <- file://Les_trois_mousquetaires.zip 2 | unzip Les_trois_mousquetaires.zip Les_trois_mousquetaires.txt 3 | 4 | file://characters_count.dat <- file://Les_trois_mousquetaires.txt !# python 5 | names = ["Athos", "Porthos", "Aramis", "d'Artagnan", "Richelieu", "Rochefort"] 6 | with open('characters_count.dat', 'w') as f_out: 7 | with open('Les_trois_mousquetaires.txt') as f_in: 8 | content_low = f_in.read().lower() 9 | print("{} chars in the novel".format(len(content_low))) 10 | for name in names: 11 | name_low = name.lower() 12 | f_out.write("{}\t{}\n".format(name, content_low.count(name_low))) 13 | print("{} - done".format(name)) 14 | 15 | file://characters_count.csv <- file://characters_count.dat 16 | awk '{print "\""$1"\","$2"\r"}' characters_count.dat > characters_count.csv 17 | 18 | file://characters_count.png <- file://characters_count.dat 19 | gnuplot <<$script$ 20 | set terminal png 21 | set output "characters_count.png" 22 | plot "characters_count.dat" using 2: xtic(1) with histogram linecolor "green" 23 | $script$ 24 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: tuttle 2 | theme: readthedocs 3 | pages: 4 | - Overview : index.md 5 | - Reference : 6 | - reference/index.md 7 | - reference/tuttlefile_syntax.md 8 | - reference/resources_and_urls.md 9 | - reference/processors.md 10 | - reference/resources_authentication.md 11 | - Tutorials: 12 | - Discover tuttle : tutorial_musketeers/tutorial.md 13 | - 
Parametrized workflow : tuto_parametrized_workflow/tuto_parametrized_workflow.MD 14 | - If you're familiar with Makefiles : tuto_Makefile.MD 15 | - Developpers: 16 | - how_to_make_a_release.md 17 | -------------------------------------------------------------------------------- /package4win.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | """Tuttle""" 5 | 6 | import sys 7 | import re 8 | from os.path import join 9 | import six 10 | 11 | try: 12 | import setuptools 13 | from cx_Freeze import setup, Executable 14 | except ImportError: 15 | print("You need to install setuptools and cx_freeze modules in order to create a Windows installer for tuttle. " 16 | "You can install these packages with your package manager (usually python-setuptools) or via pip (pip" 17 | " install setuptools cx_freeze).") 18 | sys.exit(1) 19 | from setup import tuttle_description # Import description of the package from the standard setup 20 | 21 | 22 | def strip_rc(version): 23 | m = re.search(r'^(\d+)\.(\d+)', version) 24 | return m.group(0) 25 | 26 | 27 | # cx_freeze option for a command line application 28 | base = None 29 | build_exe_options = { 30 | "packages": ["os", "six", ], 31 | "excludes": ["tkinter", ], 32 | "includes": ["ConfigParser", "HTMLParser", ], 33 | "include_files": ( 34 | six.__file__, 35 | join("tuttle", "report"), 36 | join("tuttle"), 37 | ) 38 | } 39 | build_msi_options = { 40 | "add_to_path": True, 41 | } 42 | 43 | cx_freeze_opts = { 44 | 'options': { 45 | 'bdist_msi': build_msi_options, 46 | 'build_exe': build_exe_options 47 | }, 48 | 'executables': [Executable(join("bin", "tuttle"), base=base), 49 | Executable(join("bin", "tuttle-extend-workflow"), base=base)], 50 | } 51 | package_description = tuttle_description 52 | package_description.update(cx_freeze_opts) 53 | package_description['version'] = strip_rc(package_description['version']) 54 | setup(**package_description) 
-------------------------------------------------------------------------------- /project/TODO next.txt: -------------------------------------------------------------------------------- 1 | DONE - Move resource in its own module 2 | 3 | DONE - Put all the generated files in directory .tuttle (except tuttle_report.html, the entry point of everything accessible from the project) 4 | 5 | DONE - should we close stdin ? 6 | should be tested on *nix 7 | 8 | DONE - Tell the user why that resource is deleted : 9 | - process has changed 10 | - resource no longer produced 11 | - ... 12 | - depends on an invalid resource 13 | 14 | DONE - If a process failed, running tuttle again should fail immediately 15 | 16 | DONE - If a process failed, all should be marked as not Ok 17 | 18 | Should we invalidate a resource that has disappeared ? 19 | 20 | What should happen if a process fails and tuttle is re-run ? 21 | - should resources produced by the erroneous process be removed ? No because tuttle would run again endlessly 22 | - should if ignore the failing process and run other candidates until partial completion then end in error again ? Would 23 | be the same as an option --build_max_stuff as in gcc 24 | - should it stop immediately and not run anything ? 
This is consistent => will be implemented for now DONE 25 | 26 | DONE - Check for circular dependencies 27 | 28 | DONE - Check for status : if not all dependencies are created the process should be marked as failed 29 | 30 | DONE - Remove resources that are invalid 31 | 32 | DONE - -Check that the claimed-to-be-created resource really exists at the end of the process, otherwise there have been an issue and any depending process can't pursue 33 | 34 | + Test of a resource not really created by a process should not be invalided when removed 35 | 36 | When there is an error running a process, tes resources produced by this process should be marked as invalid 37 | It means that if a process didn't produce one of the expected outputs, every other output must be considered invalid 38 | 39 | DONE - Run Functional tests and process states in its own temporary dir 40 | 41 | Check if some (not primary) resources where deleted and invalidate depending resources ? Really ? What about signature ? 42 | 43 | shell processor by default (should tests be run according to the os ?) 44 | DONE Move default processor to shell 45 | 46 | Improve code coverage 47 | especially for parser - DONE 48 | 49 | DONE - Retrieve duration of common processes from previous workflow 50 | 51 | DONE -Ensure that a resource that used to be created by the process but is now a primary resource should not be deleted 52 | 53 | NOT FOR THE MOMENT - Stop deleting resources if the time (or % of time) of the work invalidated exceeds a threshold provided by user 54 | And print invalidation quantity any way 55 | 56 | HTTP (DONE) or FTP resource 57 | 58 | Remove #! from modules and packages - DONE 59 | 60 | Check if some primary resources have changed - DONE ! 61 | 62 | NOT FOR 0.1 - Check if some resource changed (and invalid them ?) 63 | 64 | DONE - Packaging - partially done 65 | 66 | Travis for tests on *nix. Is it possible to test on Windows => AppVeyor ? 
- DONE 67 | Upload the artifacts as a release on github ? 68 | 69 | Nicer reports : 70 | DONE - nice html with boostrap 71 | Done - Embed js in .tuttle 72 | autorelading iframe for the logs 73 | 74 | NOT FOR THE MOMENT - Add a unified log for both stderr and stdout 75 | 76 | NOT FOR THE MOMENT - Produce as much data as you can, similar to option --keep-going in Gnu Make 77 | 78 | A working real life example using tuttle in Github, with continuous integration in Travis CI. 79 | Car accidents in France ? 80 | 81 | "Just run it again..." 82 | If execution of a workflow failed, it MUST fail again if run again without any change in he workflow. 83 | What must not happen : your run a workflow. The 4rth process didn't produce bla.csv as promissed : tuttle stops at the error. No process uses bla.csv. Anyway, if you run tuttle again, it must not produce the rest and succed 84 | No magic. No surprise 85 | 86 | Pop a webserver for functional tests without network ? => Not for now - DONE 87 | 88 | DONE - Refactor functional tests with function "run_tuttle" so that it imports tuttle and runs the main function instead of subprocessing : we get coverage ! 89 | 90 | What should we do if someone tries to invalidate an http resource ? 91 | Should print a warning. This will be developed after a specific "same" (or convert) processor is available. 92 | This processor will be able to cast a resource as another : eg a file resource could 93 | be converted to a spreadsheet resource 94 | 95 | DONE - Remove files (ex .bat files) generated by processes from .tuttle if a a process is invalidated ? 
96 | 97 | Switch to pytest for code coverage with multiprocessing 98 | 99 | DONE - Refactor short names (Partialy DONE) 100 | 101 | DONE - dump process status and start time just before running and just after, in case somebody would need this info at any time 102 | 103 | WON'T DO - Create immutable processes from constructor : no more add_input() 104 | 105 | DONE - Refactor id 106 | 107 | WON'T DO - Improve tests on csv2sqlite - Is it useful ? Maybe this extension won't last 108 | 109 | DONE - Remove csv resource 110 | 111 | A meaningfull example 112 | 113 | DONE - Ensure we minimise the number of call to exists() and signature() 114 | 115 | Write a test for pick_a_failing_process late, and define what has to be done 116 | 117 | DONE - Detect corrupted resources through signature 118 | 119 | Add logs to download processor - DONE 120 | 121 | Report the size of the logs - DONE 122 | Add a test... - DONE 123 | 124 | DONE - Remove logs and all files when reseting a process ? 125 | 126 | DONE - Invalidate : should tuttle fail if nothing has been run yet . yes 127 | 128 | Fix the bug of accents in a tuttlefile ! -------------------------------------------------------------------------------- /project/ideas.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | File ressource: 4 | Be able to process files from input to outputs 5 | 6 | Shell: 7 | A shell processor to run command line tools 8 | 9 | Graph report: 10 | Trace the execution of processes in a sqlite database 11 | Repports the current state in graphviz (at the end of each process ?) 
12 | 13 | Dependancy tree: 14 | Create ressources that are missing, according to the dependancy tree by running the necessary processes 15 | 16 | 17 | Python: 18 | A Python processor 19 | 20 | Logs: 21 | View logs interractivelly 22 | 23 | 24 | Invalidation: 25 | Remove former ressources that are no longer needed 26 | Remove ressources that are no longer up to date because a process creating it has changed 27 | Remove ressources that are no longer up to date according to the current dependancy tree 28 | Remove all ressources dependant to removed ressources 29 | 30 | Improved file ressource: 31 | By default, even if you don't tell file://, your ressource is a file 32 | 33 | Check: 34 | "tuttle check" to check if my data is up to date => tuttle invalidate -q ? 35 | could return the total time of processing that would be invalidated, and the percent too. 36 | maybe the number of processes that would need to be run 37 | 38 | Invalidate: 39 | "tuttle invalidate" to invalidate a specific ressource I know to to be updated, but can't be automatically because I have forgotten to declare a dependancy / I've updated a specific library that will change the result 40 | 41 | Documentation: 42 | As important as a good working software 43 | Processors documentation 44 | Ressources documentation 45 | Tutorials 46 | Philosophy 47 | Quick statup 48 | Blog posts: 49 | - don't write rollback yourself 50 | - don't trash intermediate data 51 | 52 | Runtime dependancies: 53 | declare dependancies at runtime, in order to consider python librairies imported or tools used by the process 54 | 55 | Packaging : 56 | because an easy installation is vital 57 | on Linux 58 | on Windows 59 | 60 | Handle wildcards in file ressource: 61 | Be able to process file:///directory/*.sql files 62 | Need to check for conflicts, because ressources must not overlap. 
Eg file:///directory/*.sql and file:///directory/init.sql 63 | 64 | Adult mode: 65 | Ok, I know what I'm doing : 66 | - I've only added a comment, but I really know it will not change a specific ressouce. So please don't invalidate it 67 | - I've already got some data I have processed previously, and I've ported the code to tuttle, but I don't have the time to launch the whole workflow right now. But I still want to run the missing parts 68 | Should impact graph reports 69 | 70 | SQLite: 71 | An SQLite processor 72 | Databases is a major goal of tuttle, so an implemtation of SQLite would be a good exercice before heading to more complexe databases 73 | 74 | PostgresQL: 75 | A PostgresQL processor, because opensource rules ! 76 | Should implement such ressources as stored procedures (because a stored procedure shouldn't change in the workflow) 77 | 78 | MongoDb: 79 | A javascript processor for MongoDB 80 | 81 | Prediction: 82 | Estimate the duration of running the whole workflow, according to passed duration of processes 83 | 84 | Windows: 85 | The shell processor (or another) to run on Windows command line 86 | tuttle should run on Windows 87 | 88 | Auto processor: 89 | An [Auto] processor for automaticly doing clear operations such as 90 | * download a file from http / ftp 91 | * publishing a file through ftp 92 | * unzip a file 93 | * load a csv file into a table 94 | * .. 
95 | R: 96 | An R processor 97 | 98 | Virtual: 99 | A "virtual" target to run operation that don't necesseraly produce output 100 | Usefull to notification, like sending an email when a specific target is ready 101 | 102 | Log: 103 | Log the output and the errors when a process is run 104 | 105 | Configuration: 106 | A standard way to declare constants 107 | Could be used for Pre-run dependancy graph 108 | Processes using constants should declare them as dependancies, in order to invalidate the appropriate ressources if configuration changes 109 | Somme constants should not be taken into account for dependancies and should not be saved. Specifically passwords 110 | 111 | Machine configuration: 112 | a default configuration 113 | overridden by project's configuration 114 | 115 | Pre-run dependancy graph: 116 | Declare some ressources and processes dynamically before the whole workflow is ran 117 | Usefull when dealing with legagy workflow 118 | Users could extend the static graph with Python code 119 | Eg : reading a configuration file that specifies where to find the input data 120 | Before the whole workflow is run, the pre-run dependancies code is executed, and all the operations of invalidation are based the the dynamic graph 121 | 122 | Concurency : 123 | Run several independant processes concurently on one machine 124 | 125 | Dispatching: 126 | Allow other machines to connect to tuttle and retreive some processes to run 127 | Usefull for distributing processing among several machines 128 | Usefull for running processes that need a specific environement when : 129 | * a tool that exist only on windows / Linux / Mac if it's not your normal environnement 130 | * a tools is installed only in a specific machine (eg for licence issues) 131 | 132 | Export to spreadshit: 133 | Could be done in the auto processor 134 | 135 | Other stuff in a spreadshit processor: 136 | To be defined 137 | 138 | Distribution: 139 | Bundle a workflow into a an executable you can 
ditribute 140 | 141 | Datasets: 142 | some reference datasets like maps of the world with reference id for zones 143 | 144 | Pyline processor: 145 | A python processor to handle lines of input files 146 | User don't have to code the opening of the file nor the opening of the output 147 | stdout is written to the destination file 148 | 149 | Pycsv processor: 150 | A python processor to handle line a csv files -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | cx-Freeze==5.1.1 2 | Jinja2>=2.7.3 3 | MarkupSafe>=0.23 4 | nose>=1.3.6 5 | coverage 6 | tornado>=4.3 # for mocking an S3 server 7 | pyftpdlib 8 | snakebite -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | """ Tuttle installation and packaging script """ 5 | 6 | import sys 7 | from os.path import dirname, getsize, join 8 | 9 | try: 10 | from setuptools import setup, find_packages 11 | except ImportError: 12 | print("You need to install setuptools to build tuttle. Install it using" 13 | " your package manager (usually python-setuptools) or via pip (pip" 14 | " install setuptools).") 15 | sys.exit(1) 16 | 17 | 18 | def get_version(): 19 | version_path = join(dirname(__file__), 'tuttle', 'VERSION') 20 | version_details = open(version_path).read(getsize(version_path)) 21 | return version_details.split("\n")[0] 22 | 23 | 24 | version = get_version() 25 | 26 | tuttle_description = { 27 | 'name': 'tuttle', 28 | 'version': version, 29 | 'author': 'Lexman', 30 | 'author_email': 'tuttle@lexman.org', 31 | 'description': 'Make for data', 32 | 'long_description': 'Reliably create data from different sources. Work as a team in an industrial environment... 
' 33 | 'A tool for continuous data processing', 34 | 'platforms': ['Linux', 'Windows'], 35 | 'url': 'http://tuttle.lexman.org/', 36 | 'license': 'MIT', 37 | 'install_requires': [ 38 | 'jinja2', 39 | 'MarkupSafe', 40 | 'psycopg2-binary', 41 | 'six', 42 | 'boto3', 43 | 'chardet', 44 | 'psutil', 45 | 'pyodbc', 46 | 'pycurl', 47 | ], 48 | 'packages': [ 49 | 'tuttle', 50 | 'tuttle.report', 51 | 'tuttle.processors', 52 | 'tuttle.addons', 53 | ], 54 | 'entry_points': { 55 | 'console_scripts': [ 56 | 'tuttle=tuttle.cli_tuttle:tuttle_main', 57 | 'tuttle-extend-workflow=tuttle.cli_tuttle_extend_workflow:tuttle_extend_workflow_main', 58 | ], 59 | }, 60 | 'include_package_data': True, 61 | 'package_data': { 62 | 'tuttle': ['VERSION'], 63 | 'tuttle.report': ['*.html', 'html_report_assets/*'], 64 | }, 65 | } 66 | 67 | 68 | if __name__ == '__main__': 69 | # NB: this script can be imported by windows packager 70 | setup(**tuttle_description) 71 | -------------------------------------------------------------------------------- /tests/A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/tests/A -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from urllib2 import urlopen, HTTPError, URLError 2 | 3 | from tuttle.addons.netutils import hostname_resolves 4 | 5 | 6 | def is_online(): 7 | try: 8 | # If google is available, it mean Internet is up 9 | response = urlopen("http://www.google.com") 10 | some_data = response.read(0) 11 | except (HTTPError, URLError) as e: 12 | return False 13 | return True 14 | 15 | 16 | online = is_online() 17 | 18 | 19 | bad_resolving = hostname_resolves("this-host-does-not-exists") -------------------------------------------------------------------------------- /tests/functional_tests/A: 
-------------------------------------------------------------------------------- 1 | A 2 | -------------------------------------------------------------------------------- /tests/functional_tests/B: -------------------------------------------------------------------------------- 1 | D 2 | -------------------------------------------------------------------------------- /tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from tempfile import mkdtemp 3 | 4 | import sys 5 | from os import getcwd, chdir 6 | from shutil import rmtree, copy 7 | from functools import wraps 8 | from os.path import join, dirname 9 | from tuttle.commands import run, invalidate 10 | 11 | try: 12 | from StringIO import StringIO 13 | except ImportError: 14 | from io import StringIO 15 | 16 | 17 | def run_tuttle_file(content=None, threshold=-1, nb_workers=-1, keep_going=False, check_integrity=False): 18 | if content is not None: 19 | with open('tuttlefile', "w") as f: 20 | f.write(content.encode("utf8")) 21 | oldout, olderr = sys.stdout, sys.stderr 22 | out = StringIO() 23 | try: 24 | sys.stdout,sys.stderr = out, out 25 | rcode = run('tuttlefile', threshold=threshold, nb_workers=nb_workers, keep_going=keep_going, check_integrity=check_integrity) 26 | finally: 27 | sys.stdout, sys.stderr = oldout, olderr 28 | return rcode, out.getvalue() 29 | 30 | 31 | def tuttle_invalidate(project=None, urls=[]): 32 | if project is not None: 33 | with open('tuttlefile', "w") as f: 34 | f.write(project) 35 | oldout, olderr = sys.stdout, sys.stderr 36 | out = StringIO() 37 | try: 38 | sys.stdout, sys.stderr = out, out 39 | rcode = invalidate('tuttlefile', urls) 40 | finally: 41 | sys.stdout, sys.stderr = oldout, olderr 42 | return rcode, out.getvalue() 43 | 44 | 45 | def isolate(arg): 46 | if isinstance(arg, list): 47 | files = arg 48 | elif callable(arg): 49 | files = [] 50 | 51 | def wrap(func): 52 | funct_dir = 
dirname(func.func_globals['__file__']) 53 | 54 | @wraps(func) 55 | def wrapped_func(*args, **kwargs): 56 | tmp_dir = mkdtemp() 57 | for filename in files: 58 | src = join(funct_dir, filename) 59 | dst = join(tmp_dir, filename) 60 | copy(src, dst) 61 | cwd = getcwd() 62 | chdir(tmp_dir) 63 | try: 64 | return func(*args, **kwargs) 65 | finally: 66 | chdir(cwd) 67 | rmtree(tmp_dir) 68 | return wrapped_func 69 | if isinstance(arg, list): 70 | return wrap 71 | elif callable(arg): 72 | return wrap(arg) 73 | -------------------------------------------------------------------------------- /tests/functional_tests/b-produces-x.tuttle: -------------------------------------------------------------------------------- 1 | file://{{ x }} <- file://B 2 | echo B produces {{ x }} > {{ x }} 3 | -------------------------------------------------------------------------------- /tests/functional_tests/everything-produces-result.tuttle: -------------------------------------------------------------------------------- 1 | file://RESULT <- {% for input in inputs %}file://{{input}} {% endfor %} 2 | echo **{{ foo }}** 3 | echo everything produces RESULT > RESULT 4 | -------------------------------------------------------------------------------- /tests/functional_tests/test_errors/A: -------------------------------------------------------------------------------- 1 | D 2 | -------------------------------------------------------------------------------- /tests/functional_tests/test_errors/test_error_in_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import glob 4 | from os.path import isfile 5 | from tests.functional_tests import isolate, run_tuttle_file 6 | from tuttle.project_parser import ProjectParser 7 | from tuttle.tuttle_directories import TuttleDirectories 8 | from tuttle.workflow_runner import WorkflowRunner 9 | 10 | 11 | class TestErrorInProcess: 12 | 13 | @isolate(['A']) 14 | def 
test_error_in_process(self): 15 | """ When a process fail, Tuttle should exit with status code 2""" 16 | # As in Gnu Make 17 | 18 | first = """file://B <- file://A 19 | echo A produces B 20 | echo B > B 21 | 22 | file://C <- file://B 23 | Obvious syntax error 24 | echo This should not be written 25 | echo C > C 26 | 27 | file://D <- file://A 28 | echo A produces D 29 | echo D > D 30 | """ 31 | rcode, output = run_tuttle_file(first) 32 | assert rcode == 2 33 | assert output.find("::stderr") >= 0, output 34 | assert output.find("Obvious") >= 0, output 35 | assert output.find("Process ended with error code 1") >= 0, output 36 | pos_have_failed = output.find("have failed") 37 | assert pos_have_failed >= 0, output 38 | assert output.find("tuttlefile_5", pos_have_failed) >= 0, output 39 | 40 | @isolate(['A', 'test_error_in_process.py']) 41 | def test_isolation_decorator(self): 42 | files = glob.glob("*") 43 | assert set(files) == {'A', 'test_error_in_process.py'}, files 44 | # assert set(files) == set(['A', 'test_error_in_process.py']), files 45 | 46 | @isolate 47 | def test_isolation_decorator_without_args(self): 48 | assert True 49 | 50 | @isolate(['A']) 51 | def test_fail_if_already_failed(self): 52 | """ If a tuttlefile fails, then is changed, it should still fail if change is not related 53 | to the process that failed""" 54 | # As in Gnu Make 55 | first = """file://B <- file://A 56 | echo A produces B 57 | echo B > B 58 | 59 | file://C <- file://B 60 | error 61 | echo This should not be written 62 | echo C > C 63 | 64 | file://D <- file://A 65 | echo A produces D 66 | echo D > D 67 | """ 68 | rcode, output = run_tuttle_file(first) 69 | assert rcode == 2 70 | assert isfile('B') 71 | assert not isfile('C') 72 | # assert not isfile('D') 73 | second = """file://B <- file://A 74 | echo A produces B 75 | echo B > B 76 | 77 | file://C <- file://B 78 | error 79 | echo This should not be written 80 | echo C > C 81 | 82 | file://D <- file://A 83 | echo A produces D 84 | echo 
Minor change 85 | echo D > D 86 | """ 87 | rcode, output = run_tuttle_file(second) 88 | assert rcode == 2 89 | assert output.find("Workflow already failed") >= 0, output 90 | 91 | # Not sure about that 92 | @isolate(['A']) 93 | def test_fail_if_already_failed_even_without_outputs(self): 94 | """ When a process fail, Tuttle should exit with status code 2, even if the process has no outputs""" 95 | project = """file://B <- file://A 96 | echo A produces B 97 | echo B > B 98 | 99 | <- file://B 100 | error 101 | echo This should not be written 102 | echo C > C 103 | """ 104 | rcode, output = run_tuttle_file(project) 105 | assert rcode == 2 106 | assert isfile('B') 107 | assert not isfile('C') 108 | 109 | rcode, output = run_tuttle_file(project) 110 | assert rcode == 2 111 | assert output.find("Workflow already failed") >= 0, output 112 | 113 | @isolate(['A']) 114 | def test_process_fail_if_output_not_created(self): 115 | """ If the all the outputs of a process have not been created, the process should be marked as failed 116 | even if no error occurred. 
117 | Useful when displaying html report 118 | """ 119 | 120 | first = """file://B <- file://A 121 | echo A won't produce B 122 | """ 123 | 124 | pp = ProjectParser() 125 | pp.set_project(first) 126 | workflow = pp.parse_extend_and_check_project() 127 | workflow.discover_resources() 128 | TuttleDirectories.straighten_out_process_and_logs(workflow) 129 | wr = WorkflowRunner(3) 130 | successes, failures = wr.run_parallel_workflow(workflow) 131 | assert failures, "Process should be marked as failed" 132 | assert failures[0].success is False, "Process should be marked as failed" 133 | -------------------------------------------------------------------------------- /tests/functional_tests/test_errors/test_error_in_workflow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from tests.functional_tests import isolate, run_tuttle_file 4 | 5 | 6 | class TestErrorInWorkflow(): 7 | 8 | @isolate 9 | def test_missing_primary_resource(self): 10 | """ Should fail if a primary resource is missing""" 11 | project = """file://B <- file://A 12 | echo A produces B 13 | echo B > B 14 | """ 15 | rcode, output = run_tuttle_file(project) 16 | assert rcode == 2, rcode 17 | assert output.find("Missing") >= 0, output 18 | 19 | @isolate 20 | def test_circular_references(self): 21 | """ Should fail if a primary resource is missing""" 22 | project = """file://B <- file://A 23 | file://A <- file://B 24 | echo A produces B 25 | echo B > B 26 | """ 27 | rcode, output = run_tuttle_file(project) 28 | assert rcode == 2, rcode 29 | assert output.find("circular") >= 0, output 30 | 31 | @isolate 32 | def test_ambiguous_outputless_processes(self): 33 | """ Should not allow two outputless processes with same inputs""" 34 | project = """ <- file://A 35 | echo Do something with file A 36 | 37 | <- file://A 38 | echo Do something else with file A 39 | """ 40 | rcode, output = run_tuttle_file(project) 41 | assert rcode == 2, rcode 42 | 
assert output.find("both have exactly the same inputs") >= 0, output 43 | -------------------------------------------------------------------------------- /tests/functional_tests/test_errors/test_keep_going.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from nose.plugins.skip import SkipTest 5 | from tests.functional_tests import isolate, run_tuttle_file 6 | 7 | 8 | class TestKeepGoing: 9 | 10 | @isolate(['A']) 11 | def test_keep_going(self): 12 | """ If tuttle is run with option keep_going, it should run all it can and not stop at first error""" 13 | # As in Gnu Make 14 | 15 | project = """file://B <- file://A 16 | Obvious error 17 | 18 | file://C <- file://B 19 | echo B produces C > C 20 | 21 | file://D <- file://A 22 | echo A produces D 23 | echo A produces D > D 24 | 25 | file://E <- file://A 26 | Another error 27 | """ 28 | rcode, output = run_tuttle_file(project, nb_workers=1, keep_going=True) 29 | assert rcode == 2 30 | assert output.find("::stderr") >= 0, output 31 | assert output.find("Obvious") >= 0, output 32 | assert output.find("Another") >= 0, output 33 | assert output.find("Process ended with error code 1") >= 0, output 34 | pos_have_failed = output.find("have failed") 35 | assert pos_have_failed >= 0, output 36 | assert output.find("tuttlefile_1", pos_have_failed) >= 0, output 37 | assert output.find("tuttlefile_11", pos_have_failed) >= 0, output 38 | 39 | @isolate(['A']) 40 | def test_keep_going_after_error_open(self): 41 | """ If a workflow fail, running it again with option keep_going, 42 | it should run all it can""" 43 | 44 | # The ordder matters 45 | project = """ 46 | file://B <- file://A 47 | echo A produces B > B 48 | echo A have produced B 49 | 50 | file://C <- file://A 51 | echo A won't produce C 52 | echo A won't produce C > C 53 | echo about to fail 54 | error 55 | 56 | file://D <- file://A 57 | echo A produces D > D 58 | echo A have produced D 59 | 60 | 
file://E <- file://A 61 | echo A produces E > E 62 | echo A have produced E 63 | 64 | file://F <- file://A 65 | echo A produces F > F 66 | echo A have produced F 67 | 68 | file://G <- file://A 69 | echo A produces G > G 70 | echo A have produced G 71 | 72 | file://H <- file://A 73 | echo A produces H > H 74 | echo A have produced H 75 | 76 | """ 77 | rcode1, output1 = run_tuttle_file(project, nb_workers=1) 78 | assert rcode1 == 2, output1 79 | # Hope that tuttle has not run this process 80 | nb_splits = len(output1.split("A have produced")) 81 | # We can't control the order in which tuttle run the processes 82 | # but we can control the order is ok to test 83 | if nb_splits >= 7: 84 | raise SkipTest("Damned ! The tests won't be accurate because tuttle choose to run the " 85 | "failing process last \n" + str(nb_splits) + "\n" + output1) 86 | 87 | rcode, output = run_tuttle_file(project, nb_workers=1, keep_going=True) 88 | assert rcode == 2, output1 + "\n" + output 89 | assert output.find("* file://C") == -1, output 90 | 91 | assert output.find("A have produced") >= 0, output 92 | 93 | @isolate(['A']) 94 | def test_keep_going_after_error_no_more_process_to_run(self): 95 | """ If a workflow fail, running it again with option keep_going, should not run another process if 96 | there nothing to run 97 | """ 98 | 99 | project = """file://B <- file://A 100 | echo A produces B > B 101 | echo about to fail 102 | error 103 | 104 | file://C <- file://A 105 | sleep 1 106 | echo A produces C > C 107 | echo A have produced C 108 | 109 | file://D <- file://B 110 | echo B produces D > D 111 | echo B have produced D 112 | """ 113 | rcode1, output1 = run_tuttle_file(project, nb_workers=2) 114 | assert rcode1 == 2, output1 115 | 116 | rcode, output = run_tuttle_file(project, nb_workers=2, keep_going=True) 117 | assert rcode == 2, output1 + "\n" + output 118 | assert output.find("* file://B") == -1, output 119 | 120 | assert output.find("Nothing to do") >= 0, output 121 | 
-------------------------------------------------------------------------------- /tests/functional_tests/test_errors/test_no_tuttlefile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from subprocess import Popen, PIPE 3 | from os.path import abspath, join, dirname 4 | 5 | from tests.functional_tests import isolate 6 | 7 | 8 | class TestNoTuttlefile(): 9 | 10 | @isolate 11 | def test_no_file_in_current_dir(self): 12 | """ Should display a message if there is no tuttlefile in the current directory""" 13 | proc = Popen(['tuttle', 'run'], stdout=PIPE) 14 | output = proc.stdout.read() 15 | rcode = proc.wait() 16 | assert rcode == 2 17 | assert output.find("No tuttlefile") >= 0 18 | 19 | def test_tuttle_file_does_not_exist(self): 20 | """ Should display a message if the tuttlefile passed as argument to the command line does not exist""" 21 | proc = Popen(['tuttle', 'run', '-f', 'inexistant_file' ], stdout=PIPE) 22 | output = proc.stdout.read() 23 | rcode = proc.wait() 24 | assert rcode == 2 25 | assert output.find("No tuttlefile") >= 0 26 | -------------------------------------------------------------------------------- /tests/functional_tests/test_keyboardinterrupt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from subprocess import Popen, PIPE 4 | from time import sleep 5 | 6 | import signal 7 | from nose.plugins.skip import SkipTest 8 | 9 | from tests.functional_tests import isolate 10 | from tuttle.workflow import Workflow 11 | 12 | 13 | class TestKeyboardInterrupt: 14 | 15 | def setUp(self): 16 | if os.name != 'posix': 17 | raise SkipTest("Testing keyboard interrupt only works on *nix") 18 | 19 | @isolate(['A']) 20 | def test_interrupt_exit_code(self): 21 | """ Should exit with code code when interrupted """ 22 | project = """file://B <- file://A 23 | echo A produces B 24 | sleep 1 25 | echo B > B 26 | 27 | """ 28 
| with open('tuttlefile', "w") as f: 29 | f.write(project) 30 | proc = Popen(['tuttle', 'run'], stdout=PIPE) 31 | 32 | sleep(0.5) 33 | proc.send_signal(signal.SIGINT) 34 | output = proc.stdout.read() 35 | rcode = proc.wait() 36 | assert rcode == 2, output 37 | # assert output.find("Process tuttlefile_1 aborted by user") > -1, output 38 | assert output.find("Interrupted") > -1, output 39 | w = Workflow.load() 40 | pB = w.find_process_that_creates("file://B") 41 | assert pB.end is not None, "Process that creates B should have ended" 42 | assert pB.success is False, "Process that creates B should have ended in error" 43 | assert pB.error_message.find("aborted") >= -1, "Process that creates B should be declared as aborted" 44 | 45 | @isolate(['A']) 46 | def test_relaunch_after_interrupt(self): 47 | """ Tuttle should run again after it has been interrupted""" 48 | project = """file://B <- file://A 49 | sleep 1 50 | echo B > B 51 | """ 52 | with open('tuttlefile', "w") as f: 53 | f.write(project) 54 | proc = Popen(['tuttle', 'run'], stdout=PIPE) 55 | 56 | sleep(0.5) 57 | proc.send_signal(signal.SIGINT) 58 | output = proc.stdout.read() 59 | rcode = proc.wait() 60 | assert rcode == 2, output 61 | 62 | proc = Popen(['tuttle', 'run'], stdout=PIPE, stderr=PIPE) 63 | rcode = proc.wait() 64 | err = proc.stderr.read() 65 | output = proc.stdout.read() 66 | assert rcode == 2, output 67 | assert output.find("already failed") > -1, output + "\n" + err 68 | 69 | @isolate(['A']) 70 | def test_relaunch_after_kill(self): 71 | """ Tuttle should run again after it has been killed (from bug)""" 72 | # raise SkipTest() 73 | project = """file://B <- file://A 74 | echo B > B 75 | sleep 1 76 | """ 77 | with open('tuttlefile', "w") as f: 78 | f.write(project) 79 | proc = Popen(['tuttle', 'run'], stdout=PIPE) 80 | 81 | sleep(0.5) 82 | proc.send_signal(signal.SIGKILL) 83 | output = proc.stdout.read() 84 | rcode = proc.wait() 85 | assert rcode == -signal.SIGKILL, output 86 | 87 | proc = 
Popen(['tuttle', 'run'], stdout=PIPE, stderr=PIPE) 88 | rcode = proc.wait() 89 | err = proc.stderr.read() 90 | assert err.find("DISCOVERED") < 0, err 91 | -------------------------------------------------------------------------------- /tests/functional_tests/test_preprocessors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from os.path import isfile, join 3 | 4 | from tests.functional_tests import isolate, run_tuttle_file 5 | 6 | 7 | class TestPreprocessors: 8 | 9 | @isolate 10 | def test_preprocessor_is_run(self): 11 | """ When a preprocessor is declared, it should be run after parsing """ 12 | project = """|<< 13 | echo Running preprocess 14 | echo preprocess_has_run > preprocess_has_run 15 | """ 16 | rcode, output = run_tuttle_file(project) 17 | assert rcode == 0, output 18 | # Preprocesses should not leave anything after running 19 | # Kids, don't do it at home ! 20 | assert isfile('preprocess_has_run') 21 | 22 | @isolate 23 | def test_preprocessor_is_in_report(self): 24 | """ Preprocessors should appear in the report """ 25 | project = """|<< 26 | echo Running preprocess 27 | """ 28 | rcode, output = run_tuttle_file(project) 29 | assert rcode == 0, output 30 | report_path = join('.tuttle', 'report.html') 31 | assert isfile(report_path) 32 | report = open(report_path).read() 33 | # the code should appear in the html report 34 | code_pos = report.find("echo Running preprocess") 35 | assert code_pos > -1, code_pos 36 | 37 | @isolate 38 | def test_preprocesslogs_are_not_prefixed(self): 39 | """ Preprocessors should appear in the report """ 40 | project = """|<< 41 | echo Running preprocess line 1 42 | echo Running preprocess line 2 43 | """ 44 | rcode, output = run_tuttle_file(project) 45 | assert rcode == 0, output 46 | assert output.find("\nRunning preprocess line 2") >= 0, output 47 | 48 | @isolate(['A']) 49 | def test_no_preprocess_in_report(self): 50 | """ The report should not include a 
preprocess section if there are no preprocesses """ 51 | project = """file://B <- file://A 52 | echo A produces B > B 53 | """ 54 | rcode, output = run_tuttle_file(project) 55 | assert rcode == 0, output 56 | report_path = join('.tuttle', 'report.html') 57 | assert isfile(report_path) 58 | report = open(report_path).read() 59 | code_pos = report.find("reprocess") # skip the first letter in upper case 60 | assert code_pos == -1, code_pos 61 | 62 | @isolate(['A']) 63 | def test_preprocess_should_not_force_invalidation(self): 64 | """ The existance of preprocesses should not invalidate all the resources (from bug)""" 65 | project = """file://B <- file://A 66 | echo A produces B > B 67 | 68 | |<< 69 | echo Running preprocess 70 | """ 71 | rcode, output = run_tuttle_file(project) 72 | assert rcode == 0, output 73 | rcode, output = run_tuttle_file(project, threshold=0) 74 | assert rcode == 0, output 75 | pos = output.find("Nothing to do") 76 | assert pos >= 0, output 77 | 78 | def get_cmd_extend_workflow(self): 79 | """ 80 | :return: A command line to call tuttle-extend-workflow even if tuttle has not been installed with pip 81 | """ 82 | cmd_extend = 'tuttle-extend-workflow' 83 | return cmd_extend 84 | 85 | @isolate(['A', 'b-produces-x.tuttle']) 86 | def test_call_extend(self): 87 | """ A preprocess should be able to call the tuttle-extend-workflow command""" 88 | cmd_extend = self.get_cmd_extend_workflow() 89 | project = """file://B <- file://A 90 | echo A produces B > B 91 | 92 | |<< 93 | echo Expending workflow in preprocess 94 | echo "{cmd_extend} -h" 95 | {cmd_extend} -h 96 | """.format(cmd_extend=cmd_extend) 97 | rcode, output = run_tuttle_file(project) 98 | assert rcode == 0, "{} -> {}\n{}".format(cmd_extend, rcode, output) 99 | 100 | @isolate(['A', 'b-produces-x.tuttle']) 101 | def test_extend_workflow(self): 102 | """ One should be able to extend the workflow from a preprocess""" 103 | cmd_extend = self.get_cmd_extend_workflow() 104 | project = """file://B <- 
file://A 105 | echo A produces B > B 106 | 107 | |<< 108 | echo Running preprocess 109 | {cmd_extend} b-produces-x.tuttle x="C" 110 | """.format(cmd_extend=cmd_extend) 111 | rcode, output = run_tuttle_file(project) 112 | assert rcode == 0, output 113 | report_path = join('.tuttle', 'report.html') 114 | assert isfile(report_path) 115 | report = open(report_path).read() 116 | pos_A = report.find("file%3A//A") 117 | assert pos_A > -1, output 118 | pos_C = report.find("file%3A//C") 119 | assert pos_C > -1, report[pos_A:] 120 | 121 | @isolate(['A']) 122 | def test_pre_process_fails(self): 123 | """ A preprocess should be able to call the tuttle-extend-workflow command""" 124 | cmd_extend = self.get_cmd_extend_workflow() 125 | project = """file://B <- file://A 126 | echo Should not be executed 127 | echo A produces B > B 128 | 129 | |<< 130 | echo Failling 131 | Failling command 132 | """.format(cmd_extend=cmd_extend) 133 | rcode, output = run_tuttle_file(project) 134 | assert rcode != 0, "{} -> {}\n{}".format(cmd_extend, rcode, output) 135 | pos = output.find("Should not be executed") 136 | assert pos == -1, output 137 | 138 | @isolate(['A', 'b-produces-x.tuttle']) 139 | def test_extend_workflow_from_python(self): 140 | """ One should be able to extend the workflow from python a preprocess""" 141 | cmd_extend = self.get_cmd_extend_workflow() 142 | project = """file://B <- file://A 143 | echo A produces B > B 144 | 145 | |<< ! 
python 146 | from tuttle import extend_workflow 147 | print("Running a python preprocess") 148 | extend_workflow('b-produces-x.tuttle', x="C") 149 | """.format(cmd_extend=cmd_extend) 150 | rcode, output = run_tuttle_file(project) 151 | assert rcode == 0, output 152 | report_path = join('.tuttle', 'report.html') 153 | assert isfile(report_path) 154 | report = open(report_path).read() 155 | pos_A = report.find("file%3A//A") 156 | assert pos_A > -1, output 157 | pos_C = report.find("file%3A//C") 158 | assert pos_C > -1, report[pos_A:] 159 | -------------------------------------------------------------------------------- /tests/functional_tests/test_resources/A: -------------------------------------------------------------------------------- 1 | A 2 | -------------------------------------------------------------------------------- /tests/functional_tests/test_resources/test_file_resource.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from tempfile import mkdtemp 3 | from shutil import copytree, copy 4 | from os.path import isdir, join, isfile 5 | from tests.functional_tests import isolate, run_tuttle_file 6 | from tuttle.commands import invalidate 7 | 8 | from tuttle.resource import FileResource 9 | import tuttle.resource 10 | from os import path, listdir 11 | from tuttle.utils import CurrentDir 12 | 13 | 14 | def copycontent(src, dst): 15 | for elmt in listdir(src): 16 | src_elmt = join(src, elmt) 17 | dst_elmt = join(dst, elmt) 18 | if isdir(elmt): 19 | copytree(src_elmt, dst_elmt) 20 | else: 21 | copy(src_elmt, dst_elmt) 22 | 23 | 24 | class TestHttpResource(): 25 | 26 | def test_real_resource_exists(self): 27 | """A real resource should exist""" 28 | file_url = "file://{}".format(path.abspath(tuttle.resource.__file__)) 29 | res = FileResource(file_url) 30 | assert res.exists() 31 | 32 | def test_fictive_resource_exists(self): 33 | """A real resource should exist""" 34 | res = 
FileResource("fictive_file") 35 | assert not res.exists() 36 | 37 | @isolate(['A']) 38 | def test_relative_resource_is_attached_to_tuttlefile(self): 39 | """If you move a whole project, it must still work""" 40 | project = """file://B <- file://A 41 | echo A produces B >B 42 | echo A produces B 43 | """"" 44 | run_tuttle_file(project) 45 | assert isfile('B') 46 | tmp_dir = mkdtemp() 47 | copycontent('.', tmp_dir) 48 | assert isfile(join(tmp_dir, 'B')) 49 | with CurrentDir(tmp_dir): 50 | invalidate(join(tmp_dir, 'tuttlefile'), ['file://B']) 51 | assert isfile('B'), "File B in the origin project should still exist" 52 | assert not isfile(join(tmp_dir, 'B')), "File B in the copied project should have been removed" 53 | 54 | -------------------------------------------------------------------------------- /tests/functional_tests/test_standard_behaviour.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from os.path import isfile, exists, isdir 3 | 4 | from os import path 5 | from tests.functional_tests import isolate, run_tuttle_file 6 | from tuttle.tuttle_directories import TuttleDirectories 7 | 8 | 9 | class TestStandardBehaviour: 10 | 11 | @isolate(['A']) 12 | def test_create_resource(self): 13 | """ When launching "tuttle" in the command line, should produce the result""" 14 | first = """file://B <- file://A 15 | echo A produces B 16 | echo B > B 17 | 18 | """ 19 | rcode, output = run_tuttle_file(first) 20 | assert path.exists('B') 21 | 22 | @isolate(['A']) 23 | def test_create_report(self): 24 | """ When launching "tuttle" in the command line, should produce the result""" 25 | first = """file://B <- file://A 26 | echo A produces B 27 | echo B > B 28 | 29 | """ 30 | rcode, output = run_tuttle_file(first) 31 | assert path.exists(path.join('.tuttle', 'report.html')) 32 | 33 | @isolate(['A']) 34 | def test_report_execution(self): 35 | """ When launching "tuttle" in the command line, should produce the 
html report""" 36 | first = """file://B <- file://A 37 | echo A produces B 38 | echo B > B 39 | 40 | file://C <- file://B 41 | error 42 | echo B produces C 43 | echo C > C 44 | 45 | file://D <- file://C 46 | echo C produces D 47 | echo D > D 48 | """ 49 | rcode, output = run_tuttle_file(first) 50 | assert rcode == 2, output 51 | assert isfile(path.join(".tuttle", 'report.html')) 52 | assert path.isfile(path.join(".tuttle", "last_workflow.pickle")) 53 | second = """file://B <- file://A 54 | echo A produces B 55 | echo B > B 56 | 57 | file://C <- file://B 58 | another error 59 | echo B produces C 60 | echo C > C 61 | 62 | file://D <- file://C 63 | echo C produces D 64 | error 65 | echo D > D 66 | """ 67 | rcode, output = run_tuttle_file(second) 68 | assert rcode == 2 69 | report = file(path.join('.tuttle', 'report.html')).read() 70 | [_, sec1, sec2, sec3] = report.split('Start") >= 0, sec1 72 | assert sec2.find("Start") >= 0, sec2 73 | assert sec3.find("Start") == -1, sec3 74 | 75 | @isolate(['A']) 76 | def test_workflow_execution_should_stop_at_first_process_error(self): 77 | """ Should invalidate a resource if the code creating it changes 78 | """ 79 | project = """file://B <- file://A 80 | echo A produces B > B 81 | error 82 | 83 | file://C <- file://B 84 | echo B produces C > C 85 | """ 86 | rcode, output = run_tuttle_file(project) 87 | assert not path.exists('C') 88 | 89 | @isolate(['A']) 90 | def test_should_tell_if_already_ok(self): 91 | """ If nothing has to run, the user should be informed every thing is ok 92 | """ 93 | project = """file://B <- file://A 94 | echo A produces B > B 95 | echo A produces B 96 | """ 97 | rcode, output = run_tuttle_file(project) 98 | assert rcode == 0, output 99 | assert output.find("A produces B") >= 0, output 100 | rcode, output = run_tuttle_file(project) 101 | assert rcode == 0, output 102 | assert output.find("Nothing to do") >= 0, output 103 | 104 | @isolate(['A', 'tuttlefile']) 105 | def 
test_tuttlefile_should_be_in_utf8(self): 106 | """ A tuttlefile containing utf-8 accented characters should be read and run correctly 107 | """ 108 | rcode, output = run_tuttle_file() 109 | assert rcode == 0, output 110 | result = file('B').read().decode('utf8') 111 | assert result.find(u"du texte accentué") >= 0, result 112 | 113 | @isolate(['A']) 114 | def test_processes_paths(self): 115 | """ After a process has run, former logs and reserved_path should have moved according to 116 | the new name of the process 117 | """ 118 | project = """file://B <- file://A 119 | echo A produces B > B 120 | echo A has produced B 121 | """ 122 | rcode, output = run_tuttle_file(project) 123 | assert rcode == 0, output 124 | 125 | out_log = open(TuttleDirectories.tuttle_dir("processes", "logs", "tuttlefile_1_stdout.txt")).read() 126 | assert out_log.find("A has produced B") > -1, out_log 127 | 128 | assert exists(TuttleDirectories.tuttle_dir("processes", "tuttlefile_1")) 129 | # out_log = open(TuttleDirectories.tuttle_dir("processes", "tuttlefile_1")).read() 130 | # assert out_log.find("echo A has produced B") > -1, out_log 131 | 132 | project = """file://C <- file://A ! 
python 133 | f = open('C', 'w') 134 | f.write('A produces C') 135 | print('echo A has produced C') 136 | 137 | file://B <- file://A 138 | echo A produces B > B 139 | echo A has produced B 140 | """ 141 | rcode, output = run_tuttle_file(project) 142 | assert rcode == 0, output 143 | 144 | out_log = open(TuttleDirectories.tuttle_dir("processes", "logs", "tuttlefile_6_stdout.txt")).read() 145 | assert out_log.find("A has produced B") > -1, out_log 146 | 147 | reserved_path = TuttleDirectories.tuttle_dir("processes", "tuttlefile_6") 148 | assert exists(reserved_path) 149 | 150 | @isolate(['A']) 151 | def test_preprocesses_paths(self): 152 | """ After a workflow has run, logs and reserved path from preprocesses should be available (from bug) 153 | """ 154 | project = """file://B <- file://A 155 | echo A produces B > B 156 | echo A has produced B 157 | 158 | |<< 159 | echo Preprocess running 160 | """ 161 | rcode, output = run_tuttle_file(project) 162 | assert rcode == 0, output 163 | 164 | out_log = open(TuttleDirectories.tuttle_dir("processes", "logs", "tuttlefile_5_stdout.txt")).read() 165 | assert out_log.find("Preprocess running") > -1, out_log 166 | assert exists(TuttleDirectories.tuttle_dir("processes", "tuttlefile_5")) 167 | -------------------------------------------------------------------------------- /tests/functional_tests/tuttlefile: -------------------------------------------------------------------------------- 1 | file://B <- file://A 2 | echo du texte accentué > B 3 | -------------------------------------------------------------------------------- /tests/included_project.tuttle: -------------------------------------------------------------------------------- 1 | file://B <- file://A 2 | echo B > B -------------------------------------------------------------------------------- /tests/test_addons/.tuttlepass: -------------------------------------------------------------------------------- 1 | ftp://localhost.* user password 2 | 
-------------------------------------------------------------------------------- /tests/test_addons/A: -------------------------------------------------------------------------------- 1 | A 2 | -------------------------------------------------------------------------------- /tests/test_addons/a_lib.py: -------------------------------------------------------------------------------- 1 | def a_function(): 2 | return '42' 3 | -------------------------------------------------------------------------------- /tests/test_addons/bad_csv.csv: -------------------------------------------------------------------------------- 1 | country_name,country_code,population 2 | "Aruba","ABW",102911 3 | "Andorra","AND",79218 4 | "Afghanistan",30551674 5 | "Angola","AGO",21471618 6 | "Albania","ALB",2897366 7 | "Arab World","ARB",369762523 8 | "United Arab Emirates","ARE",9346129 9 | -------------------------------------------------------------------------------- /tests/test_addons/ftp/ftp_resource: -------------------------------------------------------------------------------- 1 | An ftp resource 2 | -------------------------------------------------------------------------------- /tests/test_addons/test.csv: -------------------------------------------------------------------------------- 1 | country_name,country_code,population 2 | "Aruba","ABW",102911 3 | "Andorra","AND",79218 4 | "Afghanistan","AFG",30551674 5 | "Angola","AGO",21471618 6 | "Albania","ALB",2897366 7 | "Arab World","ARB",369762523 8 | "United Arab Emirates","ARE",9346129 9 | -------------------------------------------------------------------------------- /tests/test_addons/test_ftp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | from time import sleep 4 | 5 | from os import remove 6 | 7 | from nose.plugins.skip import SkipTest 8 | from pyftpdlib.authorizers import DummyAuthorizer 9 | from pyftpdlib.handlers import FTPHandler 10 | from 
pyftpdlib.servers import FTPServer 11 | from os.path import dirname, join, exists 12 | 13 | from tests import online 14 | from tuttle.addons.ftp import FTPResource 15 | from tuttle.error import TuttleError 16 | from tuttle.project_parser import ProjectParser 17 | 18 | 19 | class TestFtpResource: 20 | 21 | ftpd = None 22 | p = None 23 | ftp_dir = join(dirname(__file__), 'ftp') 24 | 25 | @classmethod 26 | def run_server(cls): 27 | authorizer = DummyAuthorizer() 28 | authorizer.add_user("user", "password", cls.ftp_dir, perm="elrd") 29 | handler = FTPHandler 30 | handler.authorizer = authorizer 31 | cls.ftpd = FTPServer(("0.0.0.0", 8021), handler) 32 | cls.ftpd.serve_forever(timeout=0.2, handle_exit=True) 33 | 34 | @classmethod 35 | def setUpClass(cls): 36 | """ Run a web server in background to mock some specific HTTP behaviours 37 | """ 38 | from threading import Thread 39 | cls.p = Thread(target=cls.run_server) 40 | cls.p.start() 41 | sleep(0.1) # The server needs time to start 42 | 43 | @classmethod 44 | def tearDownClass(cls): 45 | """ Stop the http server in background 46 | """ 47 | cls.ftpd.close_all() 48 | cls.ftpd.ioloop.close() 49 | cls.p.join() 50 | to_rm = join(cls.ftp_dir, 'to_remove') 51 | if exists(to_rm): 52 | remove(to_rm) 53 | 54 | def test_resource_exists(self): 55 | """A mocked ftp resource should exist""" 56 | res = FTPResource("ftp://localhost:8021/ftp_resource") 57 | res.set_authentication("user", "password") 58 | assert res.exists() 59 | 60 | def test_missing_resource_should_not_exists(self): 61 | """A mocked ftp resource should exist""" 62 | res = FTPResource("ftp://localhost:8021/not_an_ftp_resource") 63 | res.set_authentication("user", "password") 64 | assert not res.exists() 65 | 66 | def test_raise_if_wrong_credentials(self): 67 | """A real resource should exist""" 68 | # Or by a local http server 69 | res = FTPResource("ftp://localhost:8021/ftp_resource_without_cred") 70 | res.set_authentication("user", "bad_password") 71 | try: 72 | 
res.exists() 73 | assert False, "exists should have raised" 74 | except: 75 | assert True 76 | 77 | def test_delete(self): 78 | """ When an ftp resource is deeted it shouldn't exist anymore""" 79 | with open(join(self.ftp_dir, 'to_remove'), 'w') as f: 80 | f.write("Will be removed\n") 81 | 82 | res = FTPResource("ftp://localhost:8021/to_remove") 83 | res.set_authentication("user", "password") 84 | assert res.exists() 85 | res.remove() 86 | assert not res.exists() 87 | 88 | def test_signature(self): 89 | """ Should return a signature for an ftp resource """ 90 | res = FTPResource("ftp://localhost:8021/ftp_resource") 91 | res.set_authentication("user", "password") 92 | assert res.exists() 93 | s = res.signature() 94 | assert s == 'sha1-32K: 4627d1a3557c0c75698b70df9f17c8654f734f55', s 95 | 96 | def test_signature_raises_if_bad_credentials(self): 97 | """ If crendentials are wrong, signarue() should raise """ 98 | res = FTPResource("ftp://localhost:8021/ftp_resource") 99 | res.set_authentication("user", "bad_password") 100 | try: 101 | s = res.signature() 102 | assert False, "signature should have raised" 103 | except: 104 | assert True 105 | 106 | def test_ftp_resource_in_workflow(self): 107 | """An HTTPS resource should be allowed in a workflow""" 108 | pp = ProjectParser() 109 | project = " <- ftp://localhost/ftp_resource" 110 | pp.set_project(project) 111 | workflow = pp.parse_project() 112 | assert len(workflow._processes) == 1 113 | inputs = [res for res in workflow._processes[0].iter_inputs()] 114 | assert len(inputs) == 1 115 | 116 | def test_real_resource_exists(self): 117 | """A real ftp resource should exist""" 118 | if not online: 119 | raise SkipTest("Offline") 120 | res = FTPResource("ftp://ftp.free.fr/mirrors/ftp.ubuntu.com/releases/HEADER.html") 121 | try: 122 | assert res.exists() 123 | except TuttleError as e: 124 | if e.message.find("425"): 125 | raise SkipTest("Offline") 126 | 127 | 
-------------------------------------------------------------------------------- /tests/test_addons/test_hdfs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from os import remove, getcwd 3 | from os.path import dirname, join, abspath, exists 4 | from urllib import urlretrieve 5 | from nose.plugins.skip import SkipTest 6 | from setuptools.archive_util import unpack_tarfile 7 | 8 | from tuttle.addons.hdfs import HDFSResource 9 | from snakebite.minicluster import MiniCluster 10 | import sys, os 11 | 12 | class TestHdfsResource: 13 | 14 | #testfiles_path = join(dirname(abspath(__file__)), "testfiles") 15 | testfiles_path = dirname(abspath(__file__)) 16 | cluster = None 17 | 18 | @classmethod 19 | def setUpClass(cls): 20 | if 'HADOOP_HOME' not in os.environ or not os.environ['HADOOP_HOME']: 21 | raise SkipTest("Hadoop not installed") 22 | 23 | if not cls.cluster: 24 | c = MiniCluster(None, start_cluster=False) 25 | result = c.ls("/") 26 | if result: 27 | raise Exception("An active Hadoop cluster is found! Not running tests!") 28 | 29 | cls.cluster = MiniCluster(cls.testfiles_path) 30 | result = cls.cluster.ls("/") 31 | if result: 32 | raise Exception("An active Hadoop cluster is found! 
Not running tests!") 33 | cls.cluster.put("/A", "/A") 34 | cls.cluster.put("/A", "/file_to_be_deleted") 35 | cls.cluster.mkdir("/dir") 36 | cls.cluster.mkdir("/dir_to_be_deleted") 37 | cls.cluster.put("/A", "/dir_to_be_deleted/inside_dir") 38 | 39 | @classmethod 40 | def tearDownClass(cls): 41 | if cls.cluster: 42 | cls.cluster.terminate() 43 | 44 | def test_file_resource_exists(self): 45 | """A mocked hdfs file resource should exist""" 46 | res = HDFSResource("hdfs://localhost:{}/A".format(self.cluster.port)) 47 | assert res.exists() 48 | 49 | def test_directory_resource_exists(self): 50 | """A mocked hdfs directory resource should exist""" 51 | res = HDFSResource("hdfs://localhost:{}/dir".format(self.cluster.port)) 52 | assert res.exists() 53 | 54 | def test_resource_not_exists(self): 55 | """An hdfs resource not mocked should not exist""" 56 | res = HDFSResource("hdfs://localhost:{}/B".format(self.cluster.port)) 57 | assert not res.exists() 58 | 59 | def test_resource_with_bad_credentials_should_raise(self): 60 | """ An hdfs resource with bad credentials should raise """ 61 | raise SkipTest() # Minicluster does not seam to implement authentication. Does snakebite ? 
62 | res = HDFSResource("hdfs://localhost:{}/A".format(self.cluster.port)) 63 | res.set_authentication('foo', 'bar') 64 | try: 65 | res.exists() 66 | assert False, "exists should have raised" 67 | except: 68 | assert True 69 | 70 | def test_delete_file(self): 71 | """ When an hdfs resource is deleted it shouldn't exist anymore""" 72 | res = HDFSResource("hdfs://localhost:{}/file_to_be_deleted".format(self.cluster.port)) 73 | assert res.exists() 74 | res.remove() 75 | assert not res.exists() 76 | 77 | def test_delete_dir(self): 78 | """ An hdfs directory can be deleted, even if not empty """ 79 | res = HDFSResource("hdfs://localhost:{}/dir_to_be_deleted".format(self.cluster.port)) 80 | assert res.exists() 81 | res.remove() 82 | assert not res.exists() 83 | 84 | def test_file_signature(self): 85 | """ An hdfs file has a signature """ 86 | res = HDFSResource("hdfs://localhost:{}/A".format(self.cluster.port)) 87 | assert res.signature().startswith("modification_time:"), res.signature() 88 | 89 | def test_dir_signature(self): 90 | """ An hdfs file has a signature """ 91 | res = HDFSResource("hdfs://localhost:{}/dir".format(self.cluster.port)) 92 | assert res.signature() == "d", res.signature() 93 | 94 | 95 | def install_hadoop(): 96 | try: 97 | import tests 98 | hadoop_path = join(dirname(tests.__file__), 'hadoop') 99 | except: 100 | hadoop_path = join('tests', 'hadoop') 101 | if not exists(hadoop_path): 102 | print("Installing hadoop 2.9.1 in {}".format(hadoop_path)) 103 | url = "http://apache.mirrors.ovh.net/ftp.apache.org/dist/hadoop/common/hadoop-2.9.1/hadoop-2.9.1.tar.gz" 104 | if not exists("hadoop-2.9.1.tar.gz"): 105 | print("Downloading from {} to {}".format(url, getcwd())) 106 | urlretrieve(url, "hadoop-2.9.1.tar.gz") 107 | print("Unzipping to {}".format(hadoop_path)) 108 | unpack_tarfile("hadoop-2.9.1.tar.gz", hadoop_path) 109 | if os.name=="posix": 110 | with open(join(hadoop_path, "vars.sh"), "w") as f: 111 | f.write('export 
HADOOP_HOME="{}"\n'.format(join(hadoop_path, "hadoop-2.9.1"))) 112 | if os.name=="nt": 113 | with open(join(hadoop_path, "vars.bat"), "w") as f: 114 | f.write('HADOOP_HOME="{}"\n'.format(join(hadoop_path, "hadoop-2.9.1"))) 115 | remove("hadoop-2.9.1.tar.gz") 116 | 117 | else: 118 | print("Hadoop already installed in {}".format(hadoop_path)) 119 | 120 | 121 | if __name__ == '__main__': 122 | if len(sys.argv) == 2 and sys.argv[1] == 'install': 123 | install_hadoop() 124 | -------------------------------------------------------------------------------- /tests/test_addons/test_pycurl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | import pycurl 3 | import sys 4 | from unittest.case import SkipTest 5 | 6 | from tests import online 7 | 8 | 9 | class TestPyCurl: 10 | 11 | def test_pycurl_download_progress(self): 12 | if not online: 13 | raise SkipTest("Can't test download offline") 14 | def progress(download_t, download_d, upload_t, upload_d): 15 | print("Total to download {}\n".format(download_t)) 16 | print("Total downloaded {}\n".format(download_d)) 17 | print("Total to upload {}\n".format(upload_t)) 18 | print("Total uploaded {}\n".format(upload_d)) 19 | 20 | with open('out.html', 'wb') as f: 21 | c = pycurl.Curl() 22 | c.setopt(c.URL, 'http://pycurl.io/') 23 | #c.setopt(c.URL, 'http://planet.osm.org/pbf/planet-latest.osm.pbf') 24 | c.setopt(pycurl.USERAGENT, "python test") 25 | c.setopt(c.NOPROGRESS, False) 26 | c.setopt(c.XFERINFOFUNCTION, progress) 27 | c.setopt(c.WRITEDATA, f) 28 | c.perform() 29 | c.close() 30 | 31 | -------------------------------------------------------------------------------- /tests/test_addons/test_python.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf8 -*- 3 | from os.path import join, isfile 4 | from tests.functional_tests import isolate, run_tuttle_file 5 | from tuttle.addons.sqlite import SQLiteResource, 
SQLiteTuttleError 6 | from tuttle.project_parser import ProjectParser 7 | 8 | 9 | class TestPythonProcessor(): 10 | 11 | def test_python_processor_should_be_availlable(self): 12 | """A project with an python processor should be Ok""" 13 | project = "file://B <- file://A ! python" 14 | pp = ProjectParser() 15 | pp.set_project(project) 16 | pp.read_line() 17 | process = pp.parse_dependencies_and_processor() 18 | assert process._processor.name == "python" 19 | 20 | 21 | @isolate(['A']) 22 | def test_python_processor(self): 23 | """A python process should run""" 24 | project = u"""file://B <- file://A ! python 25 | 26 | from time import time 27 | print("A python process at {}".format(time())) 28 | print("texte accentué") 29 | open('B', 'w').write('A produces B') 30 | """ 31 | rcode, output = run_tuttle_file(project) 32 | assert rcode == 0, output 33 | assert output.find("A python process") >= 0 34 | 35 | 36 | @isolate(['A']) 37 | def test_error_in_python_processor(self): 38 | """ If an error occurs, tuttle should fail and output logs should trace the error""" 39 | project = """file://B <- file://A ! python 40 | 41 | open('B', 'w').write('A produces B') 42 | a = 0 43 | print("should raise an error : {}".format(0 / a)) 44 | """ 45 | rcode, output = run_tuttle_file(project) 46 | assert rcode == 2 47 | error_log = open(join('.tuttle', 'processes', 'logs', 'tuttlefile_1_err.txt')).read() 48 | assert error_log.find('ZeroDivisionError:') >= 0, error_log 49 | 50 | # @isolate(['tests.sqlite']) 51 | # def test_comments_in_process(self): 52 | # """ If an error occurs, tuttle should fail and output logs should trace the error""" 53 | # project = """sqlite://tests.sqlite/tables/new_table <- sqlite://tests.sqlite/tables/test_table ! 
sqlite 54 | # CREATE TABLE new_table AS SELECT * FROM test_table; 55 | # -- This is a comment 56 | # /* last comment style*/ 57 | # """ 58 | # rcode, output = run_tuttle_file(project) 59 | # error_log = open(join('.tuttle', 'processes', 'logs', 'tuttlefile_1_err')).read() 60 | # assert rcode == 0, error_log 61 | # assert output.find("comment") >= 0 62 | 63 | @isolate(['A', 'a_lib.py']) 64 | def test_import_library(self): 65 | """ If an error occurs, tuttle should fail and output logs should trace the error""" 66 | project = """file://B <- file://A ! python 67 | import a_lib 68 | open('B', 'w').write(a_lib.a_function()) 69 | """ 70 | rcode, output = run_tuttle_file(project) 71 | assert rcode == 0, output 72 | B_contents = open('B').read() 73 | assert(B_contents == '42') 74 | -------------------------------------------------------------------------------- /tests/test_addons/test_s3_addon.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from tempfile import mkdtemp 3 | from shutil import rmtree 4 | from os import makedirs, environ 5 | from os.path import join 6 | from unittest.case import SkipTest 7 | 8 | from tests.functional_tests import run_tuttle_file 9 | from s3server import start, stop 10 | from tuttle.project_parser import ProjectParser 11 | from tuttle.addons.s3 import S3Resource 12 | from tests import bad_resolving 13 | 14 | 15 | class TestS3Resource: 16 | 17 | server_thread = None 18 | tmp_dir = None 19 | ioloop = None 20 | 21 | @classmethod 22 | def run_server(cls): 23 | cls.tmp_dir = mkdtemp() 24 | bucket_dir = join(cls.tmp_dir, "test_bucket") 25 | makedirs(bucket_dir) 26 | test_key_file = join(bucket_dir, "test_key") 27 | open(test_key_file, "w").close() 28 | key_for_removal = join(bucket_dir, "key_for_removal") 29 | open(key_for_removal, "w").close() 30 | from tornado import ioloop 31 | cls.ioloop = ioloop.IOLoop.current() 32 | start(8069, root_directory=cls.tmp_dir) 33 | 34 | 
@classmethod 35 | def stop_server(cls): 36 | stop(cls.ioloop) 37 | 38 | @classmethod 39 | def setUpClass(cls): 40 | """ Run a S3 compatible server mock 41 | """ 42 | from threading import Thread 43 | cls.server_thread = Thread(target=cls.run_server) 44 | cls.server_thread.start() 45 | # Set environment variable to ensure client authentication 46 | environ['AWS_ACCESS_KEY_ID'] = "MY_AWS_ACCOUNT" 47 | environ['AWS_SECRET_ACCESS_KEY'] = "MY_AWS_PASSWORD" 48 | 49 | @classmethod 50 | def tearDownClass(cls): 51 | """ Stop the S3 server in background 52 | """ 53 | cls.stop_server() 54 | cls.server_thread.join() 55 | rmtree(cls.tmp_dir) 56 | 57 | def test_resource_properties(self): 58 | """An s3 resource should have an endpoint, a bucket and a key""" 59 | res = S3Resource("s3://localhost:8069/test_bucket/test_key") 60 | assert res._endpoint == 'http://localhost:8069', res._endpoint 61 | assert res._bucket == 'test_bucket', res._bucket 62 | assert res._key == 'test_key', res._key 63 | 64 | def test_real_resource_exists(self): 65 | """A real s3 resource should exist""" 66 | res = S3Resource("s3://localhost:8069/test_bucket/test_key") 67 | assert res.exists() 68 | 69 | def test_fictive_resource_not_exists(self): 70 | """A fictive resource should not exist""" 71 | res = S3Resource("s3://localhost:8069/test_bucket/i_dont_exist") 72 | assert not res.exists() 73 | 74 | def test_s3_resource_in_workflow(self): 75 | """An s3 resource should be allowed in a workflow""" 76 | pp = ProjectParser() 77 | project = "<- s3://localhost:8069/test_bucket/test_key" 78 | pp.set_project(project) 79 | workflow = pp.parse_project() 80 | assert len(workflow._processes) == 1 81 | inputs = [res for res in workflow._processes[0].iter_inputs()] 82 | assert len(inputs) == 1, len(inputs) 83 | assert inputs[0].scheme == "s3", inputs[0].scheme 84 | 85 | def test_resource_signature(self): 86 | """ An S3 signature is the Etag of the web object""" 87 | res = 
S3Resource("s3://localhost:8069/test_bucket/test_key") 88 | sig = res.signature() 89 | assert sig == '"da39a3ee5e6b4b0d3255bfef95601890afd80709"', sig 90 | 91 | def test_remove_s3_resource(self): 92 | """remove() should remove the resource""" 93 | res = S3Resource("s3://localhost:8069/test_bucket/key_for_removal") 94 | assert res.exists() 95 | res.remove() 96 | assert not res.exists() 97 | 98 | def test_when_host_is_unknown_should_display_message(self): 99 | """ Should display a message if tuttle cant connect to database because host does not exists """ 100 | if bad_resolving: 101 | raise SkipTest("Skipping test because of resolving faillure on the host") 102 | project = """<- s3://no-s3-host.com:8069/test_bucket/test_key 103 | echo "Test" 104 | """ 105 | rcode, output = run_tuttle_file(project) 106 | assert rcode == 2, output 107 | assert output.find("Unknown host") > -1, output 108 | 109 | # TODO 110 | # Maybe we can create a specific tuttle exception that could also be valid at least 111 | # with postgresql resources. Usable for files ? 
112 | def test_invalid_credential_should_make_resource_not_exist(self): 113 | """ If bad credentials, resource should be considered as not existing """ 114 | del environ['AWS_ACCESS_KEY_ID'] 115 | del environ['AWS_SECRET_ACCESS_KEY'] 116 | try: 117 | res = S3Resource("s3://localhost:8069/test_bucket/test_key") 118 | assert not res.exists() 119 | finally: 120 | environ['AWS_ACCESS_KEY_ID'] = "MY_AWS_ACCOUNT" 121 | environ['AWS_SECRET_ACCESS_KEY'] = "MY_AWS_PASSWORD" 122 | -------------------------------------------------------------------------------- /tests/test_addons/tests.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/tests/test_addons/tests.sqlite -------------------------------------------------------------------------------- /tests/test_addons/utf8.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexman/tuttle/dab07db4a1e3e18c876deb2897c07be3935acd60/tests/test_addons/utf8.csv -------------------------------------------------------------------------------- /tests/test_authentication.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 4 | from cStringIO import StringIO 5 | from nose.plugins.skip import SkipTest 6 | 7 | from tuttle.workflow_builder import MalformedTuttlepassError, tuttlepass_file, ResourceAuthenticator 8 | from tuttle.utils import EnvVar 9 | 10 | 11 | class TestAuthentication: 12 | 13 | def test_path_on_linux(self): 14 | """ On linux, default path must be on ~/.tuttlepass """ 15 | if os.name != 'posix': 16 | raise SkipTest("This test is valid only on Linux") 17 | assert tuttlepass_file().endswith("/.tuttlepass"), tuttlepass_file() 18 | 19 | def test_path_on_windows(self): 20 | """ On Windows, default path must be on XXX """ 21 | if os.name != 'nt': 22 | raise 
SkipTest("This test is valid only on Windows") 23 | assert tuttlepass_file().endswith("\.tuttlepass"), tuttlepass_file() 24 | 25 | def test_path_with_env_var(self): 26 | """ if environnement variable TUTTLEPASSPAHT """ 27 | with EnvVar('TUTTLEPASSFILE', 'bar'): 28 | assert tuttlepass_file() == "bar" 29 | 30 | def test_bad_regexp(self): 31 | rules = """ 32 | http://g[ithub.com/\tuser\tpassword 33 | """ 34 | passfile = StringIO(rules) 35 | try: 36 | auth = ResourceAuthenticator(passfile) 37 | assert False, "should not be here" 38 | except MalformedTuttlepassError as e: 39 | assert e.message.find("Parse error on regular expression") > -1, e.message 40 | 41 | def test_field_missing(self): 42 | rules = """ 43 | http://github.com/\tuser 44 | """ 45 | passfile = StringIO(rules) 46 | try: 47 | auth = ResourceAuthenticator(passfile) 48 | assert False, "should not be here" 49 | except MalformedTuttlepassError as e: 50 | assert e.message.find("wrong number of fields") > -1, e.message 51 | 52 | def test_two_many_fields(self): 53 | rules = """ 54 | http://github.com/\tuser\tpassword\textra 55 | """ 56 | passfile = StringIO(rules) 57 | try: 58 | auth = ResourceAuthenticator(passfile) 59 | assert False, "should not be here" 60 | except MalformedTuttlepassError as e: 61 | assert e.message.find("wrong number of fields") > -1, e.message 62 | 63 | def test_comment_is_allowed(self): 64 | """ When there is a comment, spaces before # are not considered as part of the password """ 65 | rules = """ 66 | http://github.com/\tuser\tpassword # comment 67 | """ 68 | passfile = StringIO(rules) 69 | auth = ResourceAuthenticator(passfile) 70 | user, password = auth.get_auth("http://github.com/") 71 | assert user == "user", user 72 | assert password == "password", password 73 | 74 | def test_regexp(self): 75 | """ When there is a comment, spaces before # are not considered as part of the password """ 76 | rules = """ 77 | http://.*github.com/\tuser\tpassword 78 | """ 79 | passfile = 
StringIO(rules) 80 | auth = ResourceAuthenticator(passfile) 81 | user, password = auth.get_auth("http://github.com/") 82 | assert user == "user", user 83 | assert password == "password", password 84 | 85 | user, password = auth.get_auth("http://www.github.com/") 86 | assert user == "user", user 87 | assert password == "password", password 88 | 89 | def test_several_regexp(self): 90 | """ url can be captured by the second regex """ 91 | rules = """ 92 | http://.*github.com/\tuser\tpassword 93 | http://.*python.org\tuser2\tpassword2 94 | """ 95 | passfile = StringIO(rules) 96 | auth = ResourceAuthenticator(passfile) 97 | print auth._rules 98 | user, password = auth.get_auth("http://python.org") 99 | assert user == "user2", user 100 | assert password == "password2", "'{}'".format(password) 101 | 102 | def test_partial_regexp(self): 103 | """ When there is a comment, spaces before # are not considered as part of the password """ 104 | rules = """ 105 | github\tuser\tpassword 106 | """ 107 | passfile = StringIO(rules) 108 | auth = ResourceAuthenticator(passfile) 109 | user, password = auth.get_auth("http://github.com/") 110 | assert user == "user", user 111 | assert password == "password", password 112 | 113 | def test_no_match(self): 114 | """ When there is a comment, spaces before # are not considered as part of the password """ 115 | rules = """ 116 | github\tuser\tpassword 117 | """ 118 | passfile = StringIO(rules) 119 | auth = ResourceAuthenticator(passfile) 120 | user, password = auth.get_auth("http://linux.com/") 121 | assert user is None, user 122 | assert password is None, password 123 | -------------------------------------------------------------------------------- /tests/test_figures_formating.py: -------------------------------------------------------------------------------- 1 | from tuttle.error import TuttleError 2 | from tuttle.figures_formating import nice_size, nice_duration, parse_duration 3 | 4 | 5 | class TestFileSizeFormating: 6 | 7 | def 
test_nice_size_B(self): 8 | """ A number below 1 000 B should be expressed in B""" 9 | nice = nice_size(12) 10 | assert nice == "12 B", nice 11 | 12 | def test_nice_size_KB(self): 13 | """ A number below 1 000 000 B should be expressed in KB""" 14 | nice = nice_size(12034) 15 | assert nice == "11.7 KB", nice 16 | 17 | def test_nice_size_MB(self): 18 | """ A number below 1 000 000 0000 B should be expressed in MB""" 19 | nice = nice_size(12056000) 20 | assert nice == "11.4 MB", nice 21 | 22 | def test_nice_size_MB_after_dot(self): 23 | """ A number below 1 000 000 0000 B should be expressed in MB""" 24 | nice = nice_size(12506000) 25 | assert nice == "11.9 MB", nice 26 | 27 | def test_nice_size_GB(self): 28 | """ A number below 1 000 000 0000 000 B should be expressed in GB""" 29 | nice = nice_size(12049000000) 30 | assert nice == "11.2 GB", nice 31 | 32 | 33 | class TestDurationFormating: 34 | 35 | def test_nice_duration_s(self): 36 | """ A duration below the minute should be expressed in seconds""" 37 | nice = nice_duration(12) 38 | assert nice == "12s", nice 39 | 40 | def test_nice_duration_min(self): 41 | """ A duration below the hour should be expressed in minutes and seconds""" 42 | nice = nice_duration(64) 43 | assert nice == "1min 4s", nice 44 | 45 | def test_nice_size_hour(self): 46 | """ A duration below the day should be expressed in hours and minutes""" 47 | nice = nice_duration(10000) 48 | assert nice == "2h 46min", nice 49 | 50 | def test_nice_size_day(self): 51 | """ A duration above the day should be expressed in days and hours""" 52 | nice = nice_duration(1000000) 53 | assert nice == "11d 13h", nice 54 | 55 | def test_nice_duration_ms(self): 56 | """ A duration must be rounded to seconds""" 57 | nice = nice_duration(73.3) 58 | assert nice == "1min 13s", nice 59 | 60 | 61 | class TestDurationParsing: 62 | 63 | def test_parse_negative_value(self): 64 | """ Should raise if the expression is negative because a duration can't be negative""" 65 | try: 66 
| d = parse_duration("-1") 67 | assert False, "Should have raised" 68 | except ValueError as e: 69 | assert True 70 | 71 | def test_parse_seconds(self): 72 | """ should interpret s as seconds """ 73 | d = parse_duration("12s") 74 | assert d == 12, d 75 | 76 | def test_parse_bad_expression(self): 77 | """ Should raise if the expression isn't a duration""" 78 | try: 79 | d = parse_duration("Not a number, Bro") 80 | assert False, "Should have raised" 81 | except ValueError as e: 82 | assert True 83 | 84 | def test_parse_minutes_secs(self): 85 | """ A duration can have minutes and seconds """ 86 | d = parse_duration("14min 12s") 87 | assert d == 14*60 + 12, d 88 | 89 | def test_parse_minutes(self): 90 | """ A duration can have only minutes """ 91 | d = parse_duration("14min") 92 | assert d == 14*60, d 93 | 94 | def test_parse_several_spaces(self): 95 | """ Figures and units also parts of the duration can be separated by any number of spaces """ 96 | d = parse_duration("14 min 12 s") 97 | assert d == 14*60 + 12, d 98 | 99 | def test_parse_hours(self): 100 | """ A duration can have hours """ 101 | d = parse_duration("3 h 12s") 102 | assert d == 3*3600 + 12, d 103 | 104 | def test_parse_days(self): 105 | """ A duration can have days """ 106 | d = parse_duration("4d 12s") 107 | assert d == 4*24*3600 + 12, d 108 | -------------------------------------------------------------------------------- /tests/test_file.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from tests.functional_tests import isolate 4 | from tuttle.resource import FileResource 5 | import os 6 | 7 | 8 | # TODO what about symlinks ? 
9 | class TestFile(): 10 | 11 | 12 | @isolate 13 | def test_directory_should_be_removable(self): 14 | """ if a file resource is a directory it should be removable """ 15 | os.mkdir('a_dir') 16 | assert os.path.isdir('a_dir') 17 | r = FileResource("file://a_dir") 18 | r.remove() 19 | assert not os.path.exists('a_dir') 20 | 21 | @isolate 22 | def test_directory_should_be_removable_even_if_not_empty(self): 23 | """ if a file resource is a directory it should be removable even if it contains files """ 24 | os.mkdir('a_dir') 25 | open('a_dir/A', 'w').write('A') 26 | assert os.path.isdir('a_dir') 27 | r = FileResource("file://a_dir") 28 | r.remove() 29 | assert not os.path.exists('a_dir') 30 | 31 | @isolate 32 | def test_directory_should_have_a_signature(self): 33 | """ if a file resource is a directory it should be removable """ 34 | os.mkdir('a_dir') 35 | assert os.path.isdir('a_dir') 36 | r = FileResource("file://a_dir") 37 | sig = r.signature() 38 | # TODO should a directory have a signature resulting of its content ? 
39 | assert sig.startswith("sha1:"), sig 40 | -------------------------------------------------------------------------------- /tests/test_processors/test_bat_processor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from glob import glob 3 | from tests.functional_tests import isolate 4 | from tuttle.process import Process 5 | from tuttle.processors import BatProcessor 6 | from os.path import join 7 | 8 | class TestBatProcessor(): 9 | 10 | @isolate 11 | def test_executable_generation(self): 12 | """Should generate an executable""" 13 | bp = BatProcessor() 14 | code = "bla bla\nblou" 15 | processor = BatProcessor() 16 | process = Process(processor, "tuttlefile", 23) 17 | process.set_code(code) 18 | bp.generate_executable(process, "tuttlefile_25") 19 | content = open(join("tuttlefile_25", "tuttlefile_23.bat")).read() 20 | assert content.startswith("@echo off") 21 | assert content.find("bla bla") >= 0 22 | assert content.find("blou") >= 0 23 | -------------------------------------------------------------------------------- /tests/test_processors/test_shell_processor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | 4 | from nose.tools import * 5 | from tests.functional_tests import isolate 6 | from tuttle.process import Process 7 | from tuttle.processors import ShellProcessor 8 | from os import remove 9 | 10 | 11 | class TestShellProcessor(): 12 | 13 | @isolate 14 | def test_executable_generation(self): 15 | """Should generate an executable""" 16 | sp = ShellProcessor() 17 | code = "bla bla\nblou" 18 | processor = ShellProcessor() 19 | process = Process(processor, "tuttlefile", 12) 20 | process.set_code(code) 21 | sp.generate_executable(process, "tuttlefile_12") 22 | content = open("tuttlefile_12").read() 23 | assert content.startswith("#!") 24 | assert content.endswith(code) 25 | 
-------------------------------------------------------------------------------- /tests/test_report.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from os.path import isfile, join 3 | from re import search, DOTALL, findall 4 | 5 | from tests.functional_tests import isolate, run_tuttle_file 6 | 7 | 8 | class TestReport: 9 | 10 | @isolate(['A']) 11 | def test_success(self): 12 | """ If a workflow finishes with all processes in success, it should display success in the main title""" 13 | project = """file://B <- file://A 14 | echo A produces B 15 | echo B > B 16 | """ 17 | rcode, output = run_tuttle_file(project) 18 | assert rcode == 0 19 | report_path = join('.tuttle', 'report.html') 20 | assert isfile(report_path) 21 | report = open(report_path).read() 22 | title_match = search(r'

.*Success.*

', report, DOTALL) 23 | assert title_match, report 24 | title_2_match = search(r'', report, DOTALL) 25 | assert title_2_match, report 26 | 27 | @isolate(['A']) 28 | def test_failure(self): 29 | """ If process in the workflow fails, the report should display failure in the main title""" 30 | project = """file://B <- file://A 31 | echo A produces B 32 | echo B > B 33 | 34 | file://C <- file://B 35 | echo B produces C 36 | echo B produces C > C 37 | failure on purpose 38 | """ 39 | rcode, output = run_tuttle_file(project) 40 | assert rcode == 2 41 | report_path = join('.tuttle', 'report.html') 42 | assert isfile(report_path) 43 | report = open(report_path).read() 44 | title_match = search(r'

.*Failure.*

', report, DOTALL) 45 | assert title_match, report 46 | title_2_match = search(r'', report, DOTALL) 47 | assert title_2_match, report 48 | 49 | @isolate(['A']) 50 | def test_a_failure_in_a_process_without_output_should_be_marked_in_the_repoort(self): 51 | """ If process without outputs fails, the report should display failure in the main title""" 52 | project = """<- file://A 53 | failure on purpose 54 | """ 55 | rcode, output = run_tuttle_file(project) 56 | assert rcode == 2 57 | report_path = join('.tuttle', 'report.html') 58 | assert isfile(report_path) 59 | report = open(report_path).read() 60 | title_match = search(r'

.*Failure.*

', report, DOTALL) 61 | assert title_match, report 62 | title_2_match = search(r'', report, DOTALL) 63 | assert title_2_match, report 64 | 65 | @isolate(['A']) 66 | def test_all_relative_links_must_exists(self): 67 | """ If process without outputs fails, the report should display failure in the main title""" 68 | project = """file://B <- file://A 69 | echo A produces B > B 70 | """ 71 | rcode, output = run_tuttle_file(project) 72 | assert rcode == 0 73 | report_path = join('.tuttle', 'report.html') 74 | assert isfile(report_path) 75 | report = open(report_path).read() 76 | links = findall(r'', report) 77 | for link in links: 78 | rel_path = link[1].split('/') 79 | path = join('.tuttle', *rel_path) 80 | assert isfile(path), path 81 | 82 | @isolate(['A']) 83 | def test_all_imports_must_exists(self): 84 | """ If process without outputs fails, the report should display failure in the main title""" 85 | project = """file://B <- file://A 86 | echo A produces B > B 87 | """ 88 | rcode, output = run_tuttle_file(project) 89 | assert rcode == 0 90 | report_path = join('.tuttle', 'report.html') 91 | assert isfile(report_path) 92 | report = open(report_path).read() 93 | links = findall(r'src=\"([^"]*)>', report) 94 | for link in links: 95 | rel_path = link[1].split('/') 96 | path = join('.tuttle', *rel_path) 97 | assert isfile(path), path 98 | -------------------------------------------------------------------------------- /tests/test_workflow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from tests.functional_tests import isolate, run_tuttle_file 4 | from tests.test_project_parser import ProjectParser 5 | from os import path 6 | 7 | from tuttle.tuttle_directories import TuttleDirectories 8 | from tuttle.workflow_runner import WorkflowRunner 9 | 10 | 11 | class TestWorkflow(): 12 | 13 | def test_one_param_from_dir(self): 14 | """Should find the right path to a file in the project directory""" 15 | 
assert TuttleDirectories.tuttle_dir("test") == path.join(".tuttle", "test") 16 | 17 | def test_two_params_from_dir(self): 18 | """Should find the right path to a file in the project directory""" 19 | assert TuttleDirectories.tuttle_dir("test1", "test2") == path.join(".tuttle", "test1", "test2") 20 | 21 | def get_workflow(self, project_source): 22 | pp = ProjectParser() 23 | pp.set_project(project_source) 24 | return pp.parse_project() 25 | 26 | def test_compute_dependencies(self): 27 | """ Every resource should know the processes dependant from it """ 28 | workflow = self.get_workflow( 29 | """file://file2 <- file://file1 30 | Original code 31 | 32 | file://file3 <- file://file1 33 | 34 | """) 35 | workflow.compute_dependencies() 36 | assert workflow._resources['file://file1'].dependant_processes == [workflow._processes[0], 37 | workflow._processes[1]] 38 | 39 | @isolate 40 | def test_run_process(self): 41 | """ 42 | Should run a process and create the expected files according to the process and to tuttle tool 43 | """ 44 | workflow = self.get_workflow( 45 | """file://result <- file://source 46 | echo result > result 47 | """) 48 | process = workflow._processes[0] 49 | TuttleDirectories.create_tuttle_dirs() 50 | TuttleDirectories.prepare_and_assign_paths(process) 51 | process._processor.run(process, process._reserved_path, process.log_stdout, process.log_stderr) 52 | assert path.isfile("result") 53 | 54 | @isolate(['A']) 55 | def test_dump_and_report_workflow(self): 56 | """ 57 | When a workflow is run, the report should be written and the state should be dumped, even if there is a failure 58 | """ 59 | project = """file://result <- file://A 60 | echo result > result 61 | error 62 | """ 63 | rcode, output = run_tuttle_file(project) 64 | assert rcode == 2 65 | assert path.isfile(path.join(".tuttle", "last_workflow.pickle")) 66 | 67 | @isolate(['A']) 68 | def test_check_process_output(self): 69 | """ 70 | Should raise an exception if the output resource was not 
really created 71 | """ 72 | workflow = self.get_workflow( 73 | """file://result <- file://A 74 | echo test 75 | """) 76 | workflow.static_check_processes() 77 | workflow.discover_resources() 78 | wr = WorkflowRunner(3) 79 | TuttleDirectories.create_tuttle_dirs() 80 | TuttleDirectories.straighten_out_process_and_logs(workflow) 81 | successes, failures = wr.run_parallel_workflow(workflow) 82 | assert failures 83 | failure = failures[0] 84 | assert failure.error_message.find("these resources should have been created") >= 0, failure.error_message 85 | 86 | @isolate(['A']) 87 | def test_missing_outputs(self): 88 | """Test the list of missing outputs""" 89 | pp = ProjectParser() 90 | project = """file://B file://C file://D <- file://file1 file://A 91 | echo C > C 92 | """ 93 | pp.set_project(project) 94 | workflow = pp.parse_project() 95 | 96 | process = workflow._processes[0] 97 | TuttleDirectories.create_tuttle_dirs() 98 | TuttleDirectories.prepare_and_assign_paths(process) 99 | process._processor.run(process, process._reserved_path, process.log_stdout, process.log_stderr) 100 | missing = process.missing_outputs() 101 | 102 | assert len(missing) == 2 103 | assert missing[0].url == "file://B" 104 | assert missing[1].url == "file://D" 105 | 106 | def test_check_circular_references(self): 107 | """ 108 | Should return true for there are some circular references 109 | """ 110 | workflow = self.get_workflow( 111 | """file://A <- file://B 112 | 113 | file://B <- file://A 114 | file://D <- file://C 115 | """) 116 | cr = workflow.circular_references() 117 | assert len(cr) == 2, cr 118 | 119 | def test_check_no_circular_references(self): 120 | """ 121 | Should return true for there are some circular references 122 | """ 123 | workflow = self.get_workflow( 124 | """file://A <- file://B 125 | 126 | file://B <- file://C 127 | """) 128 | assert not workflow.circular_references() 129 | 130 | @isolate(['A']) 131 | def test_runnable_processes(self): 132 | """ 133 | Should run a 
    def test_extract_scheme(self):
        """A file resource url should yield the "file" scheme"""
        wb = WorkflowBuilder()
        url = "file://test.csv"
        assert wb.extract_scheme(url) == "file"

    def test_cant_extract_scheme(self):
        """extract_scheme() should return False when the url contains no scheme at all"""
        wb = WorkflowBuilder()
        url = "LoremIpsum"
        assert wb.extract_scheme(url) is False
    def test_build_process_with_default_processor(self):
        """Building a process with default processor should return a shell processor"""
        wb = WorkflowBuilder()
        # NOTE(review): the "shell" processor is passed explicitly, so the
        # default-processor mechanism is not actually exercised here (see the
        # TODO below) ; "uttlefile" looks like a typo for "tuttlefile" but it
        # is only used as a label for the process id
        process = wb.build_process("shell", "uttlefile", 69)
        # TODO : get back to shell processors by default
        assert process._processor.name == "shell"
        assert process._line_num == 69
def strip_backstophes(st):
    """Remove every backquote from *st* (the SQLite identifier quoting character)."""
    return st.replace('`', '')


def escape_column_name(st):
    """Quote a column name for use in SQLite, dropping any embedded backquote."""
    return '`{}`'.format(strip_backstophes(st))


def column_list(column_names):
    """Return a comma separated list of escaped column names."""
    escaped_columns = [escape_column_name(name) for name in column_names]
    return ','.join(escaped_columns)


def create_table(db, table_name, column_names):
    """Create table *table_name* with untyped columns *column_names*.

    :param db: an open sqlite3 connection
    :param table_name: name of the table to create
    :param column_names: list of column names (backquotes are stripped)
    """
    columns = column_list(column_names)
    sql = "CREATE TABLE `{}` ({})".format(table_name, columns)
    # The stray debug print(sql) has been removed : it wrote to the console of
    # every csv2sqlite run instead of going to the process log files
    db.execute(sql)
def check_csv_row(csv_reader, nb_cols):
    """Yield every row of *csv_reader*, checking it has exactly *nb_cols* columns.

    Line numbering starts at 2 because line 1 is the header, already consumed
    by the caller.

    :raises TuttleError: on the first row with an unexpected number of columns
    """
    for line_num, row in enumerate(csv_reader, 2):
        if len(row) != nb_cols:
            raise TuttleError("Wrong number of columns on line {}".format(line_num))
        yield row
run(self, process, reserved_path, log_stdout, log_stderr): 109 | # TODO : log queries 110 | # static_check ensured we know what are inputs and outputs 111 | input_res = process.iter_inputs().next() 112 | assert isinstance(input_res, FileResource) 113 | csv_filename = input_res._get_path() 114 | 115 | output_res = process.iter_outputs().next() 116 | assert isinstance(output_res, SQLiteResource) 117 | sqlite_filename = output_res.db_file 118 | table = output_res.objectname 119 | 120 | with open(log_stdout, "w") as lout, \ 121 | open(log_stderr, "w") as lerr, \ 122 | open(csv_filename, 'rb') as csv_file: 123 | db = sqlite3.connect(sqlite_filename) 124 | try: 125 | csv2sqlite(db, table, csv_file) 126 | except TuttleError as e: 127 | # Any well defined error it re-emitted as-is 128 | raise 129 | except Exception as e: 130 | lerr.write("Unexpected error while importing {} in SQLite database :".format(input_res._get_path())) 131 | lerr.write(e.message) 132 | lerr.write("\n") 133 | import traceback 134 | traceback.print_exc(lerr) 135 | msg = "SQLite error on process {} while importing '{}' : '{}. Is this file a valid CSV file ? 
" \ 136 | "More detail about the error in the error logs'".format(process.id, input_res.url, e.message) 137 | raise TuttleError(msg) 138 | finally: 139 | db.close() 140 | -------------------------------------------------------------------------------- /tuttle/addons/ftp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | from hashlib import sha1 4 | from re import compile 5 | from urllib2 import urlopen, Request, URLError 6 | from tuttle.error import TuttleError 7 | from tuttle.resource import ResourceMixIn, MalformedUrl 8 | from ftplib import FTP 9 | 10 | 11 | class FTPResource(ResourceMixIn, object): 12 | """An FTP resource""" 13 | scheme = 'ftp' 14 | 15 | __ereg = compile("^ftp://([^/^:]*)(:[0-9]*)?/(.*)$") 16 | 17 | def __init__(self, url): 18 | super(FTPResource, self).__init__(url) 19 | m = self.__ereg.match(url) 20 | if m is None: 21 | raise MalformedUrl("Malformed FTP url : '{}'".format(url)) 22 | self._host = m.group(1) 23 | captured_port = m.group(2) 24 | if captured_port: 25 | self._port = captured_port[1:] 26 | else: 27 | self._port = 21 28 | self._partial = m.group(3) 29 | self._authenticated_url = self.url 30 | 31 | def set_authentication(self, user, password): 32 | super(FTPResource, self).set_authentication(user, password) 33 | self._authenticated_url = 'ftp://{}:{}@{}'.format(self._user, self._password, self.url[6:]) 34 | 35 | def exists(self): 36 | try: 37 | req = Request(self._authenticated_url) 38 | response = urlopen(req) 39 | some_data = response.read(0) 40 | except URLError as e: 41 | if e.reason.find("550") > -1: 42 | return False 43 | msg = "An error occured while accessing {} : \n{}".format(self.url, str(e)) 44 | raise TuttleError(msg) 45 | return True 46 | 47 | def remove(self): 48 | ftp = FTP() 49 | ftp.connect(self._host, self._port) 50 | if self._user or self._password: 51 | ftp.login(self._user, self._password) 52 | ftp.delete(self._partial) 53 | ftp.close() 54 | 55 | 
def signature(self): 56 | # There are so many implementations of ftp it's hard to find a common way to even 57 | # retrieve the size of the file. That's why we fallback to a short hash 58 | try: 59 | req = Request(self._authenticated_url) 60 | response = urlopen(req) 61 | # a hash from the beginning of the resource 62 | chunk_32k = response.read(32768) 63 | checksum = sha1() 64 | checksum.update(chunk_32k) 65 | return "sha1-32K: {}".format(checksum.hexdigest()) 66 | except URLError as e: 67 | return TuttleError("Can't compute signature for {}. Error was : {}".format(self.url, str(e))) 68 | -------------------------------------------------------------------------------- /tuttle/addons/hdfs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from hashlib import sha1 3 | 4 | import sys 5 | from re import compile 6 | 7 | from tuttle.error import TuttleError 8 | from tuttle.resource import ResourceMixIn, MalformedUrl 9 | from snakebite.client import Client 10 | 11 | 12 | class HDFSResource(ResourceMixIn, object): 13 | """An HTTP resource""" 14 | scheme = 'hdfs' 15 | 16 | __ereg = compile("^hdfs://([^/^:]*)(:[0-9]*)?(/.*)$") 17 | 18 | def __init__(self, url): 19 | super(HDFSResource, self).__init__(url) 20 | m = self.__ereg.match(url) 21 | if m is None: 22 | raise MalformedUrl("Malformed HDFS url : '{}'".format(url)) 23 | self._host = m.group(1) 24 | captured_port = m.group(2) 25 | if captured_port: 26 | self._port = int(captured_port[1:]) 27 | else: 28 | self._port = 8020 29 | self._partial = m.group(3) 30 | 31 | def set_authentication(self, user, password): 32 | super(HDFSResource, self).set_authentication(user, password) 33 | 34 | def exists(self): 35 | client = Client(self._host, self._port, effective_user=self._user, use_trash=False) 36 | return client.test(self._partial, exists=True) 37 | 38 | def remove(self): 39 | client = Client(self._host, self._port, effective_user=self._user, use_trash=False) 40 
def hostname_resolves(hostname):
    """Return True when *hostname* can be resolved to an address, False otherwise."""
    try:
        gethostbyname(hostname)
    except error:
        return False
    return True
    def __init__(self, url):
        """Parse an s3 url of the form s3://host[:port]/bucket/key.

        :param url: string, eg "s3://localhost:9000/my-bucket/path/to/key"
        :raises MalformedUrl: when the url does not match the expected form
        """
        super(S3Resource, self).__init__(url)
        m = self.ereg.match(url)
        if m is None:
            raise MalformedUrl("Malformed S3 url : '{}'".format(url))
        self._host = m.group(1).split(':')[0]  # host part without the port
        # NOTE(review): the endpoint is forced to http -- confirm https
        # endpoints are not expected here
        self._endpoint = "http://{}".format(m.group(1))
        self._bucket = m.group(2)
        self._key = m.group(3)
def check_minus_1_or_positive(value):
    """argparse type checker : accept any strictly positive int, or the special value -1."""
    number = int(value)
    if number >= 1 or number == -1:
        return number
    raise ArgumentTypeError("%s is an invalid positive int value or -1" % value)
def tuttle_main():
    """Entry point of the `tuttle` command line tool.

    Builds the argument parser ('run' and 'invalidate' sub-commands sharing
    the -f / -w / -t options), then dispatches to run() or invalidate() from
    inside the requested workspace directory. Returns the command's exit
    code ; exits with code 2 when the tuttlefile is missing or on Ctrl-C.
    """
    try:
        parser = ArgumentParser(
            description="Runs a workflow - version {}".format(version)
        )
        # Options shared by every sub-command ; add_help=False so the
        # sub-parsers can define their own help without conflicting on -h
        parent_parser = ArgumentParser(
            add_help=False
        )
        parent_parser.add_argument('-f', '--file',
                                   default='tuttlefile',
                                   dest='tuttlefile',
                                   help='Path to the tuttlefile : project file describing the workflow')
        parent_parser.add_argument('-w', '--workspace',
                                   default='.',
                                   dest='workspace',
                                   help='Directory where the workspace lies. Default is the current directory')
        # -t accepts either a number of seconds or a duration string like
        # "4d8h32min5s", validated and converted by check_duration()
        parent_parser.add_argument('-t', '--threshold',
                                   default=-1,
                                   type=check_duration,
                                   dest='threshold',
                                   help='Threshold for invalidation : \n'
                                        '-1 (default) - no verification\n'
                                        '0 - prevents any invalidation \n'
                                        'N - prevents invalidation if lost processing time >= N (in seconds)\n'
                                        'DURATION - prevents invalidation if processing time >= DURATION. DURATION can either be in second or in duration format, eg 4d8h32min5s : 4 days, 8 hours, 32 minutes 5 seconds'

                                   )
        subparsers = parser.add_subparsers(help='commands help', dest='command')
        parser_run = subparsers.add_parser('run', parents=[parent_parser],
                                           help='Run the missing part of workflow')
        parser_run.add_argument('-j', '--jobs',
                                help='Number of workers (to run processes in parallel)\n'
                                     '-1 = half of the number of cpus',
                                default=1,
                                type=check_minus_1_or_positive)
        parser_run.add_argument('-k', '--keep-going',
                                help="Don't stop when a process fail : run all the processes you can",
                                default=False,
                                dest='keep_going',
                                action="store_true")
        # NOTE(review): "which can " + "can be" duplicates "can" across the
        # wrapped string literals below -- user visible help text, worth fixing
        parser_run.add_argument('-i', '--check-integrity',
                                help="Check integrity of all resources in case some have changed outside of tuttle.\n"
                                     "Requires to compute signature of every resource produced by tuttle, which can "
                                     "can be time consuming eg for big files or database tables.",
                                default=False,
                                dest='check_integrity',
                                action="store_true")
        parser_invalidate = subparsers.add_parser('invalidate', parents=[parent_parser],
                                                  help='Remove some resources already computed and all their dependencies')
        parser_invalidate.add_argument('resources', help='url of the resources to invalidate', nargs="*")
        params = parser.parse_args(sys.argv[1:])

        tuttlefile_path = abspath(params.tuttlefile)
        if not exists(tuttlefile_path):
            print "No tuttlefile"
            sys.exit(2)
        # all commands run relative to the workspace directory
        with CurrentDir(params.workspace):
            if params.command == 'run':
                return run(tuttlefile_path, params.threshold, params.jobs, params.keep_going, params.check_integrity)
            elif params.command == 'invalidate':
                return invalidate(tuttlefile_path, params.resources, params.threshold)
    except KeyboardInterrupt:
        print("Interrupted by user")
        sys.exit(2)
def format_variable(name, value):
    """Render a variable as `name=value`, or `name[]=v1 v2 ...` for a list value."""
    if isinstance(value, list):
        return "{}[]={}".format(name, " ".join(value))
    return "{}={}".format(name, value)
class TuttleError(Exception):
    """
    Base class of every error raised by tuttle, so callers can catch them all
    with a single `except TuttleError` without swallowing other exceptions
    """
    pass
def extract_variables(variables):
    """Parse command line ``name=value`` parameters into a dict.

    Scalar parameter : ``name=value`` -> {'name': 'value'}.
    Array parameter : ``name[]=v1 v2 ...`` -> {'name': ['v1', 'v2', ...]} ;
    every following parameter without an '=' is appended to the array.

    :param variables: iterable of parameter strings
    :return: dict mapping each name to a string or a list of strings
    :raises ExtendError: when a parameter can't be parsed
    """
    res = {}
    it = iter(variables)
    try:
        var = next(it)
        while True:
            # BUGFIX : maxsplit must be 1 (it was 2), so the *value* itself may
            # contain '=' characters, eg my_var=a=b -> {'my_var': 'a=b'}
            name, value = var.split("=", 1)
            if name.endswith("[]"):
                name = name[:-2]
                array = [value]
                res[name] = array
                var = next(it)
                while var.find('=') == -1:
                    array.append(var)
                    var = next(it)
            else:
                res[name] = value
                var = next(it)
    except ValueError:
        msg = 'Can\'t extract variable from parameter "{}"'.format(var)
        raise ExtendError(msg)
    except StopIteration:
        pass
    return res
def render_extension(name, t, tuttle_env, vars_dic):
    """Render template *t* with *vars_dic* and write the result in the workspace.

    :param name: prefix used to pick a fresh file name (see get_a_name)
    :param t: a jinja2 template
    :param tuttle_env: path of the .tuttle workspace directory
    :param vars_dic: dict of variables injected in the template
    :raises ExtendError: when the template references an undefined variable
    """
    with CurrentDir(tuttle_env):
        with open(get_a_name(name), 'w') as ext_file:
            try:
                content = t.render(**vars_dic)
            except UndefinedError as e:
                msg = 'Missing value for a template variable.\n{}'.format(e.message)
                raise ExtendError(msg)
            # BUGFIX : the codec name was 'utf8)' (stray parenthesis), which is
            # not a valid encoding and made every write fail with a LookupError
            ext_file.write(content.encode('utf8'))
# Matches durations like "4d8h32min5s" ; every part is optional
# (group names reconstructed to match the lookups in group_value / parse_duration)
DURATION_REGEX = compile(
    r"^((?P<days>\d+)\s*d)?\s*((?P<hours>\d+)\s*h)?\s*((?P<min>\d+)\s*min)?\s*((?P<sec>\d+)\s*s)?$"
)


def group_value(match_result, group_name):
    """Return the captured group as an int, or 0 when the group did not match."""
    captured = match_result.group(group_name)
    return int(captured) if captured else 0


def parse_duration(expression):
    """Convert a duration expression like '4d8h32min5s' into seconds.

    :raises ValueError: when *expression* is not a valid duration
    """
    # Not a simple int, we have to parse
    m = DURATION_REGEX.match(expression)
    if not m:
        raise ValueError('"{}" is not a valid duration'.format(expression))
    days = group_value(m, 'days')
    hours = group_value(m, 'hours')
    minutes = group_value(m, 'min')
    seconds = group_value(m, 'sec')
    return (((days * 24 + hours) * 60) + minutes) * 60 + seconds
class LogTracer:
    """Tails one log file and forwards each new line to a python logger."""

    READ_SIZE = 1024

    def __init__(self, logger, namespace, filename):
        self._filename = filename
        self._logger = logger
        self._namespace = namespace
        self._filedescr = None  # opened lazily, once the file exists

    @staticmethod
    def remove_ending_cr(line):
        """Strip a single trailing newline, if any."""
        return line[:-1] if line[-1:] == "\n" else line

    def trace(self):
        """Forward the lines appended since the last call ; return True if any."""
        if not self._filedescr and isfile(self._filename):
            self._filedescr = open(self._filename, 'r')
        emitted = False
        if self._filedescr:
            for raw_line in self._filedescr.readlines(self.READ_SIZE):
                emitted = True
                text = LogTracer.remove_ending_cr(raw_line)
                if self._namespace:
                    text = "[{}] {}".format(self._namespace, text)
                self._logger.info(text)
        return emitted

    def close(self):
        """Release the underlying file handle, if it was opened."""
        if self._filedescr:
            self._filedescr.close()
filestderr) 85 | self._logs.append(tracer_stdout) 86 | self._logs.append(tracer_stderr) 87 | 88 | def trace_logs(self): 89 | traced = False 90 | for log in self._logs: 91 | if log.trace(): 92 | traced = True 93 | return traced 94 | 95 | def trace_logs_forever(self): 96 | while True: 97 | if not self.trace_logs(): 98 | sleep(0.1) 99 | 100 | def trace_in_background(self): 101 | def trace_logs_until_stop(): 102 | while True: 103 | traced = self.trace_logs() 104 | if self._terminate and not traced: 105 | break 106 | if not traced: 107 | sleep(0.1) 108 | self._thread = Thread(target=trace_logs_until_stop, name="worker") 109 | self._thread.start() 110 | return EnsureLogsFollowerStops(self) 111 | 112 | def terminate(self): 113 | # sleep(0.1) # wait for flush 114 | self._terminate = True 115 | self._thread.join() 116 | for log in self._logs: 117 | log.close() 118 | 119 | @staticmethod 120 | def get_logger(): 121 | logger = logging.getLogger(__name__) 122 | formater = logging.Formatter("%(message)s") 123 | handler = logging.StreamHandler(sys.stdout) 124 | handler.setFormatter(formater) 125 | handler.setLevel(logging.INFO) 126 | logger.setLevel(logging.INFO) 127 | logger.addHandler(handler) 128 | return logger 129 | -------------------------------------------------------------------------------- /tuttle/process.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | from time import time 4 | 5 | 6 | class Process: 7 | """ Class wrapping a process. 
from time import time


class Process:
    """ Class wrapping a process.
    A process has some input resources, some output resources,
    some code that produces outputs from inputs, a processor that handle the language specificities
    """

    def __init__(self, processor, filename, line_num):
        self._start = None
        self._end = None
        self._processor = processor
        self._filename = filename
        self._line_num = line_num
        self._inputs = []
        self._outputs = []
        self._code = ""
        self.log_stdout = None
        self.log_stderr = None
        self._reserved_path = None
        self._success = None
        self._error_message = None
        # A process is identified by where it is declared in the tuttlefile.
        self._id = "{}_{}".format(self._filename, self._line_num)

    @property
    def start(self):
        return self._start

    @property
    def end(self):
        return self._end

    @property
    def id(self):
        return self._id

    @property
    def code(self):
        return self._code

    # TODO Use a setter ?
    def set_code(self, code):
        self._code = code

    @property
    def success(self):
        return self._success

    @property
    def error_message(self):
        return self._error_message

    @property
    def processor(self):
        return self._processor

    def add_input(self, input_res):
        self._inputs.append(input_res)

    def add_output(self, output):
        self._outputs.append(output)

    def iter_inputs(self):
        for res in self._inputs:
            yield res

    def iter_outputs(self):
        for res in self._outputs:
            yield res

    def has_outputs(self):
        return len(self._outputs) > 0

    def has_input(self, resource):
        return resource in self._inputs

    def input_urls(self):
        """Return the set of urls of this process' input resources."""
        return {resource.url for resource in self._inputs}

    def output_urls(self):
        """Return the set of urls of this process' output resources."""
        return {resource.url for resource in self._outputs}

    def sorted_inputs_string(self):
        """Return the input urls, sorted and comma-joined (stable process fingerprint)."""
        sorted_inputs_urls = sorted([resource.url for resource in self.iter_inputs()])
        return ",".join(sorted_inputs_urls)

    def depends_on_process(self, process):
        """ Returns True if self deprends on a resource created by process"""
        for output_resource in process.iter_outputs():
            if self.has_input(output_resource):
                return True
        return False

    def pick_an_output(self):
        """Return any one output resource, or None if the process has no outputs."""
        if not self.has_outputs():
            return None
        return self._outputs[0]

    def retrieve_execution_info(self, process):
        """ Copy the execution info (all the properties set by function run()) from another process
        :param process:
        :return:
        """
        # NOTE(review): _error_message is not copied here — confirm whether
        # that is intentional before relying on it after a retrieve.
        self._start = process.start
        self._end = process.end
        self._success = process.success
        self.log_stdout = process.log_stdout
        self.log_stderr = process.log_stderr
        self._reserved_path = process._reserved_path

    def reset_execution_info(self):
        """ Reset the execution info (all the properties set by function run()) because the resources produced
        by this process have been invalidated
        :return:
        """
        self._start = None
        self._end = None
        self.log_stdout = None
        self.log_stderr = None
        self._success = None

    def static_check(self):
        """
        Runs a verification that the process won't obviously fail. This is used for static analysis before any process
        is run
        """
        self._processor.static_check(self)

    def assign_paths(self, reserved_path, log_stdout, log_stderr):
        """Assign the work directory and log file paths used when running the process."""
        assert reserved_path is not None
        self._reserved_path = reserved_path
        self.log_stdout = log_stdout
        self.log_stderr = log_stderr

    def set_start(self):
        self._start = time()

    def set_end(self, success, error_msg):
        self._end = time()
        self._success = success
        self._error_message = error_msg

    def missing_outputs(self):
        """
        :return: the list of output resources of this process that do not exist
                 (empty list if all outputs exist)
        """
        result = []
        for resource in self.iter_outputs():
            if not resource.exists():
                result.append(resource)
        return result

# --------------------------------------------------------------------------
# /tuttle/processors/__init__.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-

from os import path, chmod, stat, mkdir
from stat import S_IXUSR, S_IXGRP, S_IXOTH
from subprocess import Popen, PIPE
from tuttle.error import TuttleError


class ProcessExecutionError(TuttleError):
    pass


def run_and_log(args, log_stdout, log_stderr):
    """Run *args* as a subprocess, redirecting stdout/stderr to the given log files.

    :raises ProcessExecutionError: if the subprocess exits with a non-zero code
    """
    fout = open(log_stdout, 'w')
    ferr = open(log_stderr, 'w')
    try:
        # The child inherits duplicates of the descriptors, so the python-side
        # files can be closed as soon as the process has been spawned.
        osprocess = Popen(args, stdout=fout.fileno(), stderr=ferr.fileno(), stdin=PIPE)
        # Close stdin so the child never blocks waiting for input.
        osprocess.stdin.close()
    finally:
        fout.close()
        ferr.close()
    rcode = osprocess.wait()
    if rcode != 0:
        msg = "Process ended with error code {}".format(rcode)
        raise ProcessExecutionError(msg)


class ShellProcessor:
    """ A processor to run *nix shell code
    """
    name = 'shell'
    header = u"#!/usr/bin/env sh\nset -e\n"

    def generate_executable(self, process, script_path):
        """ Create an executable file
        :param script_path: string : path of the script file to create
        :return: None
        """
        with open(script_path, "wb+") as f:
            # Encode explicitly : the file is opened in binary mode, so relying
            # on implicit ASCII coercion of the unicode header is fragile.
            f.write(self.header.encode('utf8'))
            f.write(process._code.encode('utf8'))
        mode = stat(script_path).st_mode
        chmod(script_path, mode | S_IXUSR | S_IXGRP | S_IXOTH)

    def run(self, process, reserved_path, log_stdout, log_stderr):
        self.generate_executable(process, reserved_path)
        run_and_log([reserved_path], log_stdout, log_stderr)

    def static_check(self, process):
        pass


class BatProcessor:
    """ A processor for Windows command line
    """
    name = 'bat'
    header = u"@echo off\n"
    exit_if_fail = u'if %ERRORLEVEL% neq 0 exit /b 1\n'

    def generate_executable(self, process, reserved_path):
        """ Create an executable .bat file inside *reserved_path*
        :param reserved_path: string : directory reserved for this process
        :return: the path to the generated script
        """
        mkdir(reserved_path)
        script_name = path.abspath(path.join(reserved_path, "{}.bat".format(process.id)))
        with open(script_name, "wb+") as f:
            f.write(self.header.encode('utf8'))
            lines = process._code.split("\n")
            for line in lines:
                f.write(line.encode('utf8'))
                f.write(u"\n".encode('utf8'))
                # Abort the script as soon as one command fails.
                f.write(self.exit_if_fail.encode('utf8'))
        return script_name

    def run(self, process, reserved_path, log_stdout, log_stderr):
        prog = self.generate_executable(process, reserved_path)
        run_and_log([prog], log_stdout, log_stderr)

    def static_check(self, process):
        pass

# --------------------------------------------------------------------------
# /tuttle/report/__init__.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-

# --------------------------------------------------------------------------
# /tuttle/report/dot_repport.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-

from os import path


DOT_HEADER = """digraph workflow {
rankdir="LR";
Node [style="rounded,filled", shape=box, fillcolor=none]
"""
rankdir="LR"; 8 | Node [style="rounded,filled", shape=box, fillcolor=none] 9 | """ 10 | 11 | 12 | def color_from_process(process): 13 | color = "none" 14 | if process.start: 15 | if not process.end: 16 | # Running 17 | color = "skyblue" 18 | elif process.success: 19 | # success 20 | color = "green" 21 | else: 22 | color = "red" 23 | return color 24 | 25 | 26 | # TODO nick names for resources should be uniq 27 | def nick_from_url(url): 28 | parts = url.split("/") 29 | return parts.pop() 30 | 31 | 32 | def dot_id(url): 33 | import urllib 34 | return urllib.quote(url) 35 | 36 | 37 | def process_node_id(id): 38 | p_node = '"p_{}"'.format(id) 39 | return p_node 40 | 41 | 42 | def dot(workflow): 43 | # TODO : 44 | # * Add a legend 45 | # * Show missing resources in a different color 46 | result = DOT_HEADER 47 | for process in workflow.iter_processes(): 48 | color = color_from_process(process) 49 | p_node = process_node_id(process.id) 50 | if color != "none": 51 | fontcolor = color 52 | else: 53 | fontcolor = "black" 54 | result += ' {} [label="{}", URL="#{}", color={}, fontcolor={}, width=0, height=0] ' \ 55 | ';\n'.format(p_node, process.id, process.id, color, fontcolor) 56 | for res_input in process.iter_inputs(): 57 | nick = nick_from_url(res_input.url) 58 | resource_id = dot_id(res_input.url) 59 | result += ' "{}" -> {} [arrowhead="none"] \n'.format(resource_id, p_node) 60 | if res_input.is_primary(): 61 | result += ' "{}" [fillcolor=beige, label="{}"] ;\n'.format(resource_id, nick) 62 | for res_output in process.iter_outputs(): 63 | nick = nick_from_url(res_output.url) 64 | resource_id = dot_id(res_output.url) 65 | result += ' {} -> "{}" \n'.format(p_node, resource_id) 66 | result += ' "{}" [fillcolor={}, label="{}"] ;\n'.format(resource_id, color, nick) 67 | result += '}' 68 | return result 69 | 70 | 71 | def create_dot_report(workflow, filename): 72 | """ Write an html file describing the workflow 73 | :param workflow: 74 | :param filename: path to the html fil 
def create_dot_report(workflow, filename):
    """ Write a dot file describing the workflow
    :param workflow:
    :param filename: path to the dot file to be generated
    :return: None
    """
    with open(filename, 'wb') as fout:
        dot_src = dot(workflow)
        # Fix: codec name was misspelled 'utf8)' (stray parenthesis).
        fout.write(dot_src.encode('utf8'))

# --------------------------------------------------------------------------
# /tuttle/report/html_repport.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-
import sys
from os import path
from os.path import dirname, join, relpath, abspath, split
from shutil import copytree
from time import strftime, localtime

from jinja2 import Template

from tuttle.figures_formating import nice_file_size, nice_duration
from tuttle.report.dot_repport import dot


def data_path(*path_parts):
    """Return the path of a report asset, working both frozen and from source."""
    if getattr(sys, 'frozen', False):
        # The application is frozen
        datadir = join(dirname(sys.executable), "report")
    else:
        # The application is not frozen
        # Change this bit to match where you store your data files:
        datadir = dirname(__file__)
    return join(datadir, *path_parts)


def format_resource(resource, workflow):
    """Return a template-friendly dict describing *resource*."""
    sig = workflow.signature(resource.url)
    creator_process_id = None
    if resource.creator_process:
        creator_process_id = resource.creator_process.id
    return {
        'url': resource.url,
        'signature': sig,
        'creator_process_id': creator_process_id,
    }


def workflow_status(workflow):
    """Summarize the workflow state as one of
    NOT_FINISHED / PREPROCESS_FAILURE / FAILURE / SUCCESS."""
    for process in workflow.iter_preprocesses():
        if process.start and not process.end:
            return "NOT_FINISHED"
        if process.success is False:
            return "PREPROCESS_FAILURE"
    for process in workflow.iter_processes():
        if process.start and not process.end:
            return "NOT_FINISHED"
        if process.success is False:
            return "FAILURE"
        if not process.start:
            return "NOT_FINISHED"
    return "SUCCESS"


def path2url(path, ref_path):
    """Turn a filesystem path into a url relative to *ref_path*.

    :return: a '/'-separated relative url, or None if *path* is None
    """
    if path is None:
        return
    abs_path = abspath(path)
    rel_path = relpath(abs_path, ref_path)
    parts = split(rel_path)
    return '/'.join(parts)


def format_process(process, workflow, report_dir):
    """Return a template-friendly dict describing *process* for the html report."""
    duration = ""
    start = ""
    end = ""
    if process.start:
        start = strftime("%a, %d %b %Y %H:%M:%S", localtime(process.start))
    if process.end:
        end = strftime("%a, %d %b %Y %H:%M:%S", localtime(process.end))
        duration = nice_duration(process.end - process.start)
    running = start and not end
    return {
        'id': process.id,
        'processor': process.processor.name,
        'start': start,
        'end': end,
        'duration': duration,
        'log_stdout': path2url(process.log_stdout, report_dir),
        'log_stdout_size': nice_file_size(process.log_stdout, running),
        'log_stderr': path2url(process.log_stderr, report_dir),
        'log_stderr_size': nice_file_size(process.log_stderr, running),
        # Generators are fine here : the template iterates them exactly once.
        'outputs': (format_resource(resource, workflow) for resource in process.iter_outputs()),
        'inputs': (format_resource(resource, workflow) for resource in process.iter_inputs()),
        'code': process.code,
        'success': process.success,
        'error_message': process.error_message,
    }


def ensure_assets(dest_dir):
    """Copy the css/js assets next to the report, if not already there."""
    assets_dir = path.join(dest_dir, 'html_report_assets')
    if not path.isdir(assets_dir):
        copytree(data_path('html_report_assets', ''), assets_dir)


def create_html_report(workflow, filename):
    """ Write an html file describing the workflow
    :param workflow:
    :param filename: path to the html file to be generated
    :return: None
    """
    file_dir = path.dirname(filename)
    ensure_assets(file_dir)
    tpl_filename = data_path("report_template.html")
    with open(tpl_filename, 'rb') as ftpl:
        t = Template(ftpl.read().decode('utf8'))
    processes = [format_process(p, workflow, abspath(file_dir)) for p in workflow.iter_processes()]
    preprocesses = [format_process(p, workflow, abspath(file_dir)) for p in workflow.iter_preprocesses()]
    with open(filename, 'wb') as fout:
        content = t.render(
            processes=processes,
            preprocesses=preprocesses,
            dot_src=dot(workflow),
            status=workflow_status(workflow),
            tuttle_version=workflow.tuttle_version
        )
        # Fix: codec name was misspelled 'utf8)' (stray parenthesis).
        fout.write(content.encode('utf8'))

# --------------------------------------------------------------------------
# /tuttle/resource.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-
from hashlib import sha1
from os import remove
from os.path import abspath, exists, isfile
from shutil import rmtree
from tuttle.error import TuttleError


class MalformedUrl(TuttleError):
    pass


class ResourceMixIn:
    """ Common behaviour for all resources """

    def __init__(self, url):
        self.url = url
        self.creator_process = None
        self._user = None
        self._password = None

    @staticmethod
    def check_consistency(workflow):
        pass

    def set_authentication(self, user, password):
        self._user = user
        self._password = password

    def set_creator_process(self, process):
        self.creator_process = process

    def is_primary(self):
        """ Returns True if the resources is a primary resource, ie if it not computed by tuttle but is needed
        to compute other resources.
        This information is meaningful only in a workflow context : it is valid only after
        a call to workflow.compute_dependencies()
        :return: True if resource is a primary resource
        """
        return self.creator_process is None

    def created_by_same_inputs(self, other_resource):
        """
        Call to depends_on_same_inputs is valid only if both resources are not primary (ie creator_process exists !)
        """
        self_inputs = self.creator_process.input_urls()
        other_inputs = other_resource.creator_process.input_urls()
        return self_inputs == other_inputs


def hash_file(file_like_object):
    """Generate a hash for the contents of a file."""
    checksum = sha1()
    for chunk in iter(lambda: file_like_object.read(32768), b''):
        checksum.update(chunk)
    return checksum.hexdigest()


class FileResource(ResourceMixIn, object):
    """A resource for a local file"""
    scheme = 'file'

    def __init__(self, url):
        super(FileResource, self).__init__(url)

    def _get_path(self):
        """Return the absolute filesystem path of the file behind the url."""
        return abspath(self.url[len("file://"):])

    def exists(self):
        return exists(self._get_path())

    def signature(self):
        """Return a content signature of the file, e.g. "sha1:<hexdigest>".

        NOTE(review): returns the string "sha1:None" when the file cannot be
        read ; kept as-is because callers may rely on that sentinel value.
        """
        res_sha1 = None
        try:
            # Fix: hash in binary mode. Text mode alters line endings on
            # Windows and breaks on binary content, giving wrong signatures.
            with open(self._get_path(), 'rb') as f:
                res_sha1 = hash_file(f)
        except IOError:
            pass
        return "sha1:{}".format(res_sha1)

    def remove(self):
        path = self._get_path()
        if isfile(path):
            remove(path)
        else:
            # directory
            rmtree(path)
        # TODO what about links ?
# --------------------------------------------------------------------------
# /tuttle/tuttle_directories.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-
from glob import glob
from itertools import chain
from os.path import join, isfile, isdir, basename, exists
from os import remove, makedirs
from shutil import rmtree, move


def tuttle_dir(*args):
    """Return a path inside the project's hidden '.tuttle' directory."""
    return join('.tuttle', *args)


class TuttleDirectories:
    """Namespace gathering the layout of the '.tuttle' working directory."""

    _processes_dir = tuttle_dir('processes')
    _logs_dir = tuttle_dir('processes', 'logs')
    _extensions_dir = tuttle_dir('extensions')

    @staticmethod
    def tuttle_dir(*args):
        """Delegate to the module-level tuttle_dir() helper."""
        return tuttle_dir(*args)

    @staticmethod
    def list_extensions():
        """Return the paths of every entry in the extensions directory."""
        return glob(join(TuttleDirectories._extensions_dir, '*'))

    @staticmethod
    def prepare_and_assign_paths(process):
        """Reserve a clean work directory and fresh log files for *process*."""
        log_stdout = join(TuttleDirectories._logs_dir, "{}_stdout.txt".format(process.id))
        log_stderr = join(TuttleDirectories._logs_dir, "{}_err.txt".format(process.id))
        # It would be a good idea to clean up all directories before
        # running the whole workflow
        # For the moment we clean here : before folowing the logs
        for stale_log in (log_stdout, log_stderr):
            if isfile(stale_log):
                remove(stale_log)
        reserved_path = join(TuttleDirectories._processes_dir, process.id)
        if isdir(reserved_path):
            rmtree(reserved_path)
        elif isfile(reserved_path):
            remove(reserved_path)
        process.assign_paths(reserved_path, log_stdout, log_stderr)

    @staticmethod
    def create_tuttle_dirs():
        """Create the process and log directories if they do not exist yet."""
        for needed_dir in (TuttleDirectories._processes_dir, TuttleDirectories._logs_dir):
            if not isdir(needed_dir):
                makedirs(needed_dir)

    @staticmethod
    def empty_extension_dir():
        """Guarantee an empty extensions directory (created or wiped)."""
        if isdir(TuttleDirectories._extensions_dir):
            rmtree(TuttleDirectories._extensions_dir)
        makedirs(TuttleDirectories._extensions_dir)

    @staticmethod
    def move_paths_from(process, from_path):
        """Move a process' reserved directory and logs from *from_path* into place."""
        old_reserved = join(from_path, basename(process._reserved_path))
        old_stdout = join(from_path, 'logs', basename(process.log_stdout))
        old_stderr = join(from_path, 'logs', basename(process.log_stderr))
        TuttleDirectories.prepare_and_assign_paths(process)
        # Some process don't create all the necessary files
        if exists(old_reserved):
            move(old_reserved, process._reserved_path)
        # NOTE(review): the log moves are unconditional — presumably logs are
        # always produced ; verify against callers before guarding them.
        move(old_stdout, process.log_stdout)
        move(old_stderr, process.log_stderr)

    @staticmethod
    def straighten_out_process_and_logs(workflow):
        """Rebuild the processes directory so paths match the current workflow."""
        tmp_processes = TuttleDirectories.tuttle_dir('tmp_processes')
        rmtree(tmp_processes, True)
        move(TuttleDirectories._processes_dir, tmp_processes)
        TuttleDirectories.create_tuttle_dirs()
        for process in chain(workflow.iter_processes(), workflow.iter_preprocesses()):
            if process.start is not None:
                # Already ran : keep its artefacts, relocated to the new layout.
                TuttleDirectories.move_paths_from(process, tmp_processes)
            else:
                TuttleDirectories.prepare_and_assign_paths(process)
        rmtree(tmp_processes)

# --------------------------------------------------------------------------
# /tuttle/utils.py
# --------------------------------------------------------------------------
# -*- coding: utf8 -*-
import os


class CurrentDir(object):
    """
    Step into a directory temporarily.
    """
    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        os.chdir(self.new_dir)

    def __exit__(self, *args):
        os.chdir(self.old_dir)
8 | """ 9 | def __init__(self, path): 10 | self.old_dir = os.getcwd() 11 | self.new_dir = path 12 | 13 | def __enter__(self): 14 | os.chdir(self.new_dir) 15 | 16 | def __exit__(self, *args): 17 | os.chdir(self.old_dir) 18 | 19 | 20 | class EnvVar(object): 21 | """ 22 | Adds an environment variable temporarily. 23 | """ 24 | def __init__(self, var, value): 25 | self.var = var 26 | self.value = value 27 | 28 | def __enter__(self): 29 | if os.environ.has_key(self.var): 30 | self.former_value = os.environ[self.var] 31 | else: 32 | self.former_value = None 33 | os.environ[self.var] = self.value 34 | 35 | def __exit__(self, *args): 36 | if self.former_value is not None: 37 | os.environ[self.var] = self.former_value 38 | else: 39 | del os.environ[self.var] 40 | -------------------------------------------------------------------------------- /tuttle/version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | from os.path import dirname, join, getsize 3 | import sys 4 | 5 | 6 | def module_path(): 7 | if getattr(sys, 'frozen', False): 8 | # The application is frozen 9 | return join(dirname(sys.executable), "tuttle") 10 | else: 11 | # The application is not frozen 12 | # Change this bit to match where you store your data files: 13 | return dirname(__file__) 14 | 15 | 16 | def get_version(): 17 | version_path = join(module_path(), "VERSION") 18 | version_details = open(version_path).read(getsize(version_path)) 19 | return version_details.split("\n")[0] 20 | 21 | 22 | version = get_version() 23 | 24 | -------------------------------------------------------------------------------- /tuttle/workflow_builder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | import re 3 | from os.path import expanduser, exists, join 4 | 5 | from tuttle.addons.ftp import FTPResource 6 | from tuttle.addons.odbc import ODBCResource, ODBCProcessor 7 | from tuttle.error 
import re
from os.path import expanduser, exists, join

from tuttle.addons.ftp import FTPResource
from tuttle.addons.odbc import ODBCResource, ODBCProcessor
from tuttle.error import TuttleError
from tuttle.resource import FileResource
from tuttle.processors import *
from tuttle.process import Process
from tuttle.addons.csv_addon import CSV2SQLiteProcessor
from tuttle.addons.net import DownloadProcessor, HTTPResource
from tuttle.addons.postgres import PostgreSQLResource, PostgresqlProcessor
from tuttle.addons.python import PythonProcessor
from tuttle.addons.s3 import S3Resource
from tuttle.addons.sqlite import SQLiteProcessor, SQLiteResource
import os


class MalformedTuttlepassError(TuttleError):
    pass


def tuttlepass_file():
    """Return the path of the tuttlepass credentials file.

    The TUTTLEPASSFILE environment variable overrides the default ~/.tuttlepass.
    """
    if 'TUTTLEPASSFILE' in os.environ:
        return os.environ['TUTTLEPASSFILE']
    else:
        return expanduser(join('~', '.tuttlepass'))


class ResourceAuthenticator:
    """Resolves (user, password) credentials for a resource url from tuttlepass rules."""

    def __init__(self, lines_reader):
        self._rules = [rule for rule in ResourceAuthenticator.read_rules(lines_reader)]

    def fill_rules(self, line_reader):
        """Replace the current rules with those read from *line_reader*."""
        self._rules = [rule for rule in ResourceAuthenticator.read_rules(line_reader)]

    def get_auth(self, url):
        """Return (user, password) for the first rule matching *url*, else (None, None)."""
        for regex, user, password in self._rules:
            if regex.search(url):
                return user, password
        return None, None

    @staticmethod
    def empty_line(line):
        """Return True if *line* holds only spaces, tabs and newline characters."""
        # NOTE(review): '\r' is not treated as blank — a lone CR line would be
        # parsed as a rule and fail ; confirm whether that matters on Windows.
        for ch in line:
            if ch != " " and ch != "\t" and ord(ch) != 10:
                return False
        return True

    @staticmethod
    def read_rules(file_in):
        """Yield (compiled_regex, user, password) triples from a tuttlepass file.

        Lines are tab-separated ; '#' starts a comment ; blank lines are skipped.
        :raises MalformedTuttlepassError: on any parse error, with the line number
        """
        line_no = 1
        try:
            for line in file_in:
                pos_sharp = line.find('#')
                if pos_sharp > -1:
                    line = line[:pos_sharp].strip()
                else:
                    line = line.strip()
                if not ResourceAuthenticator.empty_line(line):
                    url_regex, user, password = line.split("\t")
                    yield (re.compile(url_regex), user, password)
                line_no += 1
        except re.error:
            msg = "Parse error on regular expression in tuttlepass file at line {}".format(line_no)
            raise MalformedTuttlepassError(msg)
        except ValueError:
            msg = "Parse error on tuttlepass file at line {} : wrong number of fields (3 expected). Or maybe t" \
                  "hey are not separated by tabs".format(line_no)
            raise MalformedTuttlepassError(msg)
        except Exception:
            # Fix: was a bare 'except:', which also swallowed KeyboardInterrupt
            # and SystemExit ; narrowed to Exception.
            msg = "Parse error on tuttlepass file at line {}".format(line_no)
            raise MalformedTuttlepassError(msg)


class WorkflowBuilder():
    """A helper class to build Process classes from the name of processors and resources"""

    def __init__(self):
        self._resources_definition = {}
        self._processors = {}
        self._resource_authenticator = None
        self.init_resource_authenticator()
        self.init_resources_and_processors()

    def init_resource_authenticator(self):
        """Load authentication rules from the tuttlepass file, if it exists."""
        pass_file = tuttlepass_file()
        if exists(pass_file):
            with open(pass_file) as f:
                self._resource_authenticator = ResourceAuthenticator(f)
        else:
            self._resource_authenticator = ResourceAuthenticator([])

    def init_resources_and_processors(self):
        """Register every known resource scheme and processor."""
        self._resources_definition['file'] = FileResource
        self._resources_definition['http'] = HTTPResource
        self._resources_definition['https'] = HTTPResource
        self._resources_definition['ftp'] = FTPResource
        self._resources_definition['sqlite'] = SQLiteResource
        self._resources_definition['pg'] = PostgreSQLResource
        self._resources_definition['s3'] = S3Resource
        self._resources_definition['odbc'] = ODBCResource
        self._processors['shell'] = ShellProcessor()
        self._processors['bat'] = BatProcessor()
        self._processors['python'] = PythonProcessor()
        self._processors['download'] = DownloadProcessor()
        self._processors['sqlite'] = SQLiteProcessor()
        self._processors['postgresql'] = PostgresqlProcessor()
        self._processors['csv2sqlite'] = CSV2SQLiteProcessor()
        self._processors['odbc'] = ODBCProcessor()
        # The default processor depends on the platform : batch on Windows,
        # shell everywhere else.
        if os.name == "nt":
            self._processors['default'] = self._processors['bat']
        else:
            self._processors['default'] = self._processors['shell']

    def extract_scheme(self, url):
        """Extract the scheme from an url
        url is supposed to be stripped from spaces
        :return: the scheme string, or False if the url has no '://' separator
        """
        separator_pos = url.find('://')
        if separator_pos == -1:
            return False
        url_scheme = url[:separator_pos]
        return url_scheme

    def url_is_empty(self, url):
        """
        :param url:
        :return: True if the url consist only in the scheme without further information
        """
        separator_pos = url.find('://')
        return separator_pos == (len(url) - len('://'))

    def build_resource(self, url):
        """Instantiate the resource matching *url*'s scheme, with credentials attached.

        :return: a resource object, or None if the scheme is unknown or the url empty
        """
        scheme = self.extract_scheme(url)
        if scheme is False or scheme not in self._resources_definition:
            return None
        if self.url_is_empty(url):
            return None
        ResDefClass = self._resources_definition[scheme]
        resource = ResDefClass(url)
        user, password = self._resource_authenticator.get_auth(url)
        resource.set_authentication(user, password)
        return resource

    def build_process(self, processor, file_name, line_num):
        """Build a Process for *processor*, or return False if the processor is unknown."""
        if processor in self._processors:
            return Process(self._processors[processor], file_name, line_num)
        else:
            return False

    def get_or_build_resource(self, url, resources):
        """Return the resource for *url* from the *resources* cache, building it if needed."""
        # py2 : urls are normalized to byte strings.
        url = url.encode("ascii")
        if url not in resources:
            resource = self.build_resource(url)
            resources[url] = resource
        else:
            resource = resources[url]
        return resource