├── .coveragerc
├── .gitignore
├── .travis.yml
├── MANIFEST.in
├── README.markdown
├── README.zh_CN.markdown
├── TODO.markdown
├── bin
    └── luiti
├── changelog.markdown
├── example_webui_run.py
├── install-dependencies.sh
├── luiti
    ├── __init__.py
    ├── daemon
    │   ├── __init__.py
    │   ├── graph.py
    │   ├── ptm.py
    │   ├── query_engine
    │   │   ├── __init__.py
    │   │   ├── builder.py
    │   │   ├── create_task.py
    │   │   └── params.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── cache.py
    │   │   ├── string.py
    │   │   ├── task_storage.py
    │   │   └── template.py
    │   └── web
    │   │   ├── __init__.py
    │   │   ├── assets.py
    │   │   ├── code_render.py
    │   │   ├── handlers.py
    │   │   └── server.py
    ├── java
    │   └── MultipleTextFiles.java
    ├── luigi_decorators
    │   ├── __init__.py
    │   ├── as_a_luiti_task.py
    │   ├── check_date_range.py
    │   ├── check_runtime_range.py
    │   ├── mr_local.py
    │   ├── multiple_text_files.py
    │   ├── persist_files.py
    │   ├── plug_packages.py
    │   └── ref_tasks.py
    ├── luigi_extensions
    │   ├── __init__.py
    │   ├── create_python_package.py
    │   ├── hadoop_ext.py
    │   ├── luigi_root_context.py
    │   ├── manage_decorators.py
    │   ├── parameter.py
    │   ├── root_task.py
    │   ├── task_base.py
    │   └── task_init.py
    ├── manager
    │   ├── __init__.py
    │   ├── active_packages.py
    │   ├── cli.py
    │   ├── config.py
    │   ├── dep.py
    │   ├── files.py
    │   ├── generate_from_templates.py
    │   ├── lazy_data.py
    │   ├── loader.py
    │   ├── package_map.py
    │   ├── sys_argv.py
    │   └── table.py
    ├── schedule
    │   ├── __init__.py
    │   └── sensor_schedule.py
    ├── task_templates
    │   ├── __init__.py
    │   ├── other
    │   │   ├── __init__.py
    │   │   ├── hive_task.py
    │   │   ├── mongo_import_task.py
    │   │   └── static_file.py
    │   └── time
    │   │   ├── __init__.py
    │   │   ├── task_biweekly.py
    │   │   ├── task_biweekly_hadoop.py
    │   │   ├── task_day.py
    │   │   ├── task_day_hadoop.py
    │   │   ├── task_hour.py
    │   │   ├── task_hour_hadoop.py
    │   │   ├── task_month.py
    │   │   ├── task_month_hadoop.py
    │   │   ├── task_quarter.py
    │   │   ├── task_quarter_hadoop.py
    │   │   ├── task_range.py
    │   │   ├── task_range_hadoop.py
    │   │   ├── task_week.py
    │   │   ├── task_week_hadoop.py
    │   │   ├── task_year.py
    │   │   └── task_year_hadoop.py
    ├── tests
    │   ├── __init__.py
    │   ├── mr_test_case.py
    │   └── setup_luiti_packages.py
    ├── utils
    │   ├── __init__.py
    │   ├── command_utils.py
    │   ├── compress_utils.py
    │   ├── date_utils.py
    │   ├── ext_utils.py
    │   ├── hdfs_utils.py
    │   ├── io_utils.py
    │   ├── math_utils.py
    │   ├── mr_utils.py
    │   ├── target_utils.py
    │   └── visualiser_env_template.py
    └── webui
    │   ├── INSTALL.markdown
    │   ├── assets
    │       ├── javascripts
    │       │   └── luiti.js
    │       ├── jsx
    │       │   └── luiti.jsx
    │       └── stylesheets
    │       │   └── luiti.css
    │   ├── bower.json
    │   └── index.html
├── requirements.txt
├── screenshots
    ├── README.markdown
    ├── luiti_code_show.png
    ├── luiti_webui_list.png
    └── luiti_webui_show.png
├── setup.py
├── tests
    ├── client.cfg
    ├── jsons_data
    │   └── mr_local.json
    ├── project_A
    │   ├── __init__.py
    │   └── luiti_tasks
    │   │   ├── __init__.py
    │   │   ├── __init_luiti.py
    │   │   ├── a_day.py
    │   │   ├── b_day.py
    │   │   ├── c_day.py
    │   │   ├── d_day.py
    │   │   ├── foobar_day.py
    │   │   ├── import_packages_day.py
    │   │   └── multiple_dependent_day.py
    ├── project_B
    │   ├── __init__.py
    │   └── luiti_tasks
    │   │   ├── __init__.py
    │   │   ├── __init_luiti.py
    │   │   └── h_day.py
    ├── test_daemon.py
    ├── test_luigi_decorators.py
    ├── test_main.py
    ├── test_manager.py
    ├── test_mr_test_case.py
    ├── test_schedule.py
    ├── test_task.py
    ├── test_task_templates.py
    ├── test_utils.py
    ├── webui_packages
    │   ├── README.markdown
    │   ├── luiti_clean
    │   │   ├── README.markdown
    │   │   ├── luiti_clean
    │   │   │   ├── __init__.py
    │   │   │   └── luiti_tasks
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __init_luiti.py
    │   │   │   │   └── clean_web_log_day.py
    │   │   ├── setup.py
    │   │   └── tests
    │   │   │   └── test_main.py
    │   ├── luiti_dump
    │   │   ├── README.markdown
    │   │   ├── luiti_dump
    │   │   │   ├── __init__.py
    │   │   │   └── luiti_tasks
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __init_luiti.py
    │   │   │   │   ├── dump_browser_map_day.py
    │   │   │   │   └── dump_web_log_day.py
    │   │   ├── setup.py
    │   │   └── tests
    │   │   │   └── test_main.py
    │   ├── luiti_middle
    │   │   ├── README.markdown
    │   │   ├── luiti_middle
    │   │   │   ├── __init__.py
    │   │   │   └── luiti_tasks
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __init_luiti.py
    │   │   │   │   ├── counter_visitor_by_browser_day.py
    │   │   │   │   ├── counter_visitor_by_region_day.py
    │   │   │   │   └── counter_visitor_day.py
    │   │   ├── setup.py
    │   │   └── tests
    │   │   │   └── test_main.py
    │   ├── luiti_summary
    │   │   ├── README.markdown
    │   │   ├── luiti_summary
    │   │   │   ├── __init__.py
    │   │   │   └── luiti_tasks
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __init_luiti.py
    │   │   │   │   └── beta_report_day.py
    │   │   ├── setup.py
    │   │   └── tests
    │   │   │   └── test_main.py
    │   └── luiti_webui_tests
    │   │   └── luiti_webui_tests
    │   │       └── __init__.py
    └── zip_package_by_luiti
    │   ├── setup.py
    │   └── zip_package_by_luiti
    │       ├── __init__.py
    │       └── subfold
    │           └── __init__.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [report]
 2 | omit =
 3 |     */python?.?/*
 4 |     */site-packages/nose/*
 5 |     *__init__*
 6 |     */__init__.py
 7 |     */*/__init__.py
 8 |     luiti/utils/__init__.py
 9 |     tests/*
10 |     */setup.py
11 | 
12 | [run]
13 | parallel = True
14 | source   = luiti
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | bin/
12 | build/
13 | develop-eggs/
14 | dist/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # Installer logs
26 | pip-log.txt
27 | pip-delete-this-directory.txt
28 | 
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .cache
34 | nosetests.xml
35 | coverage.xml
36 | 
37 | # Translations
38 | *.mo
39 | 
40 | # Mr Developer
41 | .mr.developer.cfg
42 | .project
43 | .pydevproject
44 | 
45 | # Rope
46 | .ropeproject
47 | 
48 | # Django stuff:
49 | *.log
50 | *.pot
51 | 
52 | # Sphinx documentation
53 | docs/_build/
54 | 
55 | *.yml
56 | *.jar
57 | *.coverage.*
58 | 
59 | bower_components
60 | node_modules
61 | .idea/
62 | .DS_Store
63 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | env:
 4 |   global:
 5 |     - PIP_DOWNLOAD_CACHE=$HOME/.pip-cache
 6 |   matrix:
 7 |     - TOXENV=pep8
 8 |     - TOXENV=docs
 9 |     - TOXENV=py27-nonhdfs
10 |     - TOXENV=py33-nonhdfs
11 |     - TOXENV=py34-nonhdfs
12 |     - TOXENV=py27-cdh
13 |     - TOXENV=py33-cdh
14 |     - TOXENV=py34-cdh
15 | 
16 | sudo: false
17 | 
18 | cache:
19 |   - $HOME/.pip-cache
20 | 
21 | install:
22 |   - pip install coveralls
23 |   - pip install tox
24 | 
25 | before_script:
26 |   # allow ssh loopback
27 |   - ssh-keygen -t rsa -N '' -C '' -f ~/.ssh/id_rsa
28 |   - cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
29 |   - ssh -o StrictHostKeyChecking=no localhost true
30 | 
31 |   - ./install-dependencies.sh
32 | 
33 | script:
34 |   - nosetests
35 |   - coverage run --source=luiti setup.py test
36 | 
37 | after_failure:
38 |   - cat /home/travis/build/luiti/luiti/.tox/cdh/log/cdh-1.log
39 | 
40 | after_success:
41 |   - coveralls
42 | 
43 | branches:
44 |   only:
45 |     - master
46 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include changelog.markdown
 2 | include README.markdown
 3 | 
 4 | include luiti/java/*.java
 5 | 
 6 | include luiti/webui/assets/*/**
 7 | 
 8 | include luiti/webui/*.html
 9 | recursive-include luiti/webui/bower_components *
10 | 


--------------------------------------------------------------------------------
/TODO.markdown:
--------------------------------------------------------------------------------
 1 | 1. Separate MapReduce's requires: one is used as input, another is used
 2 |   as dict.
 3 | 2. Clean /tmp/sjfljslfjs after package task related files into a tar.
 4 | 3. Support without current package.
 5 | 
 6 | 
 7 | ## WebUI
 8 | 1. OPTIMIZE task dep infos.
 9 | 2. Add daemon tests.
10 | 3. Add webui tests.
11 | 


--------------------------------------------------------------------------------
/bin/luiti:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Command-line entry point: builds luiti's Cli from the process arguments and runs it.

import sys
from luiti.manager import Cli

# The complete argument vector is handed over unchanged.
cli = Cli(sys.argv)
cli.run()
8 | 


--------------------------------------------------------------------------------
/changelog.markdown:
--------------------------------------------------------------------------------
 1 | ### 0.2.2   - Nov 10, 2015
 2 | * Add lots of test cases
 3 | * Document wording
 4 | 
 5 | ### 0.2.1   - July 15, 2015
 6 | * Add SensorSchedule to lazily wait for external tasks to finish.
 7 | * Add WebUI screenshots.
 8 | * Lots of bug fixes.
 9 | 
10 | ### 0.2.0   - July 7, 2015
11 | * Add WebUI and daemon.
12 | * Lots of bug fixes and refactor.
13 | 
14 | ### 0.1.4   - May 10, 2015
15 | * Add English README
16 | 
17 | ### 0.1.3   - April 20, 2015
18 | * All code conforms to PEP8 style.
19 | * Add @luigi.multiple_text_files decorator
20 | 
21 | ### 0.1.2   - April 20, 2015
22 | * Project is more solid, add services such as travis, etc.
23 | 
24 | ### 0.1.0   - March 24, 2015
25 | * Stable version, compatible with luigi==1.0.19 and snakebite==1.3.8,
26 |   and already validated in a production environment.
27 | 


--------------------------------------------------------------------------------
/example_webui_run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*-coding:utf-8-*-
 3 | 
 4 | import os
 5 | import sys
 6 | 
 7 | try:
 8 |     # Try load installed version first.
 9 |     import luiti
10 |     luiti
11 | except:
12 |     root_dir = os.path.dirname(os.path.abspath(__file__))
13 |     sys.path.insert(0, root_dir)
14 | 
15 | import logging
16 | logger = logging.getLogger("luiti.server")
17 | 
18 | # link webui_packages path
19 | from luiti.tests import SetupLuitiPackages
20 | config = SetupLuitiPackages.config
21 | from luiti.daemon import Server
22 | 
23 | 
24 | task_list_url = "http://localhost:8082/luiti/dag_visualiser?date_value=2015-07-09T00%3A00%3A00%2B08%3A00&language=English&luiti_package=luiti_summary&luiti_package=luiti_clean&luiti_package=luiti_dump&luiti_package=luiti_middle&luiti_package=project_A&luiti_package=project_B"
25 | task_show_url = "http://localhost:8082/luiti/dag_visualiser?date_value=2015-07-09T00%3A00%3A00%2B08%3A00&language=English&luiti_package=luiti_summary&luiti_package=luiti_clean&luiti_package=luiti_dump&luiti_package=luiti_middle&luiti_package=project_A&luiti_package=project_B&task_cls=BetaReportDay"
26 | 
27 | # generated from http://www.network-science.de/ascii/
28 | print "Welcome to luiti's test webui example!"
29 | print
30 | print "  Open below two urls in your favourite browser."
31 | print
32 | print "  task_list_url: ", task_list_url
33 | print "  task_show_url: ", task_show_url
34 | print
35 | 
36 | Server("localhost", 8082).run()
37 | 


--------------------------------------------------------------------------------
/install-dependencies.sh:
--------------------------------------------------------------------------------
 1 | # Travis had already installed Node.js with npm.
 2 | npm install bower -g
 3 | cd luiti/webui; bower install; cd -;
 4 | 
 5 | # Install eggs dependencies.
 6 | 
 7 | # Fix => Reading http://pyparsing.wikispaces.com/ error: timed out
 8 | pip install pyparsing --retries 10 --timeout 60
 9 | python setup.py install
10 | 


--------------------------------------------------------------------------------
/luiti/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['luigi', 'config', "VisualiserEnvTemplate",
 4 | 
 5 |            'TaskBase',
 6 |            "TaskHour",
 7 |            "TaskHourHadoop",
 8 |            "TaskDay",
 9 |            "TaskDayHadoop",
10 |            "TaskWeek",
11 |            "TaskWeekHadoop",
12 |            "TaskBiweekly",
13 |            "TaskBiweeklyHadoop",
14 |            "TaskMonth",
15 |            "TaskMonthHadoop",
16 |            "TaskQuarter",
17 |            "TaskQuarterHadoop",
18 |            "TaskYear",
19 |            "TaskYearHadoop",
20 |            "TaskRange",
21 |            "TaskRangeHadoop",
22 | 
23 |            'RootTask',
24 | 
25 |            'StaticFile',
26 |            'MongoImportTask',
27 |            'HiveTask',
28 | 
29 |            'HadoopExt',
30 | 
31 |            'manager',
32 | 
33 |            'IOUtils', 'DateUtils', 'TargetUtils', 'HDFSUtils',
34 |            'MRUtils', 'MathUtils', 'CommandUtils',
35 |            'CompressUtils',
36 | 
37 |            'ArrowParameter',
38 | 
39 |            'os', 're', 'sys', 'defaultdict', 'json', 'cached_property',
40 |            'arrow',
41 | 
42 |            'MrTestCase', ]
43 | 
44 | import os
45 | import sys
46 | import re
47 | from collections import defaultdict
48 | import json
49 | from etl_utils import cached_property
50 | 
51 | from .luigi_extensions import luigi
52 | 
53 | from .task_templates import TaskHour, TaskDay, TaskWeek, TaskBiweekly, TaskMonth, TaskQuarter, TaskYear, TaskRange
54 | from .task_templates import TaskHourHadoop, TaskDayHadoop, TaskWeekHadoop, TaskBiweeklyHadoop, TaskMonthHadoop, TaskQuarterHadoop, TaskYearHadoop, TaskRangeHadoop
55 | from .task_templates import StaticFile, MongoImportTask, HiveTask
56 | 
57 | 
58 | from . import manager
59 | from .utils import IOUtils, DateUtils, TargetUtils, HDFSUtils
60 | from .utils import MRUtils, MathUtils, CommandUtils, CompressUtils
61 | 
62 | import arrow
63 | from .luigi_extensions import RootTask, TaskBase, ArrowParameter, HadoopExt
64 | 
65 | from .utils.visualiser_env_template import VisualiserEnvTemplate
66 | 
67 | from .tests import MrTestCase
68 | 
69 | 
# Package-level alias (listed in __all__) for manager.luiti_config.
config = manager.luiti_config
71 | 


--------------------------------------------------------------------------------
/luiti/daemon/__init__.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["Server", ]
4 | 
5 | 
6 | from .web import Server
7 | 


--------------------------------------------------------------------------------
/luiti/daemon/graph.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | __all__ = ["Graph"]
  4 | 
  5 | from copy import deepcopy
  6 | 
  7 | from .utils import Template, stringify, TaskStorageSet, TaskStorageDict
  8 | 
  9 | 
 10 | class Graph(object):
 11 |     """
 12 |     Analysis graph relation between nodes.
 13 |     """
 14 | 
 15 |     @staticmethod
 16 |     def analysis_dependencies_between_nodes(task_instances, selected_packages):
 17 |         """
 18 |         Based on Data:
 19 |         1. Task_instances
 20 |         2. Their `requires` informations.
 21 | 
 22 |         Related function is luiti.manager.dep.Dep.find_dep_on_tasks
 23 |         """
 24 |         uniq_set = TaskStorageSet(task_instances)
 25 | 
 26 |         # 1. raw `requires` and `invert` informations.
 27 |         # TODO TaskStorageDict are already String, modify back to real Python objects.
 28 |         task_instances_to_their_direct_requires = TaskStorageDict()
 29 |         task_instances_to_their_direct_upons = TaskStorageDict()
 30 | 
 31 |         for task_instance in task_instances:
 32 |             deps = Utils.read_requires_from_task(task_instance, selected_packages)
 33 |             selected_deps = [d1 for d1 in deps if d1 in uniq_set]
 34 |             task_instances_to_their_direct_requires[task_instance] = TaskStorageSet(selected_deps)
 35 |             for dep1 in selected_deps:
 36 |                 task_instances_to_their_direct_upons[dep1].add(task_instance)
 37 | 
 38 |         # 2. unfold `requires` and `invert` informations.
 39 |         task_instances_to_their_total_requires = TaskStorageDict()
 40 |         task_instances_to_their_total_upons = TaskStorageDict()
 41 | 
 42 |         for task_instance in task_instances:
 43 |             Utils.add_total_deps(task_instances_to_their_total_requires, task_instances_to_their_direct_requires, task_instance)
 44 |             Utils.add_total_deps(task_instances_to_their_total_upons, task_instances_to_their_direct_upons, task_instance)
 45 | 
 46 |         def generate_result(_type="python"):
 47 |             """
 48 |             provide two versions of graph infos.
 49 | 
 50 |             1. one for front-end javascript.
 51 |             2. another for API python.
 52 |             """
 53 |             def wrap(obj):
 54 |                 if _type == "python":
 55 |                     return obj
 56 |                 if _type == "json":
 57 |                     return stringify(obj)
 58 | 
 59 |             return {
 60 |                 "requires": {
 61 |                     "direct": wrap(task_instances_to_their_direct_requires),
 62 |                     "total": wrap(task_instances_to_their_total_requires),
 63 |                 },
 64 |                 "upons": {
 65 |                     "direct": wrap(task_instances_to_their_direct_upons),
 66 |                     "total": wrap(task_instances_to_their_total_upons),
 67 |                 },
 68 |             }
 69 | 
 70 |         return {
 71 |             "python": generate_result("python"),
 72 |             "json": generate_result("json"),
 73 |         }
 74 | 
 75 |     @staticmethod
 76 |     def split_edges_into_groups(edges, nodes, task_instances):
 77 |         """
 78 |         Put linked task instances into a group.
 79 |         """
 80 |         edges = deepcopy(edges)
 81 |         groups = list()  # element is set
 82 | 
 83 |         # make sure every node appear, even has not link to other tasks.
 84 |         for ti in task_instances:
 85 |             edges.append(Template.an_edge(ti, ti))
 86 | 
 87 |         # 1. first time, divid edges into groups.
 88 |         for edge in edges:
 89 |             is_in_current_groups = False
 90 |             for group in groups:
 91 |                 if (edge["from"] in group) or (edge["to"] in group):
 92 |                     is_in_current_groups = True
 93 |                     group.add(edge["from"])
 94 |                     group.add(edge["to"])
 95 |             if is_in_current_groups is False:
 96 |                 groups.append(set([edge["from"], edge["to"]]))
 97 | 
 98 |         # 2. second time, merge groups that has common tasks
 99 |         # iterate to reduce redudant group
100 |         result = list()
101 |         for group1 in groups:
102 |             append_idx = None
103 |             for idx2, group2 in enumerate(result):
104 |                 if len(group1 & group2) > 0:
105 |                     append_idx = idx2
106 |                     break
107 |             if append_idx is None:
108 |                 result.append(group1)
109 |             else:
110 |                 result[append_idx] = result[append_idx] | group1
111 | 
112 |         result = sorted(result, key=lambda i1: (-len(i1), i1))
113 |         return result
114 | 
115 | 
class Utils(object):
    """ only for this file """

    @staticmethod
    def read_requires_from_task(task_instance, selected_packages):
        """
        Return the requirements of `task_instance` that are luiti tasks from
        one of `selected_packages`.

        A non-list return value of `requires()` is treated as a single
        dependency. Objects without a `package_name` attribute are dropped.
        Always returns a list.
        """
        deps = task_instance.requires()
        if not isinstance(deps, list):
            deps = [deps]
        # 1. make sure it's a valid luiti task
        # 2. the package filter is very important, or can't find dict data.
        return [dep for dep in deps
                if hasattr(dep, "package_name") and dep.package_name in selected_packages]

    @staticmethod
    def add_total_deps(store, tree, store_node, fetch_node=None):
        """ add all recursive dependencies.
        1. `store_node` used to store in a result store.
        2. `fetch_node` used to fetch dependencies from a tree.

        Everything reachable from `fetch_node` (default: `store_node`) through
        `tree` is added to `store[store_node]`; `store_node` itself is never
        added. Both mappings are indexed with `[]` for keys that may be absent,
        so they are expected to create missing entries on access.

        The traversal is iterative and skips nodes already stored, so each
        reachable node is recorded exactly once (intermediate nodes included)
        and a cyclic `tree` terminates.
        """
        fetch_node = fetch_node or store_node

        pending = [fetch_node]
        while pending:
            current = pending.pop()
            for dep in tree[current]:
                if dep == store_node or dep in store[store_node]:
                    continue
                store[store_node].add(dep)
                pending.append(dep)
147 | 


--------------------------------------------------------------------------------
/luiti/daemon/ptm.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["PTM"]
 4 | 
 5 | 
 6 | import sys
 7 | from etl_utils import singleton, cached_property
 8 | import importlib
 9 | import inspect
10 | 
11 | from .. import manager
12 | from ..utils import VisualiserEnvTemplate
13 | 
14 | 
@singleton()
class PackageTaskManagementClass(object):
    """
    Registry of the packages and tasks the webui daemon works with.

    Every value is exposed through `cached_property`. They are read when the
    webui daemon starts and are treated as static afterwards.
    """

    @cached_property
    def current_package_name(self):
        """ Project name as reported by `manager.luiti_config`. """
        luiti_config = manager.luiti_config
        return luiti_config.get_curr_project_name()

    @cached_property
    def current_init_luiti(self):
        """ The imported `<current package>.luiti_tasks.__init_luiti` module. """
        # Accessed only for its side effect: it inserts the package path into sys.path.
        self.current_package_path
        module_path = "".join([self.current_package_name, ".luiti_tasks.__init_luiti"])
        return importlib.import_module(module_path)

    @cached_property
    def current_package_path(self):
        """ Project path from `manager.luiti_config`; also prepended to `sys.path`. """
        project_path = manager.luiti_config.get_curr_project_path()
        sys.path.insert(0, project_path)
        return project_path

    @cached_property
    def current_luiti_visualiser_env(self):
        """
        `data` of the package's `luiti_visualiser_env`, or of a fresh
        `VisualiserEnvTemplate` when `__init_luiti` does not define one.
        """
        init_module = self.current_init_luiti
        fallback = VisualiserEnvTemplate()
        env = getattr(init_module, "luiti_visualiser_env", fallback)
        assert isinstance(env, VisualiserEnvTemplate), env
        return env.data

    @cached_property
    def load_all_tasks_result(self):
        """ Raw result of `manager.load_all_tasks()`. """
        return manager.load_all_tasks()

    @cached_property
    def task_classes(self):
        """ Task classes taken from the "success" entries of the load result. """
        classes = list()
        for entry in self.load_all_tasks_result["success"]:
            classes.append(entry["task_cls"])
        return classes

    @cached_property
    def task_class_names(self):
        """ Sorted class names of `task_classes`. """
        names = [task_cls.__name__ for task_cls in self.task_classes]
        names.sort()
        return names

    @cached_property
    def task_clsname_to_package(self):
        """ `manager.PackageMap.task_clsname_to_package`, unchanged. """
        return manager.PackageMap.task_clsname_to_package

    @cached_property
    def task_clsname_to_source_file(self):
        """ Map task class name -> defining file, with ".pyc" rewritten to ".py". """
        mapping = dict()
        for task_cls in self.task_classes:
            defined_in = inspect.getfile(task_cls)
            mapping[task_cls.__name__] = defined_in.replace(".pyc", ".py")
        return mapping

    @cached_property
    def task_clsname_to_package_name(self):
        """ Map task class name -> `__name__` of its package. """
        return dict((clsname, package.__name__)
                    for clsname, package in self.task_clsname_to_package.iteritems())

    @cached_property
    def task_package_names(self):
        """ Sorted `__name__`s of the distinct packages that own a task. """
        packages = set(self.task_clsname_to_package.values())
        names = [package.__name__ for package in packages]
        names.sort()
        return names

    @cached_property
    def package_to_task_clsnames(self):
        """ Map package `__name__` -> sorted list of its task class names. """
        mapping = dict()
        for package, task_clsnames in manager.PackageMap.package_to_task_clsnames.iteritems():
            mapping[package.__name__] = sorted(list(task_clsnames))
        return mapping
81 | 
82 | 
# Module-level object exported through __all__ as `PTM`.
PTM = PackageTaskManagementClass()
84 | 


--------------------------------------------------------------------------------
/luiti/daemon/query_engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["Query"]
 4 | 
 5 | from .builder import QueryBuilder
 6 | 
 7 | 
 8 | class Query(object):
 9 |     """
10 |     Use params to query some data from luiti.
11 |     """
12 | 
13 |     cache = dict()
14 | 
15 |     def __init__(self, ptm):
16 |         self.ptm = ptm  # global task and package data.
17 | 
18 |     def get_env(self, raw_params=dict()):
19 |         """
20 |         Generate all data needed.
21 |         """
22 |         # Compact with yesterday and today are the same cache key.
23 |         raw_params["date_value"] = raw_params.get("date_value", unicode(QueryBuilder.yesterday()))
24 | 
25 |         # TODO cache maybe replaced by a decorator, such as @functools.lru_cache
26 |         cache_key = unicode(sorted(raw_params.items()))  # A simple cache
27 | 
28 |         result = self.cache.get(cache_key, None)
29 |         if result is None:
30 |             result = QueryBuilder(self.ptm, raw_params).result
31 |             self.cache[cache_key] = QueryBuilder(self.ptm, raw_params).result
32 | 
33 |         return result
34 | 


--------------------------------------------------------------------------------
/luiti/daemon/query_engine/builder.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | __all__ = ["QueryBuilder"]
  4 | 
  5 | import arrow
  6 | from etl_utils import cached_property
  7 | from copy import deepcopy
  8 | 
  9 | from ...luigi_extensions import ArrowParameter
 10 | from ..graph import Graph
 11 | from ..utils import stringify, Template, TaskStorageSet
 12 | from .params import Params
 13 | from .create_task import CreateTask
 14 | 
 15 | 
 16 | class QueryBuilder(object):
 17 |     """
 18 |     Construct a query builder.
 19 | 
 20 |     All propertyies are generated lazily by using `cached_property`, as in a **DAG**.
 21 |     """
 22 | 
 23 |     def __init__(self, ptm, raw_params):
 24 |         assert isinstance(raw_params, dict), raw_params
 25 | 
 26 |         self.raw_params = raw_params
 27 |         self.ptm = ptm
 28 | 
 29 |     @cached_property
 30 |     def date_begin(self):
 31 |         return self.ptm.current_luiti_visualiser_env["date_begin"]
 32 | 
 33 |     @cached_property
 34 |     def date_end(self):
 35 |         date_end = self.ptm.current_luiti_visualiser_env.get("date_end", self.yesterday_str)
 36 |         self.ptm.current_luiti_visualiser_env["date_end"] = date_end
 37 |         return date_end
 38 | 
 39 |     @staticmethod
 40 |     def yesterday():
 41 |         return ArrowParameter.now().replace(days=-1).floor("day")
 42 | 
 43 |     @cached_property
 44 |     def yesterday_str(self):
 45 |         return QueryBuilder.yesterday().format("YYYY-MM-DD")
 46 | 
 47 |     @cached_property
 48 |     def accepted_params(self):
 49 |         """
 50 |         Comes from current luiti that selected.
 51 |         """
 52 |         return self.ptm.current_luiti_visualiser_env["additional_task_parameters"]
 53 | 
 54 |     @cached_property
 55 |     def accepted_query_params(self):
 56 |         """
 57 |         provide to visualSearch.js, used for autocomplete.
 58 | 
 59 |         user query via URL search.
 60 | 
 61 |         autocomplete params key/value.
 62 |         """
 63 |         # date range related.
 64 |         days_range = arrow.Arrow.range("day",
 65 |                                        ArrowParameter.get(self.date_begin),
 66 |                                        ArrowParameter.get(self.date_end))
 67 |         accepted_date_values = sorted(map(str, days_range))
 68 | 
 69 |         # result
 70 |         return {
 71 |             "date_value": accepted_date_values,
 72 |             "task_cls": self.ptm.task_class_names,
 73 |             "luiti_package": self.ptm.task_package_names,
 74 |         }
 75 | 
 76 |     @cached_property
 77 |     def default_query(self):
 78 |         """ Query provide by user config. """
 79 |         # assign default params
 80 |         default_query = {
 81 |             "date_value": str(QueryBuilder.yesterday()),
 82 |             # to insert more key-value
 83 |         }
 84 | 
 85 |         # get config from current package's luiti_visualiser_env
 86 |         for task_param, task_param_opt in self.accepted_params.iteritems():
 87 |             self.accepted_query_params[task_param] = task_param_opt["values"]
 88 |             default_query[task_param] = task_param_opt["default"]
 89 | 
 90 |         return default_query
 91 | 
 92 |     @cached_property
 93 |     def selected_query(self):
 94 |         selected_query = {k1: v1 for k1, v1 in self.raw_params.iteritems() if k1 in self.accepted_params or k1 == "date_value"}
 95 |         selected_query["luiti_package"] = self.selected_packages
 96 |         selected_query = dict(self.default_query.items() + selected_query.items())
 97 | 
 98 |         return selected_query
 99 | 
100 |     @cached_property
101 |     def default_packages(self):
102 |         """ user provided. """
103 |         return self.ptm.current_luiti_visualiser_env["package_config"].get("defaults", [])
104 | 
105 |     @cached_property
106 |     def selected_packages(self):
107 |         result = self.raw_params.get("luiti_package", self.default_packages)
108 |         result = result or self.ptm.task_package_names
109 |         return result
110 | 
111 |     @cached_property
112 |     def selected_task_cls_names(self):
113 |         """
114 |         current selected.
115 |         """
116 |         result = set(self.raw_params.get("task_cls", []))
117 | 
118 |         # modify other cached_property
119 |         self.selected_query["task_cls"] = list(result)
120 | 
121 |         return result
122 | 
123 |     @cached_property
124 |     def total_task_instances(self):
125 |         """
126 |         Total task instances.
127 |         """
128 |         # 1. build possible params.
129 |         # **remove** luiti_package and task_cls query str
130 |         params_array = Params.build_params_array(self.default_query, self.selected_query)
131 | 
132 |         # 2. and generate task instances.
133 |         total_task_instances = list()
134 |         for ti in self.ptm.task_classes:
135 |             # TODO why below two lines exist before.
136 |             # if ti.__name__ not in self.selected_task_cls_names:
137 |             #     continue
138 | 
139 |             for _params in params_array:
140 |                 task_instance = CreateTask.new(ti, _params)
141 |                 total_task_instances.append(task_instance)
142 | 
143 |         result = sorted(list(set(total_task_instances)))
144 |         return result
145 | 
146 |     @cached_property
147 |     def selected_task_instances(self):
148 |         """ nodes that drawed in vis.js """
149 |         # filter by package
150 |         result = sorted(list(set(self.total_task_instances)))
151 |         result = filter(lambda ti: ti.package_name in self.selected_packages,
152 |                         result)
153 | 
154 |         # To avoid only self is in the graph.
155 |         # If select task class, then to find linked task instances.
156 |         if not self.selected_task_cls_names:
157 |             return result
158 | 
159 |         pure_selected_task_instances = [ti for ti in result if ti.task_clsname in self.selected_task_cls_names]
160 |         pure_linked = TaskStorageSet()
161 |         for ti in pure_selected_task_instances:
162 |             for t2 in self.graph_infos_python["requires"]["direct"][ti]:
163 |                 pure_linked.add(t2)
164 |             for t2 in self.graph_infos_python["upons"]["direct"][ti]:
165 |                 pure_linked.add(t2)
166 | 
167 |         # filter that tasks are linked, in current task_classes.
168 |         result = [ti for ti in result if ti in pure_linked]
169 |         result.extend(pure_selected_task_instances)
170 |         result = list(set(result))
171 |         return result
172 | 
173 |     @cached_property
174 |     def graph_infos_data(self):
175 |         return Graph.analysis_dependencies_between_nodes(self.total_task_instances,
176 |                                                          self.selected_packages)
177 | 
178 |     @cached_property
179 |     def graph_infos_python(self):
180 |         return self.graph_infos_data["python"]
181 | 
182 |     @cached_property
183 |     def nodes(self):
184 |         return [Template.a_node(ti) for ti in self.selected_task_instances]
185 | 
186 |     @cached_property
187 |     def edges(self):
188 |         return Template.edges_from_nodes(self.selected_task_instances)
189 | 
190 |     @cached_property
191 |     def nodes_groups(self):
192 |         return Graph.split_edges_into_groups(self.edges,
193 |                                              self.nodes,
194 |                                              self.selected_task_instances)
195 | 
196 |     @cached_property
197 |     def nodes_groups_in_view(self):
198 |         return [sorted(list(nodes_set)) for nodes_set in self.nodes_groups]
199 | 
200 |     @cached_property
201 |     def task_instance_repr_to_info(self):
202 |         result = dict()
203 |         for ti in self.total_task_instances:
204 |             param_kwargs = deepcopy(ti.param_kwargs)
205 |             if "pool" in param_kwargs:
206 |                 del param_kwargs["pool"]
207 |             result[str(ti)] = {"task_cls": ti.task_clsname, "param_kwargs": stringify(param_kwargs)}
208 |         return result
209 | 
210 |     @cached_property
211 |     def result(self):
212 |         return {
213 |             "title": "Luiti WebUI, a DAG timely visualiser.",
214 | 
215 |             "queryparams": {
216 |                 "accepted": self.accepted_query_params,
217 |                 "selected_query": self.selected_query,
218 |                 "default_query": self.default_query,
219 |                 "luiti_visualiser_env": self.ptm.current_luiti_visualiser_env,
220 |             },
221 | 
222 |             "ptm": {
223 |                 "task_class_names": self.ptm.task_class_names,
224 |                 "task_package_names": self.ptm.task_package_names,
225 |                 "task_clsname_to_package_name": self.ptm.task_clsname_to_package_name,
226 |                 "package_to_task_clsnames": self.ptm.package_to_task_clsnames,
227 |                 "task_instance_repr_to_info": self.task_instance_repr_to_info,
228 |             },
229 | 
230 |             "nodeedge": {
231 |                 "nodes": self.nodes,
232 |                 "edges": self.edges,
233 |                 "nodes_groups": self.nodes_groups_in_view,
234 |                 "graph_infos": self.graph_infos_data["json"],
235 |             },
236 | 
237 |             "errors": {
238 |                 "load_tasks": self.ptm.load_all_tasks_result["failure"],
239 |             }
240 |         }
241 | 


--------------------------------------------------------------------------------
/luiti/daemon/query_engine/create_task.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | 
 4 | __all__ = ["CreateTask"]
 5 | 
 6 | import luigi
 7 | from ..utils import CacheByDictKey
 8 | 
 9 | 
class CreateTask(object):
    """ Factory that builds task instances through per-class caches. """

    # task class -> TaskInstanceCache, shared by all callers of `new`.
    task_clsname_cache = {}

    @staticmethod
    def new(task_cls, _params):
        """
        Return the task instance of `task_cls` for the params dict `_params`.

        The lookup is delegated to the `TaskInstanceCache` of that class,
        which is created the first time the class is seen.
        """
        registry = CreateTask.task_clsname_cache
        try:
            per_class = registry[task_cls]
        except KeyError:
            per_class = registry[task_cls] = TaskInstanceCache(task_cls)

        return per_class[_params]
23 | 
24 | 
class TaskInstanceCache(object):
    """
    Per-task-class cache, so the same params do not create duplicated
    task instances.
    """

    def __init__(self, task_cls):
        self.task_cls = task_cls
        # Cache keyed by the params dict; misses are built by `process`.
        self.cache = CacheByDictKey(self.process)

    def __getitem__(self, _params):
        """ Cached task instance for the params dict `_params`. """
        return self.cache[_params]

    def process(self, _params):
        """
        Instantiate `task_cls` with only those entries of `_params` whose
        name is declared on the class as a `luigi.Parameter`.
        """
        accepted = {}
        for name, value in _params.items():
            # A missing attribute yields None, which is not a Parameter.
            declared = getattr(self.task_cls, name, None)
            if isinstance(declared, luigi.Parameter):
                accepted[name] = value
        return self.task_cls(**accepted)
46 | 


--------------------------------------------------------------------------------
/luiti/daemon/query_engine/params.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["Params"]
 4 | 
 5 | from ...luigi_extensions import ArrowParameter
 6 | import itertools
 7 | 
 8 | 
 9 | class Params(object):
10 | 
11 |     @staticmethod
12 |     def build_params_array(default_query, selected_query):
13 |         """
14 |         1. build possible params
15 |         2. and with default params
16 |         """
17 |         selected_query_with_kv_array = list()
18 |         for k1, v1 in selected_query.iteritems():
19 |             k1_v2_list = list()
20 | 
21 |             # v1 is params value list
22 |             if not isinstance(v1, list):
23 |                 v1 = [v1]
24 | 
25 |             if len(v1) == 0:
26 |                 continue  # ignore key that no value.
27 | 
28 |             for v2 in v1:
29 |                 # Already overwrited params type and luigi.Task#__eq__ in luiti.
30 |                 # See more details at task_templates.time.task_base.py
31 |                 if k1 == "date_value":
32 |                     v2 = ArrowParameter.get(v2)
33 |                 else:
34 |                     v2 = unicode(v2)
35 |                 k1_v2_list.append({"key": k1, "val": v2})
36 |             selected_query_with_kv_array.append(k1_v2_list)
37 | 
38 |         possible_params_in_kv = map(list, itertools.product(*selected_query_with_kv_array))
39 | 
40 |         params_array = list()
41 |         for kv_list in possible_params_in_kv:
42 |             opt = {kv1["key"]: kv1["val"] for kv1 in kv_list}
43 |             opt = dict(default_query.items() + opt.items())
44 |             params_array.append(opt)
45 | 
46 |         return sorted(params_array)
47 | 


--------------------------------------------------------------------------------
/luiti/daemon/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["stringify",
 4 |            "TaskStorageSet", "TaskStorageDict",
 5 |            "Template",
 6 |            "CacheByDictKey", ]
 7 | 
 8 | 
 9 | from .string import stringify
10 | from .task_storage import TaskStorageSet, TaskStorageDict
11 | from .template import Template
12 | from .cache import CacheByDictKey
13 | 


--------------------------------------------------------------------------------
/luiti/daemon/utils/cache.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["CacheByDictKey"]
 4 | 
 5 | # TODO cache maybe replaced by a decorator, such as @functools.lru_cache
 6 | # 1. https://pypi.python.org/pypi/py_lru_cache/0.1.4 is slow, 100 ms, but simple dict cache is only 1 ms.
 7 | # 2. https://github.com/tkem/cachetools dont support dict parameters.
 8 | 
 9 | 
class CacheByDictKey(object):
    """
    Cache the result of `func(query)`, where `query` is a dict.

    Only the `cache[query]` operation is supported. Two dicts with the same
    items share one cache entry, whatever their insertion order.
    """

    def __init__(self, func):
        self.store = dict()

        assert callable(func)
        self.func = func

    def __getitem__(self, query):
        """ Return the cached value for `query`, computing it on first use. """
        cache_key = self.generate_cache_key(query)

        # Test membership rather than comparing the value with None, so a
        # legitimate None result is cached too instead of being recomputed
        # on every lookup.
        if cache_key not in self.store:
            self.store[cache_key] = self.func(query)
        return self.store[cache_key]

    def generate_cache_key(self, query):
        """ Build a hashable text key from the sorted items of `query`. """
        assert isinstance(query, dict)
        # `repr` of the sorted item list is the same text the former
        # `unicode(...)` call produced, without relying on a Python 2 only
        # builtin.
        return repr(sorted(query.items()))
35 | 


--------------------------------------------------------------------------------
/luiti/daemon/utils/string.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["stringify"]
 4 | 
 5 | 
 6 | def stringify(default_dict):
 7 |     """
 8 |     make an object can be serialized by JSON.
 9 | 
10 |     This function is not general, just for luiti.daemon .
11 |     """
12 |     result = dict()
13 |     for k1, vs1 in default_dict.iteritems():
14 |         # only wrap first level, such as ArrowParameter
15 |         if isinstance(vs1, (list, set)):
16 |             vs1 = map(str, vs1)
17 |         else:
18 |             vs1 = str(vs1)
19 |         result[str(k1)] = vs1
20 |     return result
21 | 


--------------------------------------------------------------------------------
/luiti/daemon/utils/task_storage.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["TaskStorageSet", "TaskStorageDict"]
 4 | 
 5 | from UserDict import UserDict
 6 | 
 7 | """
 8 | Task#__hash isn't consistent when one is from task_instances, and another is from `requires`.
 9 | 
10 | Here we use #task_id to compare that if two tasks are the same one.
11 | """
12 | 
13 | 
class TaskStorageSet(set):
    """
    Set-like container of tasks identified by `task_id`.

    hash(luigi.Task) doesn't work well, so `luigi.Task.task_id` is used as
    the identity temporarily. Members live in the `store` dict
    (task_id -> task); only the methods defined below are backed by it.
    """

    def __init__(self, task_list=()):
        # Immutable default instead of a shared mutable `list()`.
        self.store = dict()

        for t1 in task_list:
            self.add(t1)

    def __contains__(self, t1):
        return t1.task_id in self.store

    def add(self, t1):
        """ Insert `t1`, replacing any member with the same task_id. """
        self.store[t1.task_id] = t1

    def remove(self, t1):
        """ Remove the member with `t1.task_id`; KeyError when absent. """
        del self.store[t1.task_id]

    def __repr__(self):
        return repr(list(self.store.keys()))

    def __len__(self):
        return len(self.store)

    def __iter__(self):
        return iter(self.store.values())
42 | 
43 | 
class TaskStorageDict(UserDict):
    """
    Dict keyed by task: every access translates the task into its
    `task_id`, which is the actual key kept in `self.data`.
    """

    def __getitem__(self, ti):
        key = ti.task_id
        if key not in self.data:
            if not hasattr(self.__class__, "__missing__"):
                raise KeyError(ti)
            return self.__class__.__missing__(self, ti)
        return self.data[key]

    def __setitem__(self, ti, item):
        self.data[ti.task_id] = item

    def __delitem__(self, ti):
        del self.data[ti.task_id]

    def __missing__(self, ti):
        """ Unknown task: store and return a fresh, empty TaskStorageSet. """
        fresh = TaskStorageSet()
        self.data[ti.task_id] = fresh
        return fresh
63 | 


--------------------------------------------------------------------------------
/luiti/daemon/utils/template.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["Template"]
 4 | 
 5 | import luigi
 6 | 
 7 | 
 8 | class Template(object):
 9 |     """
10 |     Generate some output from entities.
11 |     """
12 | 
13 |     @staticmethod
14 |     def task_doc(ti):
15 |         """ Get task doc from class. """
16 |         doc = (ti.task_class.__doc__ or "").strip()
17 |         if isinstance(doc, str):
18 |             doc = doc.decode("UTF-8")
19 |         return doc
20 | 
21 |     @staticmethod
22 |     def a_node(ti):
23 |         result = {"id": ti.task_id,
24 |                   "label": ti.task_class.__name__,
25 |                   "group": ti.package_name,
26 | 
27 |                   "detail": str(ti),
28 |                   "data_file": ti.data_file,
29 |                   "task_doc": Template.task_doc(ti),
30 |                   "task_file": ti.task_class.__module__.replace(".", "/") + ".py",
31 |                   "package_name": ti.package_name,
32 |                   }
33 |         result["size"] = 20
34 |         return result
35 | 
36 |     @staticmethod
37 |     def edges_from_nodes(nodes):
38 |         """
39 |         Generate relations between current task instances, but just only these task instances.
40 |         """
41 |         # 1. check input is valid
42 |         assert isinstance(nodes, list)
43 |         if len(nodes):
44 |             assert isinstance(nodes[0], luigi.Task)
45 | 
46 |         edges = list()
47 |         for ti in nodes:
48 |             t2_in_requires = ti.requires()
49 |             if not isinstance(t2_in_requires, list):
50 |                 t2_in_requires = [t2_in_requires]
51 |             for t2 in t2_in_requires:
52 |                 if t2 is None:  # dep on none tasks
53 |                     continue
54 |                 if t2 not in nodes:
55 |                     continue
56 |                 edges.append(Template.an_edge(t2, ti))
57 | 
58 |         return edges
59 | 
60 |     @staticmethod
61 |     def an_edge(from_task, to_task):
62 |         arrows = "to"  # default
63 |         if from_task == to_task:
64 |             arrows = "self_to_self"
65 | 
66 |         result = {"id": from_task.task_id + " " + to_task.task_id,  # id is uniq.
67 |                   "from": from_task.task_id,
68 |                   "source_name": from_task.task_class.__name__,
69 |                   "to": to_task.task_id,
70 |                   "target_name": to_task.task_class.__name__,
71 |                   "strength": 1.0,
72 |                   "arrows": arrows}
73 | 
74 |         return result
75 | 


--------------------------------------------------------------------------------
/luiti/daemon/web/__init__.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["Server"]
4 | 
5 | 
6 | from .server import Server
7 | 


--------------------------------------------------------------------------------
/luiti/daemon/web/assets.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["assets_main_dir", "assets_thirdparty_dir"]
 4 | 
 5 | 
 6 | import os
 7 | 
 8 | 
 9 | luiti_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 | 
11 | assets_main_dir = os.path.join(luiti_dir, "webui/assets")
12 | assets_thirdparty_dir = os.path.join(luiti_dir, "webui/bower_components")
13 | 
14 | assert os.path.isdir(assets_main_dir), "%s is not exists!" % assets_main_dir
15 | assert os.path.isdir(assets_thirdparty_dir), "%s is not exists!" % assets_thirdparty_dir
16 | 


--------------------------------------------------------------------------------
/luiti/daemon/web/code_render.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["CodeRender"]
 4 | 
 5 | from etl_utils import cached_property
 6 | 
 7 | 
 8 | class CodeRender(dict):
 9 |     """ Highlight luiti task code written in Python. """
10 | 
11 |     @cached_property
12 |     def highlight(self):
13 |         """ Lazy load pygments, so user dont need to load all daemon code. """
14 |         import pygments
15 |         from pygments.lexers import PythonLexer
16 |         lexer = PythonLexer()
17 | 
18 |         return lambda source_code: pygments.highlight(source_code, lexer, self.formatter)
19 | 
20 |     @cached_property
21 |     def formatter(self):
22 |         from pygments.formatters import HtmlFormatter
23 |         return HtmlFormatter(linenos=True)
24 | 
25 |     @cached_property
26 |     def css_html(self):
27 |         return u"""<style type="text/css">%s</style>""" % self.formatter.get_style_defs('.highlight')
28 | 
29 |     def __missing__(self, source_file):
30 |         source_code = file(source_file).read()
31 | 
32 |         path_html = u"""<div>source_file: %s</div>""" % source_file
33 |         code_html = self.highlight(source_code)
34 | 
35 |         body_html = path_html + code_html + self.css_html
36 |         title = source_file.split("/")[-1]
37 | 
38 |         return u"""
39 |         <html lang="en">
40 |             <head>
41 |                 <title>%s</title>
42 |             </head>
43 |             <body>
44 |                 %s
45 |             </body>
46 |         </html>
47 |         """ % (title, body_html)
48 | 


--------------------------------------------------------------------------------
/luiti/daemon/web/handlers.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["web_handlers"]
 4 | 
 5 | from etl_utils import cached_property
 6 | import pkg_resources
 7 | import tornado.web
 8 | 
 9 | from .assets import assets_main_dir, assets_thirdparty_dir
10 | from ..ptm import PTM
11 | from ..query_engine import Query
12 | from .code_render import CodeRender
13 | 
14 | 
class IndexHandler(tornado.web.RequestHandler):
    """ Serves the WebUI entry page. """

    def get_template_path(self):
        """ Templates live in the "webui" directory, two levels above this module. """
        webui_dir = pkg_resources.resource_filename(__name__, "../../webui")
        return webui_dir

    def get(self):
        # The page takes no server-side arguments here (one query key may
        # carry multiple values).
        self.render("index.html")
23 | 
24 | 
class InitDataHandler(tornado.web.RequestHandler):
    """ Answers with the query engine's env data for the request's params. """

    @cached_property
    def query_engine(self):
        """ Query engine bound to the PTM, created on first use. """
        engine = Query(PTM)
        return engine

    def get(self):
        raw_query = self.request.query_arguments
        engine = self.query_engine
        self.write(engine.get_env(raw_query))
36 | 
37 | 
class CodeShowHandler(tornado.web.RequestHandler):
    """ Shows the highlighted source code of one task class. """

    @cached_property
    def code_render(self):
        """ Renderer used to turn a source file into an HTML page. """
        return CodeRender()

    def get(self, package_name, task_cls_name):
        """
        Write the rendered source of `task_cls_name`.

        Both path segments come from the URL, so unknown names are answered
        with a 404 instead of an `assert` (asserts are skipped under
        `python -O` and otherwise surface as a 500).
        """
        if package_name not in PTM.task_package_names:
            raise tornado.web.HTTPError(404, "unknown luiti package: %s", package_name)
        if task_cls_name not in PTM.task_clsname_to_package:
            raise tornado.web.HTTPError(404, "unknown task class: %s", task_cls_name)

        source_file = PTM.task_clsname_to_source_file[task_cls_name]
        source_code = self.code_render[source_file]
        self.write(source_code)
52 | 
53 | 
web_handlers = [
    # Static files: third-party components and luiti's own assets.
    (r"/luiti/bower_components/(.*)", tornado.web.StaticFileHandler, dict(path=assets_thirdparty_dir)),
    (r"/luiti/assets/(.*)", tornado.web.StaticFileHandler, dict(path=assets_main_dir)),

    # Source code page, addressed as <package name>/<task class name>.
    (r"/luiti/code/([^/]+)/([^/]+)", CodeShowHandler, dict()),
    # The visualiser page and the JSON data it loads.
    (r"/luiti/dag_visualiser", IndexHandler, dict()),
    (r"/luiti/init_data.json", InitDataHandler, dict()),
    # The site root redirects to the visualiser page.
    (r"/", tornado.web.RedirectHandler, dict(url="/luiti/dag_visualiser")),
]
64 | 


--------------------------------------------------------------------------------
/luiti/daemon/web/server.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | """
 4 | A DAG timely visualiser.
 5 | 
 6 | Draw DAG tasks under selected parameters.
 7 | """
 8 | 
 9 | from __future__ import unicode_literals
10 | 
11 | __all__ = ["Server"]
12 | 
13 | from etl_utils import cached_property
14 | import tornado.httpclient
15 | import tornado.httpserver
16 | import tornado.ioloop
17 | import tornado.netutil
18 | import tornado.web
19 | import tornado.escape
20 | from tornado.log import enable_pretty_logging
21 | enable_pretty_logging()
22 | 
23 | 
24 | import logging
25 | logger = logging.getLogger("luiti.server")
26 | 
27 | 
28 | # 1. Setup business package env
29 | # list current package's related tasks, group by package name.
30 | from .handlers import web_handlers
31 | 
32 | 
class Server(object):
    """
    A tornado server for the luiti WebUI.

    Creating an instance prints the banner and the URL; `run` binds the
    sockets and blocks in the tornado IO loop.
    """

    welcome_doc = u"""
( \      |\     /|\__   __/\__   __/\__   __/
| (      | )   ( |   ) (      ) (      ) (
| |      | |   | |   | |      | |      | |
| |      | |   | |   | |      | |      | |
| |      | |   | |   | |      | |      | |
| (____/\| (___) |___) (___   | |   ___) (___
(_______/(_______)\_______/   )_(   \_______/
    """

    def __init__(self, host, port):
        """
        host: address to bind, e.g. "0.0.0.0".
        port: port to bind.
        """
        self.host = host
        self.port = port

        # Fix cant open http://0.0.0.0 on browser. Only the exact wildcard
        # address is swapped: a substring replace would also rewrite hosts
        # such as "10.0.0.0" into "1localhost".
        display_host = "localhost" if self.host == "0.0.0.0" else self.host
        self.url = "http://%s:%s" % (display_host, self.port)

        print(self.welcome_doc)
        print("Luiti WebUI is mounted on %s" % self.url)

    def run(self):
        """
        Runs one instance of the API server.
        """
        api_sockets = tornado.netutil.bind_sockets(self.port, address=self.host)
        server = tornado.httpserver.HTTPServer(self.app)
        server.add_sockets(api_sockets)

        logger.info("Scheduler starting up")
        tornado.ioloop.IOLoop.instance().start()

    @cached_property
    def app(self):
        """ return a API app instance. """
        settings = {
            "unescape": tornado.escape.xhtml_unescape,
            # "autoreload": True
        }

        return tornado.web.Application(web_handlers, **settings)
76 | 


--------------------------------------------------------------------------------
/luiti/java/MultipleTextFiles.java:
--------------------------------------------------------------------------------
 1 | package com.voxlearning.bigdata.MrOutput;
 2 | 
 3 | import org.apache.hadoop.fs.Path;
 4 | import org.apache.hadoop.io.Text;
 5 | import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
 6 | 
 7 | public class MultipleTextFiles extends MultipleTextOutputFormat<Text, Text> {
 8 |     /**
 9 |      * Currently, the `reducer` function in luiti use below data format.
10 |      *     yield "", "{"json key": "json value"}"
11 |      *  If need multiple file output, then we use the unused yield key.
12 |      *
13 |      * Ref code: http://blog.csdn.net/lmc_wy/article/details/7532213
14 |      */
15 | 
16 |     protected String generateFileNameForKeyValue(Text key, Text value, String name)
17 |     {
18 |         String outputName = key.toString();      // Get the current filename
19 |         key.set("");                             // We just need the value, so remove the unneeded key.
20 |         return new Path(outputName, name).toString();   // 参考 https://github.com/klbostee/feathers
21 |     }
22 | 
23 | }
24 | 
25 | 
26 | /*
27 |  * deploy ref: https://github.com/klbostee/feathers/blob/master/build.sh
28 |  */
29 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/__init__.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | """
4 | This folder contains functions only. Please make sure dont make any complex `import` statements.
5 | 
6 | See import logic at luiti/luigi_extensions/manage_decorators.py
7 | """
8 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/as_a_luiti_task.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["as_a_luiti_task"]
 4 | 
 5 | import luigi
 6 | from ..luigi_extensions import TaskBase, TaskInit
 7 | from ..utils import ExtUtils
 8 | 
 9 | # Extensions to luigi.Task
# Extensions to luigi.Task: the sorted names defined on TaskBase, without
# dunder names and without the "_abc*" bookkeeping attributes.
task_base_members = [
    k1 for k1 in sorted(TaskBase.__dict__.keys())
    if not k1.startswith(("__", "_abc"))
]
# member list, see details at TaskBase
# >>> ['_persist_files', '_ref_tasks', 'data_dir', 'data_file', 'data_name', 'date_str', 'date_type', 'date_value', 'date_value_by_type_in_begin', 'date_value_by_type_in_end', 'date_value_by_type_in_last', 'errput', 'instances_by_date_range', 'is_external', 'is_reach_the_edge', 'output', 'package_name', 'pre_task_by_self', 'requires', 'reset_date', 'root_dir', 'run', 'run_mode', 'task_class', 'task_clsname', 'task_namespace']
15 | 
16 | 
def as_a_luiti_task(**opts):  # Decorator
    """
    Reuse a luigi contrib task class as a luiti task through a decorator.

    Usage:

        @luigi.as_a_luiti_task()
        class AnotherHiveDay(HiveQueryTask):
            pass

    The keyword options are accepted but not read by this code.

    https://github.com/spotify/luigi/tree/master/luigi/contrib
    """

    def func(task_cls):
        """ Wrap `task_cls` without overwriting its own `__init__`. """
        # Make sure it's a luigi task class.
        assert issubclass(task_cls, luigi.Task), task_cls

        # Give the target class every TaskBase member it does not provide
        # itself (missing, or left as NotImplemented / NotImplementedError).
        for member in task_base_members:
            inherited = getattr(TaskBase, member)
            current = getattr(task_cls, member, NotImplementedError)
            if current in (NotImplementedError, NotImplemented):
                setattr(task_cls, member, inherited)

        # Subclass both, so `isinstance` works against TaskBase as well.
        class wrap_cls(task_cls, TaskBase, ExtUtils.ExtendClass):
            def __init__(self, *args, **kwargs):
                super(wrap_cls, self).__init__(*args, **kwargs)
                TaskInit.setup(self)

        # The wrapper presents itself under the identity of the original.
        for attribute in ("__doc__", "__module__", "__name__"):
            setattr(wrap_cls, attribute, getattr(task_cls, attribute))

        return wrap_cls
    return func
56 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/check_date_range.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["check_date_range"]
 4 | 
 5 | from ..luigi_extensions import ArrowParameter
 6 | 
 7 | 
 8 | def check_date_range():  # 装饰器
 9 |     """
10 |     从数据库导数据时，必须注意时间范围内的所有数据是否都齐全了。如果未齐全，
11 |     即在当前时间范围里导的话，那么就会缺失数据了，相当于提前导了。
12 | 
13 |     比如在周六就把这周的关联数据导出来，那么周日的数据就没包含在里面。应该在下周一后才开始导。
14 |     """
15 |     def decorator(orig_run):
16 |         def new_run(self):
17 |             # 说明时间未到，然后就直接退出
18 |             if ArrowParameter.now() < self.date_value_by_type_in_end:
19 |                 return False
20 |             return orig_run(self)
21 |         return new_run
22 | 
23 |     def func(cls):
24 |         cls.run = decorator(cls.run)
25 |         return cls
26 |     return func
27 | # TODO support Hadoop
28 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/check_runtime_range.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["check_runtime_range"]
 4 | 
 5 | from ..luigi_extensions import ArrowParameter
 6 | 
 7 | 
 8 | def check_runtime_range(**opts_1):  # 装饰器
 9 |     """
10 |     Support hour/weekday indexed range.
11 | 
12 |     Optional params:
13 |     1. hour_num
14 |     2. weekday_num
15 |     3. now
16 |     """
17 |     def decorator(orig_run):
18 |         def new_run(self):
19 |             default_opts = {
20 |                 "hour_num": range(1, 25),
21 |                 "weekday_num": range(1, 8),
22 |             }
23 |             opts = dict(default_opts.items() + opts_1.items())
24 | 
25 |             now = ArrowParameter.now()           # get current time
26 |             hour_24 = int(now.format("H"))  # 0, 1, 2, ..., 23, 24
27 |             day_of_week_7 = int(now.format("d"))  # 1, 2, 3, ..., 6, 7
28 | 
29 |             is_false = False
30 |             if hour_24 not in opts['hour_num']:
31 |                 is_false = True
32 |             if day_of_week_7 not in opts['weekday_num']:
33 |                 is_false = True
34 |             if is_false:
35 |                 print "[info]", now, " is not in ", opts, \
36 |                       ", so the task exited."
37 |                 return False
38 | 
39 |             return orig_run(self)
40 |         return new_run
41 | 
42 |     def func(cls):
43 |         cls.run = decorator(cls.run)
44 |         return cls
45 |     return func
46 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/mr_local.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["mr_local"]
 4 | 
 5 | from collections import defaultdict
 6 | from etl_utils import process_notifier
 7 | from ..utils import TargetUtils
 8 | 
 9 | 
def mr_local(**opts):
    """
    Sometimes Hadoop streaming sucks, so we only use the solid HDFS, and turn
    MapReduce job into local mode.

    And `mr_local` is optimized by a fixed chunk write operation.

    Every keyword option is set as a class attribute on the decorated class;
    `chunk_size` (number of output lines per write) defaults to 100.
    """

    def mr_run(self):
        """ Overwrite BaseHadoopJobTask#run function. """
        # TODO maybe model cache
        # Map phase: group every mapped value under its key, in memory.
        map_kv_dict = defaultdict(list)

        inputs = self.input()
        if not isinstance(inputs, list):
            inputs = [inputs]
        for input_hdfs_1 in inputs:
            for line2 in TargetUtils.line_read(input_hdfs_1):
                for map_key_3, map_val_3 in self.mapper(line2):
                    map_kv_dict[map_key_3].append(map_val_3)

        # Reduce phase: write the reduced values in chunks of `chunk_size`.
        with self.output().open("w") as output1:
            fixed_chunk = list()
            # Iterate over a snapshot of the keys: entries are deleted while
            # looping, to release memory.
            for reduce_key_2 in process_notifier(list(map_kv_dict.keys())):
                reduce_vals_2 = map_kv_dict[reduce_key_2]
                for _, reduce_val_2 in self.reducer(
                        reduce_key_2, reduce_vals_2):
                    fixed_chunk.append(reduce_val_2)

                    if len(fixed_chunk) >= self.chunk_size:
                        output1.write("\n".join(fixed_chunk) + "\n")
                        fixed_chunk = list()
                del map_kv_dict[reduce_key_2]

            # Flush the remainder only when there is one; an unconditional
            # write would append a blank line to the output.
            if fixed_chunk:
                output1.write("\n".join(fixed_chunk) + "\n")

    def wrap(cls):
        cls.run = mr_run
        cls.run_mode = "mr_local"

        opts.setdefault("chunk_size", 100)
        for k1, v1 in opts.items():
            setattr(cls, k1, v1)

        return cls
    return wrap
55 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/multiple_text_files.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | __all__ = ["multiple_text_files"]
  4 | 
  5 | import os
  6 | import commands
  7 | from etl_utils import cached_property
  8 | from ..utils import CommandUtils
  9 | import luigi
 10 | 
 11 | 
 12 | def multiple_text_files(opts=dict()):
 13 |     """
 14 |     Let current task class's result can support outputing into multiple files.
 15 | 
 16 |     Usage:
 17 | 
 18 |     ```python
 19 |     @luigi.multiple_text_files
 20 |     class ManAndWomanDay(TaskDayHadoop):
 21 |         def mapper(self, line1):
 22 |             item1 = MRUtils.json_parse(line1)
 23 |             yield item1['uid'], item1
 24 | 
 25 |         def reducer(self, uid1, vals_1):
 26 |             for item1 in vals_1:
 27 |                 yield item1["gender"], MRUtils.str_dump(item1)
 28 |     ```
 29 | 
 30 |     So above code separate man and woman into two files. File name such as
 31 |     1. man_and_woman_day.json/man
 32 |     2. man_and_woman_day.json/woman
 33 | 
 34 |     But not the default one
 35 |     1. man_and_woman_day.json/part-00000
 36 | 
 37 |     WARN:
 38 |         when use `@luigi.multiple_text_files`, consider to wrap subfolders with
 39 |         StaticFile task class.
 40 |     """
 41 |     def func(task_cls):
 42 |         cjc = CompileJavaCode()
 43 | 
 44 |         def compile_java_code(self):
 45 |             """ compile java code dynamically. """
 46 |             if not os.path.exists(cjc.target_jar):
 47 |                 CommandUtils.execute(cjc.compile_cmd)
 48 | 
 49 |         setattr(task_cls, "output_format", cjc.output_format)
 50 |         setattr(task_cls, "libjars", [cjc.target_jar, ])
 51 |         setattr(task_cls, "compile_java_code", compile_java_code)
 52 |         return task_cls
 53 | 
 54 |     # Comptible with old API.
 55 |     if isinstance(opts, dict):
 56 |         return func
 57 |     if issubclass(opts, luigi.Task):
 58 |         return func(opts)
 59 |     raise ValueError(opts)
 60 | 
 61 | 
 62 | class CompileJavaCode(object):
 63 |     """
 64 |     assemble jar.
 65 |     """
 66 | 
 67 |     java_namespace = "com.voxlearning.bigdata.MrOutput"
 68 |     java_lib = "MultipleTextFiles"
 69 |     output_format = ".".join([java_namespace, java_lib])
 70 |     root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 71 | 
 72 |     @cached_property
 73 |     def java_file(self):
 74 |         return self.java_lib + ".java"
 75 | 
 76 |     @cached_property
 77 |     def target_class(self):
 78 |         return self.java_lib + ".class"
 79 | 
 80 |     @cached_property
 81 |     def target_jar(self):
 82 |         return os.path.join(self.root_dir, "java", self.java_lib + ".jar")
 83 | 
 84 |     @cached_property
 85 |     def compile_cmd(self):
 86 |         classes_dir = self.java_namespace.replace(".", "/")
 87 |         javac_cmd = commands.getoutput("which javac")
 88 |         java_classpath = commands.getoutput("hadoop classpath")
 89 |         jar_cmd = commands.getoutput("which jar")
 90 | 
 91 |         compile_cmd = ";\n".join([
 92 |             # no absolute path, compact with java namespace.
 93 |             "cd %s/java" % self.root_dir,
 94 | 
 95 |             """%s -classpath "%s" %s""" % (javac_cmd,
 96 |                                            java_classpath, self.java_file, ),
 97 |             "rm -rf %s" % classes_dir,
 98 |             "mkdir -p %s" % classes_dir,
 99 |             "cp %s %s" % (self.target_class, classes_dir),
100 |             "%s cvf %s %s/*.class" % (jar_cmd, self.target_jar, classes_dir, ),
101 |         ])
102 |         return compile_cmd
103 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/persist_files.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["persist_files"]
 4 | 
 5 | import os
 6 | from luigi import Event
 7 | from ..utils import IOUtils
 8 | 
 9 | 
10 | # NOTE deprecated
def persist_files(*files):  # decorator
    """
    Describe several data files with a small DSL and bind their cleanup to
    event_handler(Event.FAILURE).

    Each name in `files` becomes a read-only property on the decorated class
    that resolves to `<data_dir>/<name>.json`.
    """
    def decorate(cls):
        # 1. Expose every persisted file as a property.
        def make_getter(name):
            # A dedicated closure keeps `name` bound per file, instead of
            # being overwritten by the loop below.
            def getter(self):
                return os.path.join(self.data_dir, name + ".json")
            return getter

        setattr(cls, "__persist_files", files)
        for name in getattr(cls, "__persist_files"):
            setattr(cls, name, property(make_getter(name)))  # @decorator

        # 2. Remove those files when the task fails.
        def clean_tmp(task, exception):
            for name in files:
                IOUtils.remove_files(getattr(task, name))
            # IOUtils.remove_files(task.data_file)
            # NOTE Hadoop seems to handle the output files of failed tasks by
            # itself, otherwise the task would stay in running state during
            # its N retries.
        register = cls.event_handler(Event.FAILURE)
        register(clean_tmp)

        return cls

    return decorate
35 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/plug_packages.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["plug_packages"]
 4 | 
 5 | from ..manager import luiti_config
 6 | 
 7 | 
 8 | def plug_packages(*package_names):
 9 |     """
10 |     Let luigi know which packages should be attached, and can send to
11 |     YARN, etc.
12 | 
13 |     Package format can be any valid Python package name, such as "project_B" or
14 |     "project_C==0.0.2", etc.
15 | 
16 |     Usage: use `active_packages` decorator to notice luigi that these packages
17 |     should include.
18 |     """
19 |     for p1 in package_names:
20 |         if p1:
21 |             # load all packages's depended pacakges.
22 |             luiti_config.attached_package_names.add(p1)
23 | # TODO why should do `luigi.hadoop.attach` in `active_packages`
24 | 


--------------------------------------------------------------------------------
/luiti/luigi_decorators/ref_tasks.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["ref_tasks"]
 4 | 
 5 | from ..manager import load_a_task_by_name, luiti_config
 6 | 
 7 | 
 8 | def ref_tasks(*tasks):  # 装饰器
 9 |     """
10 |     自动把依赖 Task 链接起来，通过属性访问。
11 | 
12 |     Example:
13 | 
14 |     ```python
15 |     @ref_tasks("TaskB", "TaskC")
16 |     class TaskA(TaskWeekBase):
17 |         pass
18 | 
19 |     TaskA().TaskB == TaskB
20 |     TaskA().TaskC == TaskC
21 |     ```
22 |     """
23 |     def wrap_cls(ref_task_name):
24 |         def _func(self):
25 |             v1 = self.__dict__.get(ref_task_name, None)
26 |             if v1 is None:
27 |                 v1 = load_a_task_by_name(ref_task_name)
28 |                 self.__dict__[ref_task_name] = v1
29 |             return v1
30 |         return _func
31 | 
32 |     def wrap_instance(ref_task_name, task_name):
33 |         def _func(self):
34 |             v1 = self.__dict__.get(task_name, None)
35 |             if v1 is None:
36 |                 v1 = getattr(self, ref_task_name)(self.date_value)
37 |                 self.__dict__[task_name] = v1
38 |             return v1
39 |         return _func
40 | 
41 |     # Fix pickle dump, but it maybe unneeded.
42 |     def __getstate__(self):
43 |         """ Fix luiti_tasks module namespace conflicts. """
44 |         for ref_task1 in self._ref_tasks:
45 |             cname = ref_task1           # class    name
46 |             iname = ref_task1 + "_task"  # instance name
47 | 
48 |             if cname in self.__dict__:
49 |                 del self.__dict__[cname]
50 |             if iname in self.__dict__:
51 |                 del self.__dict__[iname]
52 |         return self.__dict__
53 | 
54 |     def __setstate__(self, d1):
55 |         # 1. default
56 |         self.__dict__.update(d1)
57 |         # 2. plug other package in `.__init_luiti`
58 |         luiti_config.curr_project_name = self.package_name
59 |         luiti_config.link_packages()
60 | 
61 | # cached_property 捕获不了 ref_task_name 变量, 被重置为某一个了。。
62 | # property 可以捕获 ref_task_name 变量。
63 |     def func(cls):
64 |         setattr(cls, "_ref_tasks", tasks)
65 |         for ref_task_name in cls._ref_tasks:
66 |             setattr(cls, ref_task_name, property(wrap_cls(ref_task_name)))
67 | 
68 |             # TODO 根据当前日期返回。
69 |             task_name = "%s_%s" % (ref_task_name, "task")
70 |             setattr(cls, task_name,
71 |                     property(wrap_instance(ref_task_name, task_name)))
72 | 
73 |             # clear ref task info when pickle.dump
74 |             setattr(cls, "__getstate__", __getstate__)
75 |         return cls
76 |     return func
77 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["TaskInit", "ArrowParameter", "TaskBase", "HadoopExt", "RootTask", "luigi"]
 4 | 
 5 | 
 6 | from .task_init import TaskInit
 7 | from .parameter import ArrowParameter
 8 | from .task_base import TaskBase
 9 | from .hadoop_ext import HadoopExt
10 | from .root_task import RootTask
11 | 
12 | from .create_python_package import luigi
13 | from .manage_decorators import ManageDecorators
14 | ManageDecorators.bind_to(luigi)
15 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/create_python_package.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["create_packages_archive_with_support_egg"]
 4 | 
 5 | import os
 6 | from .luigi_root_context import luigi
 7 | 
 8 | orig_create_packages_archive = luigi.hadoop.create_packages_archive
 9 | 
10 | 
def create_packages_archive_with_support_egg(packages, filename):
    """
    Fix original luigi's `create_packages_archive`, which cannot attach egg
    packages (zip file type) to the tarfile because its file copying
    mechanism works on absolute paths.

    Steps:
      1. Let the original implementation build the tar file.
      2. For every package whose `__path__` does not exist on disk, look for
         a parent path that is a zip file, extract it to a temporary
         directory and append its files (except `*.pyc`) to the tar.
      3. Append `client.cfg` from the current working directory, if present.

    Args:
        packages: iterable of imported package/module objects.
        filename: path of the tar archive to create and extend.
    """
    import shutil
    import tarfile
    import tempfile
    import zipfile

    # 1. original create tar file
    orig_create_packages_archive(packages, filename)

    logger = luigi.hadoop.logger
    fake_exists_path = "/"  # root always exists

    def get_parent_zip_file_within_absolute_path(path1):
        """ Walk up from `path1` until a regular file is found. """
        path2 = path1
        while path2 != fake_exists_path:
            parent = os.path.dirname(path2)
            if parent == path2:
                break  # no further progress possible, avoid looping forever
            path2 = parent
            if os.path.isfile(path2):
                return True, path2
        return False, path2

    # 2. append python egg packages that step 1 did not cover
    tar = tarfile.open(filename, "a")  # Force append

    def add(src, dst):
        logger.debug('adding to tar: %s -> %s', src, dst)
        tar.add(src, dst)

    try:
        for package1 in packages:
            candidates = list(getattr(package1, "__path__", []))
            path2 = (candidates + [fake_exists_path])[0]
            if os.path.exists(path2):
                continue  # so luigi can import it.
            if not path2.startswith("/"):
                continue  # we only care about libraries.

            is_success, zipfilename3 = \
                get_parent_zip_file_within_absolute_path(path2)
            if not is_success:
                continue
            if not zipfile.is_zipfile(zipfilename3):
                logger.debug('skip non-zip parent file: %s', zipfilename3)
                continue

            tmp_dir3 = tempfile.mkdtemp()
            try:
                with zipfile.ZipFile(zipfilename3) as egg3:
                    egg3.extractall(tmp_dir3)

                for root4, dirs4, files4 in os.walk(tmp_dir3):
                    for file5 in files4:
                        if file5.endswith(".pyc"):
                            continue
                        add(
                            os.path.join(root4, file5),
                            os.path.join(
                                root4.replace(tmp_dir3, "").lstrip("/"),
                                file5))
            finally:
                # `tar.add` has already copied the content, so the extracted
                # files can be removed right away.
                shutil.rmtree(tmp_dir3, ignore_errors=True)

        client_cfg = os.path.join(os.getcwd(), "client.cfg")
        if os.path.exists(client_cfg):
            tar.add(client_cfg, "client.cfg")
    finally:
        tar.close()
69 | 
70 | luigi.hadoop.create_packages_archive = create_packages_archive_with_support_egg  # wrap old function
71 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/hadoop_ext.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | from __future__ import print_function
  4 | 
  5 | __all__ = ['HadoopExt']
  6 | 
  7 | import sys
  8 | import luigi.hadoop
  9 | from luigi.hadoop import flatten
 10 | from itertools import groupby
 11 | from etl_utils import cached_property
 12 | 
 13 | from ..utils import ExtUtils, TargetUtils
 14 | from .task_init import TaskInit
 15 | 
 16 | # See benchmark at https://gist.github.com/mvj3/02dca2bcc8b0ef1bbfb5
 17 | # force to use faster ujson, or it's meaningless to use JSON format with no performance gained.
 18 | import ujson as json
 19 | import jsonpickle
 20 | 
 21 | 
 22 | class LuitiHadoopJobRunner(luigi.hadoop.HadoopJobRunner):
 23 |     """ overwrite DefaultHadoopJobRunner.class """
 24 | 
 25 |     # params are copied from HadoopJobRunner
 26 |     def __init__(self, libjars=None, output_format=None):
 27 |         config = luigi.hadoop.configuration.get_config()
 28 |         opts = {
 29 |             "streaming_jar": config.get('hadoop', 'streaming-jar'),
 30 |             "output_format": output_format,
 31 |             "libjars": libjars,
 32 |         }
 33 |         super(LuitiHadoopJobRunner, self).__init__(**opts)
 34 | 
 35 | 
# Maps a data interchange format name to its three codecs:
#   serialize          - turns a value into the text written by `writer`
#   internal_serialize - turns a value into the text written by `internal_writer`
#   deserialize        - parses text produced by `internal_serialize`
# NOTE(review): the "python" format deserializes with `eval`, which evaluates
# arbitrary expressions; it must only ever be fed trusted data.
DataInterchange = {
    "python": {"serialize": str,
               "internal_serialize": repr,
               "deserialize": eval},
    "json": {"serialize": json.dumps,
             "internal_serialize": json.dumps,
             "deserialize": json.loads},
    "jsonpickle": {"serialize": jsonpickle.dumps,
                   "internal_serialize": jsonpickle.dumps,
                   "deserialize": jsonpickle.loads}
}
 47 | 
 48 | 
class HadoopExt(luigi.hadoop.JobTask, ExtUtils.ExtendClass):
    """
    luigi Hadoop job task with a pluggable data interchange format, a
    configurable output format / libjars, and luiti's task initialization.
    """

    # Key into `DataInterchange`; the keys defined in this module are
    # "python", "json" and "jsonpickle".
    data_interchange_format = "python"

    @cached_property
    def serialize(self):
        """ Codec used by `writer` for the selected format. """
        return DataInterchange[self.data_interchange_format]['serialize']

    @cached_property
    def internal_serialize(self):
        """ Codec used by `internal_writer` and for grouping keys in `_reduce_input`. """
        return DataInterchange[self.data_interchange_format]['internal_serialize']

    @cached_property
    def deserialize(self):
        """ Codec used by `internal_reader` and to restore keys in `_reduce_input`. """
        return DataInterchange[self.data_interchange_format]['deserialize']

    def writer(self, outputs, stdout, stderr=sys.stderr):
        """
        Writer format is a method which iterates over the output records
        from the reducer and formats them for output.

        The default implementation outputs tab separated items.

        On any error the offending record is printed to `stderr` and the
        exception is re-raised.
        """
        for output in outputs:
            try:
                output = flatten(output)
                if self.data_interchange_format == "json":
                    # Only dump one json string, and skip another one, maybe key or value.
                    output = filter(lambda x: x not in ["", None], output)
                else:
                    # JSON is already serialized, so we put `self.serialize` in a else statement.
                    output = map(self.serialize, output)
                print("\t".join(map(str, output)), file=stdout)
            except:
                print(output, file=stderr)
                raise

    def _reduce_input(self, inputs, reducer, final=NotImplemented):
        """
        Iterate over input, collect values with the same key, and call the reducer for each unique key.

        `inputs` yields (key, value) pairs. `groupby` only merges consecutive
        items, so equal keys must already be adjacent. Keys are grouped by
        their serialized form and deserialized again before being handed to
        `reducer`. When `final` is given, its outputs are yielded last.
        """
        for key, values in groupby(inputs, key=lambda x: self.internal_serialize(x[0])):
            for output in reducer(self.deserialize(key), (v[1] for v in values)):
                yield output
        if final != NotImplemented:
            for output in final():
                yield output
        self._flush_batch_incr_counter()

    def internal_reader(self, input_stream):
        """
        Reader which applies `self.deserialize` on each part of a tab
        separated string (python eval for the default "python" format).
        Yields a list of the deserialized objects per input line.
        """
        for input_line in input_stream:
            yield list(map(self.deserialize, input_line.split("\t")))

    def internal_writer(self, outputs, stdout):
        """
        Writer which outputs the `self.internal_serialize` form of each item
        (python repr for the default "python" format), tab separated.
        """
        for output in outputs:
            print("\t".join(map(self.internal_serialize, output)), file=stdout)

    run_mode = "mr_distribute"
    n_reduce_tasks = 1  # reflected in the output: the number of part-0000N files equals the number of reducers

    output_format = [
        # Single output. This version has problems.
        # "org.apache.hadoop.mapreduce.lib.output.TextOutputFormat",
        "org.apache.hadoop.mapred.TextOutputFormat",  # single output
        "org.apache.hadoop.mapred.lib.MultipleTextOutputFormat",  # multiple outputs
    ][0]  # single output is the default
    # Remember the default so `job_runner` can detect an overridden format.
    output_format_default = output_format[:]
    libjars = []

    def __init__(self, *args, **kwargs):
        """ See TaskBase: makes sure subclasses still get TaskBase's date overriding feature. """
        super(HadoopExt, self).__init__(*args, **kwargs)
        TaskInit.setup(self)

    # overwrite
    def job_runner(self):
        """ will be wraped in `run` function. """
        # Auto compile java code
        # `compile_java_code` is not defined in this class; it must be
        # provided whenever `output_format` is overridden.
        if self.output_format != self.output_format_default:
            self.compile_java_code()

        return LuitiHadoopJobRunner(
            output_format=self.output_format, libjars=self.libjars)

    def output(self):
        """ HDFS target located at `self.data_file`. """
        return TargetUtils.hdfs(self.data_file)

    def jobconfs_opts(self):
        """ Extra `key=value` job configuration entries appended by `jobconfs`. """
        return [
            "mapreduce.framework.name=yarn",
            'mapred.reduce.tasks=%s' % self.n_reduce_tasks,
        ]

    def jobconfs(self):
        """ Parent job configuration plus the entries from `jobconfs_opts`. """
        # NOTE(review): super() starts the lookup *after* luigi.hadoop.JobTask,
        # so JobTask.jobconfs itself is skipped - presumably on purpose; confirm.
        jcs = super(luigi.hadoop.JobTask, self).jobconfs()
        for conf_opt_1 in self.jobconfs_opts():
            jcs.append(conf_opt_1)
        return jcs

    # TestCase related attrs
    def mrtest_input(self):
        """ Test hook; must be overridden. """
        raise NotImplementedError

    def mrtest_output(self):
        """ Test hook; must be overridden. """
        raise NotImplementedError

    def mrtest_attrs(self):
        """ Test hook; returns an empty dict by default. """
        return dict()

    def reader(self, input_stream):
        """
        Overwrite luigi, skip blank line

        Yields a 1-tuple holding the stripped line.
        """
        for line in input_stream:
            line = line.strip()
            if line:
                yield line,
174 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/luigi_root_context.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["luigi"]
 4 | 
 5 | """
 6 | Bind all things to `luigi` root namespace.
 7 | """
 8 | 
 9 | 
# Importing the submodules makes them reachable as attributes of `luigi`.
import luigi.hdfs
luigi.hdfs = luigi.hdfs  # just make a link

import luigi.hadoop
luigi.hadoop = luigi.hadoop  # just make a ref

from .hadoop_ext import HadoopExt
luigi.hadoop.HadoopExt = HadoopExt  # write back
# NOTE keep luigi.hadoop compatible with "track the job: "

luigi.debug = False

luigi.tmp_dir = "/tmp"  # default one

# TODO lazily
from ..utils import TargetUtils
luigi.HDFS = TargetUtils.hdfs  # it has to read global configuration anyway, so simply bind it onto the luigi namespace.


from ..manager import luiti_config, active_packages
# Calling `luigi.ensure_active_packages()` returns the `active_packages` decorator.
luigi.ensure_active_packages = lambda: active_packages  # make a wrap
luigi.luiti_config = luiti_config
luiti_config.linked_luigi = luigi
33 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/manage_decorators.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import os
 4 | import glob
 5 | 
 6 | 
 7 | class ManageDecorators(object):
 8 | 
 9 |     @staticmethod
10 |     def bind_to(luigi):
11 |         root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
12 |         decorator_dir = os.path.join(root_dir, "luigi_decorators")
13 |         files = glob.glob(os.path.join(decorator_dir, "*.py"))
14 | 
15 |         # The decorator name Must as the same as the filename.
16 |         decorator_names = map(lambda i1: i1.split("/")[-1].split(".")[0], files)
17 |         decorator_names = filter(lambda i1: not i1.startswith("__"), decorator_names)
18 |         assert len(decorator_names) > 0, decorator_names
19 | 
20 |         for name in decorator_names:
21 |             try:
22 |                 mod = __import__("luiti.luigi_decorators." + name, fromlist=[name])
23 |             except ImportError:
24 |                 print "[Import error decorator name]", name
25 |                 exit()
26 |             func = getattr(mod, name)
27 |             setattr(luigi, name, func)
28 | 
29 |         return luigi
30 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/parameter.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['ArrowParameter', "arrow"]
 4 | 
 5 | import luigi
 6 | import arrow
 7 | from dateutil import tz
 8 | 
 9 | 
class ArrowParameter(luigi.DateParameter):

    """
    Convert date or time type into Arrow type.

    Accepted string forms:

    "2014-11-24T00:00:00+00:00" # => len 25
    "2014-11-24"                # => len 10
    """

    arrow = arrow  # make a ref

    def parse(self, s):
        """ overwrite default implement. """
        text = str(s)  # ensure we work on a str
        assert len(text) in [25, 10], \
            "Date format must be 2014-11-24T00:00:00+00:00 or 2014-11-24 !"
        return ArrowParameter.get(text)

    @staticmethod
    def get(*strs):
        """ Like the original `arrow.get`, with tzinfo replaced by tzlocal. """
        parsed = arrow.get(*strs)
        return parsed.replace(tzinfo=tz.tzlocal())

    @staticmethod
    def now():
        """ Current time, passed through `ArrowParameter.get`. """
        current = arrow.now()
        return ArrowParameter.get(current)
36 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/root_task.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import luigi
 5 | from luigi import LocalTarget
 6 | 
 7 | 
 8 | class RootTask(luigi.Task):
 9 | 
10 |     def output(self):
11 |         return LocalTarget(os.path.realpath(__file__))  # exist for ever
12 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/task_base.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | __all__ = ['TaskBase']
  4 | 
  5 | import os
  6 | import arrow
  7 | from inflector import Inflector
  8 | from etl_utils import cached_property
  9 | 
 10 | from .luigi_root_context import luigi
 11 | from .root_task import RootTask
 12 | from ..utils import DateUtils, ExtUtils, IOUtils
 13 | from ..manager import luiti_config
 14 | 
 15 | from .parameter import ArrowParameter
 16 | from .task_init import TaskInit
 17 | 
 18 | 
class TaskBase(luigi.Task, ExtUtils.ExtendClass):
    """ Subclasses must append a **date type** to their class name, e.g. Day, Week, ... """

    run_mode = ["local", "mr_distribute", "mr_local"][0]

    date_value = ArrowParameter()  # **unified** date type, prevents running several tasks at the same time

    # will overwritten by @decorator
    # Must not start with **two underscores**, otherwise Python treats them as
    # private variables that cannot be inherited. TODO "private variable" may be the wrong term.
    _persist_files = []
    _ref_tasks = []

    is_external = False  # mark current task as a External Task, same to luigi.ExternalTask

    # Placeholder: subclasses must override it (TaskInit.setup asserts on it).
    root_dir = NotImplementedError

    # Default one, always return True
    def requires(self):
        return RootTask()

    # Placeholder: subclasses or decorators provide the real `run`.
    run = NotImplementedError

    def __init__(self, *args, **kwargs):
        """ Normalize `date_value` through ArrowParameter.get, then run luiti's task setup. """
        # Fix date_value type
        if "date_value" in kwargs:
            kwargs["date_value"] = ArrowParameter.get(kwargs["date_value"])
        if len(args) == 1:  # just the luiti's date_value parameter
            args = (ArrowParameter.get(args[0]), )

        super(TaskBase, self).__init__(*args, **kwargs)
        TaskInit.setup(self)

    @cached_property
    def data_dir(self):
        """ `<root_dir>/<date_str>` """
        assert self.root_dir, "self.root_dir should not be None!"
        return os.path.join(self.root_dir, self.date_str)

    @cached_property
    def data_file(self):
        """ `<data_dir>/<data_name>.json` """
        return os.path.join(self.data_dir, self.data_name + ".json")

    @cached_property
    def data_name(self):
        """ Underscored form of the class name. """
        return Inflector().underscore(self.__class__.__name__)

    def output(self):
        """ Local target at `data_file`. """
        return IOUtils.local_target(self.data_file)

    def errput(self):
        """ Local target at `data_file` + ".err". """
        return IOUtils.local_target(self.data_file + ".err")

    @cached_property
    def date_str(self):
        """ `date_value` formatted as YYYY-MM-DD. """
        return self.date_value.strftime("%Y-%m-%d")

    @cached_property
    def date_type(self):
        """ Date type derived from the class name by luiti_config. """
        return luiti_config.get_date_type(self.__class__.__name__)

    @cached_property
    def date_value_by_type_in_last(self):
        return DateUtils.date_value_by_type_in_last(
            self.date_value, self.date_type)

    @cached_property
    def date_value_by_type_in_begin(self):
        """ `date_value` floored to the current date type. """
        return ArrowParameter.get(self.date_value).floor(self.date_type)

    @cached_property
    def date_value_by_type_in_end(self):
        """ `date_value` ceiled to the current date type. """
        return ArrowParameter.get(self.date_value).ceil(self.date_type)

    @cached_property
    def pre_task_by_self(self):
        """ When two periods are crossed there is no previous data file. """
        return RootTask() if self.is_reach_the_edge else \
            self.__class__(self.date_value_by_type_in_last)

    @cached_property
    def is_reach_the_edge(self):
        return False  # default. e.g. add semester

    def reset_date(self):
        """ Normalize `date_value` and, unless the date type is 'range', floor it to that type. """
        # **Force** a unified date format (arrow), so luigi will not run two tasks at the same time.
        self.date_value = ArrowParameter.get(self.date_value)

        orig_date = self.date_value
        if self.date_type != 'range':
            new_date = orig_date.floor(self.date_type)
            if orig_date != new_date:
                if luigi.debug:
                    print "[reset date by %s] from %s to %s" % \
                        (self.date_type, orig_date, new_date)
                self.date_value = new_date

    @classmethod
    def instances_by_date_range(cls, first_date, last_date):
        """ Return all instances of the current task that belong to the given period. """
        assert isinstance(first_date, arrow.Arrow)
        assert isinstance(last_date, arrow.Arrow)

        if "Range" in cls.__name__:
            # return head and tail directly
            return list(set([cls(first_date), cls(last_date)]))
        else:
            dates = arrow.Arrow.range(
                luiti_config.get_date_type(cls.__name__),
                first_date, last_date)
            return [cls(date1.datetime) for date1 in dates]

    @cached_property
    def task_class(self):
        return self.__class__

    @cached_property
    def task_clsname(self):
        return self.task_class.__name__

    @cached_property
    def package_name(self):
        """ First dotted component of the task class's module name. """
        module_name = self.task_class.__module__
        package_name = module_name.split(".")[0]
        return package_name
142 | 


--------------------------------------------------------------------------------
/luiti/luigi_extensions/task_init.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from dateutil import tz
 4 | from .parameter import ArrowParameter
 5 | 
 6 | 
 7 | class TaskInit(object):
 8 | 
 9 |     @staticmethod
10 |     def setup(task_instance):
11 |         """
12 |         Let luigi'Task supports luiti's operations.
13 | 
14 |         You need to call this function, if you want to extend luigi.
15 |         """
16 |         self = task_instance
17 | 
18 |         # 在跨期的时候用于判断 该周应该是该周的哪些天。
19 |         # 比如这学期开学是 2015-02-17(星期二) 开学, 那么这周的数据只有 0217-0222。
20 |         # 而在寒假里(即run 2015-02-16(星期天) 的 task 时，那么该周的天只有 0216 一天。
21 |         d1 = ArrowParameter.get(self.date_value).replace(tzinfo=tz.tzlocal())
22 |         self.orig_date_value = d1  # exists only if this `setup` executed.
23 | 
24 |         # reset date to at the beginning of current date type here
25 |         self.reset_date()
26 | 
27 |         assert task_instance.root_dir is not NotImplementedError, [task_instance, task_instance.root_dir]
28 |         self.data_file      # force load it now, or `output` still load it.
29 |         self.package_name   # force load it now, use to serialize
30 | 
31 |         # Fix luigi.Task#__eq__
32 |         """
33 |         >>> t1.param_args
34 |         (<Arrow [2015-06-23T00:00:00+08:00]>,)
35 |         >>> map(str, t1.param_args)
36 |         ['2015-06-23T00:00:00+08:00']
37 | 
38 |         def __eq__(self, other):
39 |             return self.__class__ == other.__class__ and self.param_args == other.param_args
40 |         """
41 |         self.param_kwargs["date_value"] = ArrowParameter.get(self.param_kwargs["date_value"])
42 |         self.param_args = tuple(sorted(map(str, [value for key, value in self.param_kwargs.iteritems()])))
43 | 
44 |         # NOTE below codes are copied from luigi's Task
45 |         # Build up task id
46 |         task_id_parts = ["%s=%s" % (k1, v1) for k1, v1 in self.param_kwargs.iteritems() if k1 not in ["pool"]]
47 |         self.task_id = '%s(%s)' % (self.task_family, ', '.join(task_id_parts))
48 |         self.__hash = hash(self.task_id)
49 | 


--------------------------------------------------------------------------------
/luiti/manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = [
 4 |     "ld",
 5 | 
 6 |     "load_a_task_by_name",
 7 |     "print_all_tasks",
 8 |     "new_a_project",
 9 |     "generate_a_task",
10 |     "find_dep_on_tasks",
11 | 
12 |     "active_packages",
13 | 
14 |     "luiti_config",
15 | 
16 |     "Cli",
17 |     "PackageMap",
18 | ]
19 | 
20 | from .loader import Loader
21 | from .table import Table
22 | from .dep import Dep
23 | from .files import Files
24 | 
25 | from .config import luiti_config
26 | from .package_map import PackageMap
27 | from .active_packages import active_packages
28 | 
29 | 
30 | from .generate_from_templates import GenerateFromTemplates
31 | 
32 | from .cli import Cli
33 | 
34 | 
# API list
# Module-level shortcuts: each name below is bound to the static method of
# the helper class imported above, so callers can use it directly from this
# package without referencing the helper class.
# NOTE(review): only some of these aliases are listed in `__all__` at the top
# of this module; the others are not exported by `from ... import *` --
# confirm that this is intended.
find_dep_on_tasks = Dep.find_dep_on_tasks
get_all_date_file_to_task_instances = Files.get_all_date_file_to_task_instances
soft_delete_files = Files.soft_delete_files
load_all_tasks = Loader.load_all_tasks
load_a_task_by_name = Loader.load_a_task_by_name
print_all_tasks = Table.print_all_tasks
print_files_by_task_cls_and_date_range = \
    Table.print_files_by_task_cls_and_date_range
new_a_project = GenerateFromTemplates.new_a_project
generate_a_task = GenerateFromTemplates.generate_a_task
46 | 
47 | 
48 | from .lazy_data import ld
49 | 


--------------------------------------------------------------------------------
/luiti/manager/active_packages.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import os
 4 | from .config import luiti_config as lc
 5 | 
 6 | processed_package_names = set([])
 7 | 
 8 | 
 9 | def active_packages(orig_func):
10 |     """
11 |     called by `PackageMap.task_clsname_to_package`
12 |     """
13 |     def new_func(*args, **kwargs):
14 |         # 1. Setup env
15 |         lc.link_packages()
16 | 
17 |         # 2. Load related packages.
18 |         import pkg_resources
19 |         import luigi.hadoop
20 |         import re
21 | 
22 |         # fix Set changed size during iteration
23 |         for p1 in list(lc.attached_package_names):
24 |             package2, version2 = re.compile("(^[a-z0-9\_]+)(.*)", re.IGNORECASE) \
25 |                 .match(p1).groups()
26 |             if package2 in processed_package_names:
27 |                 continue
28 |             else:
29 |                 # Pip cant manage versions packages, only exist one version at
30 |                 # one time.
31 |                 try:
32 |                     if version2:
33 |                         pkg_resources.require(p1)
34 |                 except:
35 |                     pkg_resources.require(package2)
36 | 
37 |                 # TODO luiti 拷之前需要版本，之后不需要，分布式时判断目录packages即可。
38 |                 # Notice Python to import special version package.
39 |                 # if version2: pkg_resources.require(p1)
40 | 
41 |                 # Let luigi know it.
42 |                 package2_lib = lc.import2(package2)
43 |                 luigi.hadoop.attach(package2_lib)
44 | 
45 |                 # Add valid package which has .luiti_tasks
46 |                 #   compact with package with a plain python file.
47 |                 try:
48 |                     path = (package2_lib.__path__ + [""])[0]
49 |                 except:
50 |                     print "[package2_lib load error]", package2_lib
51 |                     path = "/package/load/error"
52 |                 # TODO 兼容 egg zip 格式，看看里面有没有 luiti_tasks
53 |                 #      文件，然后提示加 zip_safe=False
54 |                 if os.path.exists(path + "/luiti_tasks"):
55 |                     # .__init_luiti Maybe not exists, so execute this first
56 |                     lc.luiti_tasks_packages.add(package2_lib)
57 |             processed_package_names.add(p1)
58 |         return orig_func(*args, **kwargs)  # call it at last.
59 |     new_func.func_name = orig_func.func_name
60 |     return new_func
61 | 


--------------------------------------------------------------------------------
/luiti/manager/config.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | import os
  4 | import sys
  5 | from inflector import Inflector
  6 | from etl_utils import singleton, cached_property
  7 | import arrow
  8 | 
  9 | 
 10 | @singleton()
 11 | class LuitiConfigClass(object):
 12 | 
 13 |     """ Make sure init variables only once. """
 14 |     # arrow.Arrow._ATTRS = ['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond']
 15 |     DateTypes = ["range", "week", "biweekly", "quarter"] + arrow.Arrow._ATTRS
 16 | 
 17 |     curr_project_name = None
 18 |     curr_project_dir = None
 19 | 
 20 |     linked_luigi = None
 21 | 
 22 |     @cached_property
 23 |     def attached_package_names(self):
 24 |         return set(['luiti'])
 25 | 
 26 |     @cached_property
 27 |     def luiti_tasks_packages(self):
 28 |         return set([])
 29 | 
 30 |     @staticmethod
 31 |     def import2(a_package):
 32 |         return __import__(a_package, None, None, 'non_empty')
 33 | 
 34 |     @staticmethod
 35 |     def get_date_type(name1):
 36 |         """ Inherit class must be in TaskBase{Day,Week,Month,Range} style.  """
 37 |         assert isinstance(name1, (str, unicode))
 38 |         str1 = Inflector().underscore(name1).split("_")[-1].lower()
 39 |         assert str1 in luiti_config.DateTypes, [str1, luiti_config.DateTypes]
 40 |         return str1
 41 | 
 42 |     @staticmethod
 43 |     def get_time_task(name1):
 44 |         """ return e.g. TaskDay """
 45 |         type2 = luiti_config.get_date_type(name1)
 46 |         return "Task" + Inflector().camelize(type2)
 47 | 
 48 |     @staticmethod
 49 |     def link_packages():
 50 |         """
 51 |         called by `active_packages`
 52 |         """
 53 |         is_in_luigi_distributed = False
 54 | 
 55 |         # 1. unmornal task class
 56 |         if luiti_config.curr_project_name == "__main__":
 57 |             return False
 58 | 
 59 |         # 2. setup current project as root
 60 |         if luiti_config.curr_project_dir is None:
 61 |             luiti_config.curr_project_dir = os.getcwd()  # auto from current class
 62 |         luiti_config.fix_project_dir()
 63 | 
 64 |         def exists(filename1):
 65 |             return os.path.exists(os.path.join(luiti_config.curr_project_dir, filename1))
 66 | 
 67 |         # These files are created by luigi.
 68 |         if exists("job-instance.pickle") and exists("job.jar") and \
 69 |                 exists("packages.tar") and exists("luigi"):
 70 |             is_in_luigi_distributed = True
 71 | 
 72 |         # compact with no-luiti project
 73 |         is_a_luiti_project = exists("luiti_tasks")
 74 | 
 75 |         if luiti_config.curr_project_name is None:
 76 |             if is_in_luigi_distributed:
 77 |                 for item1 in os.listdir(luiti_config.curr_project_dir):
 78 |                     # is a valid python package
 79 |                     if exists(item1 + "/__init__.py") and \
 80 |                             exists(item1 + "/luiti_tasks"):
 81 |                         luiti_config.luiti_tasks_packages.add(luiti_config.import2(item1))
 82 |             else:
 83 |                 # "project_A"
 84 |                 curr_project_name = luiti_config.get_curr_project_name()
 85 |                 luiti_config.curr_project_name = curr_project_name
 86 | 
 87 |                 # project_A/
 88 |                 curr_project_syspath = os.path.dirname(luiti_config.curr_project_dir)
 89 |                 if curr_project_syspath not in sys.path:
 90 |                     sys.path.insert(0, curr_project_syspath)
 91 | 
 92 |                 luiti_config.luiti_tasks_packages.add(luiti_config.import2(luiti_config.curr_project_name))
 93 | 
 94 |                 # 3. ensure other luiti tasks packages can be loaded.
 95 |                 if is_a_luiti_project:
 96 |                     luiti_config.import2(
 97 |                         luiti_config.curr_project_name + ".luiti_tasks.__init_luiti")
 98 | 
 99 |     def get_curr_project_path(self):
100 |         curr_package_name = self.get_curr_project_name()
101 |         curr_path = luiti_config.curr_project_dir
102 |         dir1 = curr_path.rstrip("/")
103 |         if dir1.split("/").count(curr_package_name) == 2:
104 |             dir1 = os.path.dirname(dir1)
105 |         return dir1
106 | 
107 |     def get_curr_project_name(self):
108 |         """ a valid Python package path. """
109 |         assert isinstance(luiti_config.curr_project_dir, str), luiti_config.curr_project_dir
110 |         return os.path.basename(luiti_config.curr_project_dir)
111 | 
112 |     def fix_project_dir(self):
113 |         """ Fix project_A/project_A/luiti_tasks dir """
114 |         _try_dir = os.path.join(
115 |             luiti_config.curr_project_dir,
116 |             os.path.basename(luiti_config.curr_project_dir))
117 |         if os.path.exists(_try_dir):  # cause of the same name
118 |             luiti_config.curr_project_dir = _try_dir
119 | 
120 | 
121 | luiti_config = LuitiConfigClass()
122 | 


--------------------------------------------------------------------------------
/luiti/manager/dep.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from collections import defaultdict
 4 | 
 5 | 
 6 | class Dep(object):
 7 | 
 8 |     @staticmethod
 9 |     def find_dep_on_tasks(curr_task_1, task_classes_1):
10 |         """ return all task classes. """
11 |         # 找到的DAG库没有对应功能或不好用，比如 dagger。只能自己实现了。
12 |         task_name_to_instance = {task_instance_1.__name__: task_instance_1
13 |                                  for task_instance_1 in
14 |                                  (task_classes_1 + [curr_task_1])}
15 | 
16 |         linked_dict = defaultdict(list)  # dep_task => next_task
17 |         for task_2 in task_classes_1:
18 |             for ref_task_name_3 in task_2._ref_tasks:
19 |                 linked_dict[ref_task_name_3].append(task_2.__name__)
20 | 
21 |         # filter linked to self
22 |         result = set(
23 |             linked_dict[curr_task_1.__name__] +
24 |             [curr_task_1.__name__])
25 |         _is_add = True
26 |         while True:
27 |             for next_task_name_1 in list(result):  # make a copy
28 |                 next_task_names_2 = linked_dict[next_task_name_1]
29 |                 # 1. 没数据
30 |                 if len(next_task_names_2) == 0:
31 |                     _is_add = False
32 |                 # 2. 有数据
33 |                 else:
34 |                     for next_task_name_2 in next_task_names_2:
35 |                         if next_task_name_2 in result:
36 |                             _is_add = False
37 |                         else:
38 |                             result.add(next_task_name_2)
39 | 
40 |             if not _is_add:
41 |                 break
42 | 
43 |         result = [task_name_to_instance[name_1] for name_1 in result]
44 |         result.remove(curr_task_1)
45 |         return result
46 | 


--------------------------------------------------------------------------------
/luiti/manager/files.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from ..luigi_extensions import ArrowParameter
 4 | import luigi.hdfs
 5 | from datetime import datetime
 6 | 
 7 | 
 8 | class Files(object):
 9 | 
10 |     """ Get all outputs which generated by luiti tasks. """
11 | 
12 |     @staticmethod
13 |     def get_all_date_file_to_task_instances(date_range, task_classes):
14 |         """ return all instances in date range. """
15 |         assert_msg = "[error] correct format is \"20140901-20140905\", " \
16 |                      "but the input is %s" % date_range
17 |         assert len(date_range) == 17, assert_msg
18 | 
19 |         first_date, last_date = date_range[0:8], date_range[9:]
20 |         first_date, last_date = ArrowParameter.get(
21 |             first_date, "YYYYMMDD"), ArrowParameter.get(last_date, "YYYYMMDD")
22 | 
23 |         return dict({file_3: task_instance_2
24 |                      for task1 in task_classes
25 |                      for task_instance_2 in task1.instances_by_date_range(
26 |                          first_date, last_date)
27 |                      for file_3 in task_instance_2._persist_files +
28 |                      [task_instance_2.data_file]})
29 | 
30 |     @staticmethod
31 |     def soft_delete_files(*files):
32 |         delete_at_str = datetime.now().strftime("-deleted-at-%Y%m%d-%H%M%S")
33 | 
34 |         for file1 in sorted(files):
35 |             print "[delete file]", file1
36 |             if luigi.hdfs.clients.exists(file1):
37 |                 luigi.hdfs.clients.rename(file1, file1 + delete_at_str)
38 |                 print
39 |             else:
40 |                 print "[err] doesnt exist!"
41 | 
42 |         print "\nDone!"
43 |         return 0
44 | 


--------------------------------------------------------------------------------
/luiti/manager/generate_from_templates.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | # :PEP8 -E221 -W603
  3 | 
  4 | __all__ = ['GenerateFromTemplates']
  5 | 
  6 | import os
  7 | from inflector import Inflector
  8 | from .config import luiti_config
  9 | 
 10 | join = os.path.join
 11 | exists = os.path.exists
 12 | 
 13 | 
 14 | class GenerateFromTemplates(object):
 15 | 
 16 |     @staticmethod
 17 |     def new_a_project(project_name):
 18 |         project_name = Inflector().underscore(project_name)
 19 |         readme_path = join(project_name, "README.markdown")
 20 |         setup_path = join(project_name, "setup.py")
 21 |         package_dir = join(project_name, project_name)
 22 |         package_init = join(package_dir, "__init__.py")
 23 |         package_luiti_tasks_init = join(package_dir, "luiti_tasks/__init__.py")
 24 |         package_luiti_tasks_luiti = join(
 25 |             package_dir, "luiti_tasks/__init_luiti.py")
 26 |         tests_dir = join(project_name, "tests")
 27 |         tests_test_main = join(tests_dir, "test_main.py")
 28 | 
 29 |         write_content_to_file(a_project_readme(project_name), readme_path)
 30 |         write_content_to_file(a_project_setup(project_name), setup_path)
 31 |         write_content_to_file(u"", package_init)
 32 |         write_content_to_file(u"", package_luiti_tasks_init)
 33 |         write_content_to_file(
 34 |             a_project_init_luiti(), package_luiti_tasks_luiti)
 35 |         write_content_to_file(
 36 |             a_project_test_main(project_name), tests_test_main)
 37 | 
 38 |         # important files
 39 |         return [readme_path, setup_path,
 40 |                 package_luiti_tasks_luiti, tests_test_main]
 41 | 
 42 |     @staticmethod
 43 |     def generate_a_task(task_name, project_dir=None,):
 44 |         path = join('luiti_tasks', Inflector().underscore(task_name) + ".py")
 45 |         if project_dir:
 46 |             path = join(project_dir, path)
 47 |         content = write_content_to_file(
 48 |             a_task_template(Inflector().classify(task_name)),
 49 |             path,
 50 |         )
 51 |         return content
 52 | 
 53 | 
 54 | """ 1. Project """
# README template. Returns the titleized project name (via Inflector)
# underlined with "=" characters, followed by a "TODO ..." placeholder.
# Surrounding blank lines of the literal are removed by `.strip()` before
# the name is substituted.
a_project_readme = lambda project_name: u"""
%s
=======================

TODO ...
""".strip() % (Inflector().titleize(project_name), )
 61 | 
# setup.py template. `project_name` is substituted three times: as the
# distribution name, as the top-level package, and as the parent of the
# "luiti_tasks" sub-package. The generated file sets zip_safe=False.
a_project_setup = lambda project_name: u"""
# -*-coding:utf-8-*-

from setuptools import setup

setup(
    name="%s",
    version="0.0.1",
    packages=[
        "%s",
        "%s/luiti_tasks", ],
    zip_safe=False,
)
""".strip() % (project_name, project_name, project_name, )
 76 | 
 77 | """ has bugs ...
 78 | from setuptools import setup, find_packages
 79 |     packages=find_packages("%s"),
 80 |     package_dir = {"": "%s"},
 81 | """
 82 | 
 83 | 
# Template of `luiti_tasks/__init_luiti.py`. Takes no argument; the
# generated file star-imports luiti and calls `luigi.plug_packages` with two
# placeholder package names that the project author is expected to replace.
a_project_init_luiti = lambda: u"""
# -*-coding:utf-8-*-

from luiti import *
luigi.plug_packages("package_a", "package_b==4.2")
""".strip()
 90 | 
 91 | 
# Template of `tests/test_main.py`. The generated test module puts the
# project root on sys.path and declares an empty `mr_task_names` list on a
# `MrTestCase` decorated TestCase.
# NOTE: `project_name` is accepted but not used, the template has no
# placeholder.
a_project_test_main = lambda project_name: u"""
# -*- coding: utf-8 -*-

import os
import sys
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, root_dir)

import unittest
from luiti import MrTestCase


@MrTestCase
class TestMapReduce(unittest.TestCase):
    mr_task_names = [
            ]

if __name__ == '__main__':
    unittest.main()
""".strip()
112 | 
113 | 
114 | """ 2. Task """
# Task module template. The first placeholder is the task class name, the
# second one is its base class name as computed by
# `luiti_config.get_time_task` (e.g. "TaskDay"), which is derived from the
# last word of the class name.
a_task_template = lambda task_clsname: u"""
# -*-coding:utf-8-*-

from .__init_luiti import *


@luigi.ref_tasks()
class %s(%s):

    root_dir = "/foobar"
""".strip() % (task_clsname, luiti_config.get_time_task(task_clsname), )
126 | 
127 | 
def write_content_to_file(content, path):
    """
    Write `content` (UTF-8 encoded) to a new file at `path`.

    Missing parent directories are created, including nested ones. A path
    without any directory part is written into the current directory.

    :param content: text to write.
    :param path: target file path; must not exist yet.
    :return: `content`, unchanged.
    :raises ValueError: if `path` already exists.
    """
    if os.path.exists(path):
        raise ValueError("path [%s] is already exists!" % path)

    dir1 = os.path.dirname(path)
    # `dir1` is "" for a bare filename: there is nothing to create then.
    if dir1 and not os.path.exists(dir1):
        os.makedirs(dir1)

    # Binary mode: the encoded bytes are written verbatim, and the `with`
    # block closes the file even when the write fails.
    with open(path, 'wb') as f1:
        f1.write(content.encode("UTF-8"))

    print("[info] generate %s file." % path)

    return content
143 | 


--------------------------------------------------------------------------------
/luiti/manager/lazy_data.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["ld"]
 4 | 
 5 | 
 6 | from etl_utils import singleton, cached_property
 7 | 
 8 | from .loader import Loader
 9 | from .dep import Dep
10 | from .table import Table
11 | 
12 | 
@singleton()
class LazyData(object):
    """ Task loading results, computed on first access and then cached. """

    @cached_property
    def result(self):
        """ The dict returned by `Loader.load_all_tasks`. """
        return Loader.load_all_tasks()

    @cached_property
    def all_task_classes(self):
        """ Task classes of all successfully loaded tasks. """
        task_classes = []
        for loaded in self.result['success']:
            task_classes.append(loaded['task_cls'])
        return task_classes

# Share the single instance with the helper classes that read `cls.ld`.
ld = LazyData()
Dep.ld = ld
Table.ld = ld
27 | 


--------------------------------------------------------------------------------
/luiti/manager/loader.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import sys
 4 | import traceback
 5 | from inflector import Inflector
 6 | 
 7 | from .config import luiti_config as lc
 8 | from .active_packages import active_packages
 9 | from .package_map import PackageMap
10 | 
11 | 
class Loader(object):
    """ Import luiti task classes by name. """

    @staticmethod
    @active_packages
    def load_all_tasks():
        """
        Try to load every task class known to `PackageMap`.

        Returns {"success": [{"task_cls": cls}, ...],
                 "failure": [{"err": text, "task_clsname": name}, ...]},
        processed in sorted class name order.
        """
        result = {"success": list(), "failure": list()}

        for clsname in sorted(PackageMap.task_clsname_to_package.keys()):
            try:
                task_cls = Loader.load_a_task_by_name(clsname)
            except Exception:
                exc_type, exc_value, exc_tb = sys.exc_info()
                tb_text = "".join(traceback.format_tb(exc_tb))
                err = str(exc_type) + ": " + str(exc_value) + "\n" + tb_text
                result['failure'].append(
                    {"err": err, "task_clsname": clsname})
            else:
                result['success'].append({"task_cls": task_cls})

        return result

    @staticmethod
    @active_packages
    def load_a_task_by_name(s1):
        """
        Import `<package>.luiti_tasks.<underscored name>` and return the
        class whose name is the classified form of `s1`.
        """
        class_name = Inflector().classify(s1)    # force convert
        file_name = Inflector().underscore(s1)  # force convert

        assert class_name in PackageMap.task_clsname_to_package, u"""
        "%s" cannt be found. Auto converted class name is "%s", file name
        is "luiti_tasks/%s.py", please check it carefully.

        Already loaded PackageMap.task_clsname_to_package is %s.
        """ % (s1, class_name, file_name, PackageMap.task_clsname_to_package)

        owner_package = PackageMap.task_clsname_to_package[class_name]
        module_path = owner_package.__name__ + ".luiti_tasks." + file_name
        task_module = lc.import2(module_path)
        return getattr(task_module, class_name)
58 | 


--------------------------------------------------------------------------------
/luiti/manager/package_map.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import os
 4 | import glob
 5 | from inflector import Inflector
 6 | from etl_utils import singleton, cached_property
 7 | from collections import defaultdict
 8 | 
 9 | from .config import luiti_config as lc
10 | from .active_packages import active_packages
11 | 
12 | 
@singleton()
class PackageMapClass(object):
    """ Lookup tables between task class names and their packages. """

    @cached_property
    @active_packages
    def task_clsname_to_package(self):
        """
        Map each task class name to the package that provides it, by
        scanning `luiti_tasks/[a-z]*.py` inside every registered package.
        """
        assert lc.luiti_tasks_packages, "At least have one project!"

        clsname_to_package = dict()
        for package in lc.luiti_tasks_packages:
            package_dir = package.__path__[0]

            # A normal package directory has an __init__.py on disk; a
            # zipped one does not.
            is_plain_directory = os.path.exists(
                os.path.join(package_dir, "__init__.py"))
            if not is_plain_directory:
                raise Exception(
                    """[setup.py format error] make sure """
                    """project "%s" zip_safe=False option exists!"""
                    % package.__name__)

            pattern = os.path.join(package_dir, "luiti_tasks/[a-z]*.py")
            for task_file in glob.glob(pattern):
                module_name = os.path.splitext(os.path.basename(task_file))[0]
                clsname_to_package[Inflector().classify(module_name)] = package
        return clsname_to_package

    @cached_property
    def package_to_task_clsnames(self):
        """ Reverse mapping: package => set of its task class names. """
        clsnames_by_package = defaultdict(set)
        for clsname, owner in self.task_clsname_to_package.iteritems():
            clsnames_by_package[owner].add(clsname)
        return clsnames_by_package

PackageMap = PackageMapClass()
52 | 


--------------------------------------------------------------------------------
/luiti/manager/sys_argv.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["SysArgv"]
 4 | 
 5 | 
 6 | class SysArgv(object):
 7 |     """
 8 |     Modify sys.argv to fix luigi's command interface.
 9 |     """
10 | 
11 |     @staticmethod
12 |     def convert_to_luigi_accepted_argv(subparsers, argv):
13 |         luigi_keep_opts = ["--date-value"]
14 | 
15 |         def fetch_keys(parser1):
16 |             return parser1.__dict__['_option_string_actions'].keys()
17 | 
18 |         luiti_only_opts = subparsers.choices.keys() + \
19 |             list(set(
20 |                 [k3 for p2 in subparsers._name_parser_map.values()
21 |                     for k3 in fetch_keys(p2)]))
22 |         luiti_only_opts = [i1 for i1 in luiti_only_opts
23 |                            if i1 not in luigi_keep_opts]
24 | 
25 |         delete_argv_idxes = set([])
26 |         for idx1, arg1 in enumerate(argv):
27 |             if idx1 in delete_argv_idxes:
28 |                 continue
29 |             # 1. remove tasks, files, run, etc.
30 |             if (not arg1.startswith("--")) and (arg1 in luiti_only_opts):
31 |                 delete_argv_idxes.add(idx1)
32 |                 continue
33 |             # 2. process --task-name and more params
34 |             if "=" in arg1:
35 |                 arg2, val2 = arg1.split("=", 1)
36 |                 if arg2 in luiti_only_opts:
37 |                     delete_argv_idxes.add(idx1)
38 |             else:
39 |                 if (arg1 in luiti_only_opts) and (arg1 not in luigi_keep_opts):
40 |                     delete_argv_idxes.add(idx1)
41 |                     delete_argv_idxes.add(idx1 + 1)
42 |         argv = [arg1 for idx1, arg1 in enumerate(argv)
43 |                 if idx1 not in delete_argv_idxes]
44 |         return argv
45 | 


--------------------------------------------------------------------------------
/luiti/manager/table.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | import os
  4 | from .dep import Dep
  5 | import luigi
  6 | 
  7 | 
  8 | class Table(object):
  9 |     """
 10 |     print task and package info.
 11 |     """
 12 | 
 13 |     @staticmethod
 14 |     def puts(task_body, task_headers, **opts):
 15 |         from tabulate import tabulate
 16 |         result = tabulate(task_body, task_headers, **opts)
 17 |         print
 18 |         print result
 19 |         print
 20 |         return result
 21 | 
 22 |     @staticmethod
 23 |     def print_all_tasks(result):
 24 |         """ input from Loader.load_all_tasks """
 25 | 
 26 |         def task_inspect(task_cls, order):
 27 |             return [
 28 |                 order,
 29 |                 task_cls.__name__,
 30 |                 task_cls.__module__.split(".")[0]
 31 |             ]
 32 | 
 33 |         task_headers = ["", "All Tasks", "luiti_package"]
 34 |         task_body = [task_inspect(item1['task_cls'], idx1 + 1)
 35 |                      for idx1, item1 in enumerate(sorted(result['success']))]
 36 |         task_body.extend([["total", len(result['success']), ""]])
 37 | 
 38 |         Table.puts(task_body, task_headers, tablefmt="grid")
 39 | 
 40 |         if result['failure']:
 41 |             print
 42 |             print "[warn] failure parsed files"
 43 |             print
 44 |             for failure1 in result['failure']:
 45 |                 print "[task_file] ", failure1['task_clsname']
 46 |                 print "[err] ", failure1['err']
 47 |                 print
 48 |         return (task_body, task_headers)
 49 | 
 50 |     @staticmethod
 51 |     def print_files_by_task_cls_and_date_range(curr_task, args, opts=None):
 52 |         opts = opts or dict()
 53 |         # 打印 依赖类 和 执行配置 信息
 54 |         task_headers = ["Current Env Key", "Current Env Value"]
 55 |         task_body = [
 56 |             ["task name", args.task_name],
 57 |             ["task date range", args.date_range],
 58 |             ["task execute mode", "DRY=" + str(args.dry)],
 59 |             ["task dep mode", "DEP=" + str(args.dep)],
 60 |             ["related task classes total count", opts['task_classes_count']],
 61 |         ]
 62 |         print
 63 |         print "Tasks related infos"
 64 |         Table.puts(task_body, task_headers, tablefmt="grid")
 65 | 
 66 |         # 打印 要删除的文件列表
 67 |         file_headers = ["Generated from task", "Storage",
 68 |                         "Date value", "Filename"]
 69 | 
 70 |         dep_file_to_task_instances = opts['dep_file_to_task_instances']
 71 |         file_table = [
 72 |             [dep_file_to_task_instances[f1].__class__.__name__,
 73 |              'HDFS', dep_file_to_task_instances[f1].date_str,
 74 |              os.path.basename(f1), ]
 75 |             for f1 in sorted(dep_file_to_task_instances.keys())]
 76 |         file_table.append(
 77 |             ['', '', '', "Total count %s" % len(dep_file_to_task_instances)])
 78 |         file_table.append(['', '', '', ''])
 79 |         file_uniq_root_dir = set(
 80 |             [t1.root_dir for t1 in opts['dep_tasks_on_curr_task']])
 81 |         file_table.append(
 82 |             ['All root dirs', '', '',
 83 |              'Total count %s' % len(file_uniq_root_dir)])
 84 |         for dir1 in file_uniq_root_dir:
 85 |             file_table.append(['', '', '', dir1])
 86 | 
 87 |         print
 88 |         print "Files related infos"
 89 |         Table.puts(file_table, file_headers, tablefmt="grid")
 90 |         print "\n" * 3
 91 |         return (file_table, file_headers)
 92 | 
 93 |     @staticmethod
 94 |     def print_task_info(curr_task):
 95 |         assert issubclass(curr_task, luigi.Task)
 96 | 
 97 |         dep_tasks_on_curr_task = Dep.find_dep_on_tasks(
 98 |             curr_task, Table.ld.all_task_classes)
 99 | 
100 |         task_headers = ["Task name", curr_task.__name__]
101 |         task_content = [
102 |             ["Tasks self dep on", str(list(curr_task._ref_tasks))],
103 |             ["Tasks dep on self",
104 |              str(sorted([t2.__name__ for t2 in dep_tasks_on_curr_task]))],
105 |         ]
106 |         Table.puts(task_content, task_headers, tablefmt="grid")
107 |         return (task_content, task_headers)
108 | 


--------------------------------------------------------------------------------
/luiti/schedule/__init__.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["SensorSchedule"]
4 | 
5 | from .sensor_schedule import SensorSchedule
6 | 


--------------------------------------------------------------------------------
/luiti/task_templates/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["TaskHour",
 4 |            "TaskHourHadoop",
 5 |            "TaskDay",
 6 |            "TaskDayHadoop",
 7 |            "TaskWeek",
 8 |            "TaskWeekHadoop",
 9 |            "TaskBiweekly",
10 |            "TaskBiweeklyHadoop",
11 |            "TaskMonth",
12 |            "TaskMonthHadoop",
13 |            "TaskQuarter",
14 |            "TaskQuarterHadoop",
15 |            "TaskYear",
16 |            "TaskYearHadoop",
17 |            "TaskRange",
18 |            "TaskRangeHadoop",
19 | 
20 |            "StaticFile",
21 |            "HiveTask",
22 |            "MongoImportTask", ]
23 | 
24 | 
25 | from .time.task_hour import TaskHour
26 | from .time.task_day import TaskDay
27 | from .time.task_week import TaskWeek
28 | from .time.task_biweekly import TaskBiweekly
29 | from .time.task_month import TaskMonth
30 | from .time.task_quarter import TaskQuarter
31 | from .time.task_year import TaskYear
32 | from .time.task_range import TaskRange
33 | 
34 | from .time.task_hour_hadoop import TaskHourHadoop
35 | from .time.task_day_hadoop import TaskDayHadoop
36 | from .time.task_week_hadoop import TaskWeekHadoop
37 | from .time.task_biweekly_hadoop import TaskBiweeklyHadoop
38 | from .time.task_month_hadoop import TaskMonthHadoop
39 | from .time.task_quarter_hadoop import TaskQuarterHadoop
40 | from .time.task_year_hadoop import TaskYearHadoop
41 | from .time.task_range_hadoop import TaskRangeHadoop
42 | 
43 | from .other.static_file import StaticFile
44 | from .other.mongo_import_task import MongoImportTask
45 | from .other.hive_task import HiveTask
46 | 


--------------------------------------------------------------------------------
/luiti/task_templates/other/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/luiti/task_templates/other/__init__.py


--------------------------------------------------------------------------------
/luiti/task_templates/other/hive_task.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["HiveTask"]
 4 | 
 5 | 
 6 | from etl_utils import cached_property
 7 | from luigi.contrib.hive import HiveQueryTask
 8 | 
 9 | from ...utils import TargetUtils
10 | from ...luigi_extensions import luigi, TaskBase
11 | 
12 | 
@luigi.as_a_luiti_task()
class HiveTask(HiveQueryTask, TaskBase):
    """
    Hive SQL template that follows luiti's `date_value` date mode.

    Subclasses provide:
      1. use_hive_db  (or the deprecated `hive_db`)
      2. sql_main

    Example:
      from luiti.task_templates import HiveTask

      class AnotherHiveDay(HiveTask):
          root_dir = "/another/hive/result/"
          use_hive_db = "main_hive_database"

          @cached_property
          def sql_main(self):
              return "select * from example_table;"

    """

    run_mode = "mr_distribute"

    # Deprecated API, use `use_hive_db` instead.
    hive_db = NotImplementedError

    def output(self):
        """
        Return `TargetUtils.hdfs_dir(self.data_file)`.

        A Hive query's default output directory has no _SUCCESS marker and
        its chunk filenames are not MR style; see `TargetUtils.hdfs_dir`
        for more details.
        """
        # Guard against a path that was built from a stringified ValueError.
        assert "ValueError" not in self.data_file, self.data_file
        return TargetUtils.hdfs_dir(self.data_file)

    def query(self):
        """
        Build the Hive statement: `USE <use_hive_db>;` followed by an
        `INSERT OVERWRITE DIRECTORY "<data_file>"` of the stripped `sql_main`.

        Only the newlines of the template itself are flattened to spaces;
        newlines inside `sql_main` are left untouched.
        """
        template = u"""
USE %s;
INSERT OVERWRITE DIRECTORY "%s" %s
""".replace("\n", " ")
        sql = template % (self.use_hive_db, self.data_file, self.sql_main.strip())

        # Log the statement only in distributed mode.
        if self.run_mode == "mr_distribute":
            print("[info.luiti] run Hive SQL := %s" % sql)

        return sql.strip()

    @cached_property
    def data_root(self):
        """ Old API; always raises ValueError pointing at `root_dir`. """
        raise ValueError("Old API. Please use luiti's standard property `root_dir` instead.")

    @cached_property
    def root_dir(self):
        """
        Fall back to the legacy `data_root` unless the class-level
        `data_root` is one of the sentinel exception classes.

        NOTE(review): on this base class `self.__class__.data_root` is
        presumably the cached_property descriptor rather than a sentinel,
        so the fallback reads `self.data_root`, which raises -- confirm
        against etl_utils.cached_property.
        """
        sentinels = [NotImplementedError, ValueError]
        # `data_root` may itself be a cached_property on the class.
        if self.__class__.data_root in sentinels:
            raise ValueError
        return self.data_root  # from instance

    @cached_property
    def use_hive_db(self):
        """ Hive database name; defaults to the deprecated `hive_db` attribute. """
        if self.hive_db is NotImplementedError:
            raise ValueError
        return self.hive_db

    @cached_property
    def sql_main(self):
        """
        The main SQL text. Must be implemented in a subclass; the default
        raises ValueError.
        """
        raise ValueError
80 | 


--------------------------------------------------------------------------------
/luiti/task_templates/other/mongo_import_task.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | from etl_utils import process_notifier, cached_property
  4 | import luigi
  5 | import os
  6 | import arrow
  7 | import json
  8 | 
  9 | from ...luigi_extensions import TaskBase
 10 | from ...utils import CommandUtils, TargetUtils
 11 | 
 12 | 
 13 | class MongoImportTask(TaskBase):
 14 |     """
 15 |     Copy stat files to MongoDB.
 16 | 
 17 |     Steps:
 18 |         1. download file from HDFS.
 19 |         2. Make some indexes on MongoDB if needed.
 20 |         3. Run `mongoimport` to import data.
 21 |         4. Update report_status collection in MongoDB.
 22 | 
 23 |     Required:
 24 |         1. Must be JSON file
 25 |     """
 26 | 
 27 |     report_status_collection_name = "report_status"
 28 |     report_status_namespace = "latestCollection"
 29 |     report_name = NotImplementedError
 30 | 
 31 |     system_tmp = "/tmp"  # default
 32 | 
 33 |     @cached_property
 34 |     def mongodb_db(self):
 35 |         return self.mongodb_connection[self.database_name]
 36 | 
 37 |     @cached_property
 38 |     def mongodb_connection_address(self):
 39 |         """ e.g. ('192.168.20.111', 37001) """
 40 |         methods = dir(self.mongodb_connection)
 41 |         result = None
 42 | 
 43 |         # Compact with new pymongo API
 44 |         if "address" in methods:
 45 |             result = getattr(self.mongodb_connection, "address")
 46 |         if "connection" in methods:
 47 |             result = getattr(self.mongodb_connection, "connection").address
 48 |         if ("port" in methods) and ("host" in methods):
 49 |             result = (self.mongodb_connection.host, self.mongodb_connection.port)
 50 |         if result:
 51 |             assert len(result) == 2, result
 52 |             return result
 53 |         else:
 54 |             raise ValueError(self.mongodb_connection)
 55 | 
 56 |     @cached_property
 57 |     def mongodb_connection_host(self):
 58 |         return self.mongodb_connection_address[0]
 59 | 
 60 |     @cached_property
 61 |     def mongodb_connection_port(self):
 62 |         return self.mongodb_connection_address[1]
 63 | 
 64 |     @cached_property
 65 |     def report_status_collection_model(self):
 66 |         return self.mongodb_db[self.report_status_collection_name]
 67 | 
 68 |     @cached_property
 69 |     def data_file_collection_model(self):
 70 |         return self.mongodb_db[self.collection_name]
 71 | 
 72 |     # 1. config
 73 |     @cached_property
 74 |     def source_task(self):
 75 |         raise NotImplementedError
 76 | 
 77 |     @cached_property
 78 |     def mongodb_connection(self):
 79 |         raise NotImplementedError
 80 | 
 81 |     @cached_property
 82 |     def database_name(self):
 83 |         raise NotImplementedError
 84 | 
 85 |     @cached_property
 86 |     def index_schema(self):
 87 |         raise NotImplementedError
 88 | 
 89 |     def run_before_hook(self):
 90 |         pass
 91 | 
 92 |     def run_after_hook(self):
 93 |         pass
 94 | 
 95 |     # 2. common
 96 |     def requires(self):
 97 |         return [getattr(self, _ref_task_1)(self.date_value)
 98 |                 for _ref_task_1 in self._ref_tasks]
 99 | 
100 |     def run(self):
101 |         self.run_before_hook()
102 | 
103 |         # 1. check is already done.
104 |         if self.is_collection_exists():
105 |             print "[info] %s already exists!" % (self.data_file_collection_model, )
106 |             return False
107 | 
108 |         # 2. check report status collection is valid
109 |         if self.report_status_collection_model.count() == 0:
110 |             self.report_status_collection_model.insert(
111 |                 {self.report_status_namespace: {}})
112 |         assert self.report_status_collection_model.count() == 1, "更新纪录 只能有一条！"
113 | 
114 |         # 3. output json with err
115 |         data_file1 = self.source_task_instance.data_file
116 |         source1 = luigi.HDFS(data_file1)
117 |         tmp_file1 = open(self.tmp_filepath, 'w')
118 | 
119 |         for line1 in process_notifier(
120 |                 TargetUtils.line_read(source1), u"[read lines] %s" % source1):
121 |             tmp_file1.write(line1 + "\n")
122 |         tmp_file1.close()
123 | 
124 |         # 4. upload to mongodb
125 |         CommandUtils.execute(self.mongo_ensure_index)
126 |         CommandUtils.execute(self.mongoimport_command)
127 | 
128 |         # 5. clean tmp
129 |         CommandUtils.execute("rm -f %s" % self.tmp_filepath)
130 | 
131 |         # 6. update report status
132 |         item1 = self.report_status_collection_model.find()[0]
133 |         del item1['_id']
134 |         item1[self.report_status_namespace][self.report_name] = {
135 |             'collection_name': self.collection_name,
136 |             'updated_at': arrow.now().datetime,
137 |         }
138 |         self.report_status_collection_model.find_and_modify(
139 |             query={},
140 |             update={"$set": item1},
141 |             full_response=True
142 |         )
143 | 
144 |         self.run_after_hook()
145 | 
146 |         return True
147 | 
148 |     def is_collection_exists(self):
149 |         return self.data_file_collection_model.count() > 0
150 | 
151 |     @cached_property
152 |     def source_task_instance(self):
153 |         return self.source_task(self.date_value)
154 | 
155 |     @cached_property
156 |     def mongoimport_command(self):
157 |         return "/usr/bin/mongoimport " + \
158 |             ("--host %s " % self.mongodb_connection_host) + \
159 |             ("--port %s " % self.mongodb_connection_port) + \
160 |             ("--db %s " % self.database_name) + \
161 |             ("--collection %s " % self.collection_name) + \
162 |             ("--file %s" % self.tmp_filepath)
163 | 
164 |     @cached_property
165 |     def mongo_ensure_index(self):
166 |         if not isinstance(self.index_schema, basestring):
167 |             self.index_schema = json.dumps(self.index_schema)
168 |         js_str = "db.%s.ensureIndex(%s)" % \
169 |             (self.collection_name, self.index_schema)
170 |         return self.mongo_eval(js_str)
171 | 
172 |     def mongo_eval(self, js_str):
173 |         return "/usr/bin/mongo " + \
174 |             ("%s:%s/%s " % (self.mongodb_connection_host, self.mongodb_connection_port, self.database_name)) + \
175 |             ("--eval \"%s\" " % js_str)
176 | 
177 |     @cached_property
178 |     def collection_name(self):
179 |         """ e.g. redmine5954_parent_report_week_20140901 """
180 |         return self.data_name + "_" + self.date_value.strftime("%Y%m%d")
181 | 
182 |     @cached_property
183 |     def tmp_filepath(self):
184 |         return self.tmp_dir + "/" + self.date_value.strftime("%Y%m%d")
185 | 
186 |     @cached_property
187 |     def tmp_dir(self):
188 |         dir1 = os.path.join(self.system_tmp, self.task_class.__name__)
189 |         os.system("mkdir -p %s" % dir1)
190 |         return dir1
191 | 


--------------------------------------------------------------------------------
/luiti/task_templates/other/static_file.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | 
 4 | from etl_utils import cached_property
 5 | from ...luigi_extensions import luigi
 6 | from ...utils import TargetUtils
 7 | 
 8 | 
 9 | class StaticFile(luigi.ExternalTask):
10 |     """
11 |     By default, luigi don't have the ability to operate that tasks's outputs are generated by outside system
12 | 
13 |     So let luiti to schedule the task DAG, it allows to task to wait before submit to `luigid`. Check more details at luiti.schedule.
14 |     """
15 | 
16 |     is_external = True  # see more documents at TaskBase
17 |     data_file = None  # The same as luiti.TaskBase
18 |     filepath = None  # Deprecated
19 | 
20 |     # Mimic default luigi.ExternalTask
21 |     def run(self):
22 |         pass
23 | 
24 |     def complete(self):
25 |         return True
26 | 
27 |     def output(self):
28 |         # Compatible with old API `filepath`
29 |         if (self.data_file in [NotImplementedError, None]) \
30 |                 and isinstance(self.filepath, basestring):
31 |             self.data_file = self.filepath
32 | 
33 |         assert self.data_file, u"Please assign `data_file` !"
34 |         return self.IODevice(self.data_file)
35 | 
36 |     @cached_property
37 |     def IODevice(self):
38 |         return self.io_devices[0]  # default is HDFS
39 | 
40 |     io_devices = [TargetUtils.hdfs, luigi.LocalTarget]
41 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/__init__.py:
--------------------------------------------------------------------------------
 1 | """
The APIs are listed in the parent package's __init__.py .
 3 | 
 4 | 
 5 | Example:
 6 |     class TaskDayHadoop(luigi.hadoop.HadoopExt, TaskDay):
 7 |         pass
 8 | 
 9 | TaskDay.__init__ will overwrite luigi.hadoop.HadoopExt's.
10 | 
11 | 
12 | NOTE: luigi.hadoop.HadoopExt will overwrite TaskDay
13 | 
14 | """
15 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_biweekly.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskBiweekly']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | 
 7 | 
 8 | class TaskBiweekly(TaskBase):
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_biweekly_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_biweekly import TaskBiweekly
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskBiweeklyHadoop(luigi.hadoop.HadoopExt, TaskBiweekly):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskDay']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | import arrow
 7 | from etl_utils import cached_property
 8 | 
 9 | class TaskDay(TaskBase):
10 | 
11 |     @cached_property
12 |     def latest_7_days(self):
13 |             return arrow.Arrow.range(
14 |                 'day',
15 |                 self.date_value.replace(days=-6),
16 |                 self.date_value,)
17 | 
18 |     @cached_property
19 |     def latest_30_days(self):
20 |             return arrow.Arrow.range(
21 |                 'day',
22 |                 self.date_value.replace(days=-29),
23 |                 self.date_value,)
24 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_day_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_day import TaskDay
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskDayHadoop(luigi.hadoop.HadoopExt, TaskDay):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_hour.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskHour']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | 
 7 | 
 8 | class TaskHour(TaskBase):
 9 | 
10 |     pass
11 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_hour_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_hour import TaskHour
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskHourHadoop(luigi.hadoop.HadoopExt, TaskHour):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_month.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskMonth']
 4 | 
 5 | from etl_utils import cached_property
 6 | from ...luigi_extensions import TaskBase
 7 | import arrow
 8 | 
 9 | 
class TaskMonth(TaskBase):
    """ Month-level task template. """

    @cached_property
    def days_in_month(self):
        """
        `arrow.Arrow.range` by day between the floor and the ceil of
        `date_value`'s month.
        """
        month_start = self.date_value.floor('month')
        month_end = self.date_value.ceil('month')
        return arrow.Arrow.range('day', month_start, month_end)


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_month_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_month import TaskMonth
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskMonthHadoop(luigi.hadoop.HadoopExt, TaskMonth):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_quarter.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskQuarter']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | 
 7 | 
 8 | class TaskQuarter(TaskBase):
 9 | 
10 |     pass
11 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_quarter_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_quarter import TaskQuarter
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskQuarterHadoop(luigi.hadoop.HadoopExt, TaskQuarter):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_range.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskRange']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | from ...utils import DateUtils
 7 | 
 8 | 
 9 | class TaskRange(TaskBase):
10 | 
11 |     # NOTE date_value 和 date_range 两个值是必须的。
12 |     # 1. date_value 是写到那个日期目录
13 |     # 2. date_range 是指定了依赖的日期范围
14 | 
15 |     def date_range(self):
16 |         raise ValueError("Overwrite Me!")
17 |     # date_range = luigi.DateIntervalParameter()
18 |     # date_range = luigi.Parameter() # 临时现为 str 类型吧
19 | 
20 |     @property
21 |     def dates_in_range(self):
22 |         # method_1 = self.date_type + "s_in_range" # e.g. weeks_in_range
23 |         method_1 = 'week' + "s_in_range"  # NOTE 目前直接为 week, 因为是range.
24 | 
25 | # s1 = "2014-10-01-2014-10-07"
26 | # s1[0:10]  => '2014-10-01'
27 | # s1[11:21] => '2014-10-07'
28 |         date_1, date_2 = self.date_range[0:10], self.date_range[11:21]
29 | 
30 |         return list(getattr(DateUtils, method_1)(date_1, date_2))
31 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_range_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskRangeHadoop']
 4 | 
 5 | from .task_range import TaskRange
 6 | from ...luigi_extensions import luigi
 7 | 
 8 | 
 9 | class TaskRangeHadoop(luigi.hadoop.HadoopExt, TaskRange):
10 | 
11 |     pass
12 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_week.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskWeek']
 4 | 
 5 | from etl_utils import cached_property
 6 | from ...luigi_extensions import TaskBase
 7 | from ...utils import DateUtils
 8 | 
 9 | 
class TaskWeek(TaskBase):
    """ Week-level task template. """

    @cached_property
    def days_in_week(self):
        """ List of `DateUtils.days_in_week` for `date_value`. """
        week_days = DateUtils.days_in_week(self.date_value)
        return list(week_days)

    def requires_with_prev_week(self, ref_task1):
        """
        Require `ref_task1` for every day in the current week, plus the
        previous week's stat task when there is one.
        """
        total_tasks = []
        for date1 in self.days_in_week:
            total_tasks.append(ref_task1(date_value=date1))

        previous_task = self.pre_task_by_self
        # Appended only if it's an instance of this task class (not RootTask).
        if isinstance(previous_task, self.task_class):
            total_tasks.append(previous_task)

        return total_tasks
25 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_week_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["TaskWeekHadoop"]
 4 | 
 5 | from .task_week import TaskWeek
 6 | from ...luigi_extensions import luigi
 7 | 
 8 | 
 9 | class TaskWeekHadoop(luigi.hadoop.HadoopExt, TaskWeek):
10 |     pass
11 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_year.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['TaskYear']
 4 | 
 5 | from ...luigi_extensions import TaskBase
 6 | 
 7 | 
 8 | class TaskYear(TaskBase):
 9 | 
10 |     pass
11 | 


--------------------------------------------------------------------------------
/luiti/task_templates/time/task_year_hadoop.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .task_year import TaskYear
 4 | from ...luigi_extensions import luigi
 5 | 
 6 | 
 7 | class TaskYearHadoop(luigi.hadoop.HadoopExt, TaskYear):
 8 | 
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/luiti/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | __all__ = ['MrTestCase', "SetupLuitiPackages", "date_begin"]
 4 | 
 5 | 
 6 | from .mr_test_case import MrTestCase
 7 | from .setup_luiti_packages import SetupLuitiPackages
 8 | 
 9 | date_begin = "2014-09-01"
10 | 


--------------------------------------------------------------------------------
/luiti/tests/mr_test_case.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | __all__ = ['MrTestCase']
 4 | 
 5 | 
 6 | from collections import defaultdict
 7 | import json
 8 | 
 9 | from ..manager import Loader
10 | 
11 | 
12 | def MrTestCase(cls, verbose=False, date_value="2014-09-01"):
13 |     """
14 |     功能: 集成测试数据到 类中 ，这样就方便引用了。
15 |     """
16 | 
17 |     assert "mr_task_names" in dir(cls), "%s must assgin some task names!" % cls
18 | 
19 |     cls.maxDiff = None  # compact large json diff
20 | 
21 |     def map_lines(text):
22 |         assert isinstance(text, unicode)
23 |         result = list()
24 |         for l1 in text.split("\n"):
25 |             l1 = l1.strip()
26 |             if not l1:
27 |                 continue
28 |             result.append(l1)
29 |         return result
30 | 
31 |     def generate_closure_function(mr_task_name1):
32 |         task_cls = Loader.load_a_task_by_name(mr_task_name1)  # keep it!
33 |         if verbose:
34 |             print "[task_cls]", task_cls
35 | 
36 |         def test_mr(self):
37 |             task_instance_1 = task_cls(date_value=date_value)
38 |             if verbose:
39 |                 print "[task_instance]", task_instance_1
40 | 
41 |             task_instance_1.lines = map_lines(task_instance_1.mrtest_input())
42 |             result_expect = sorted(
43 |                 [read_json_from_mrtest_output(i2, idx + 1) for idx, i2
44 |                  in enumerate(map_lines(task_instance_1.mrtest_output()))])
45 | 
46 |             self.assertEqual(result_expect, run_map_reduce(task_instance_1))
47 |         return test_mr
48 | 
49 |     for mr_task_name1 in cls.mr_task_names:
50 |         test_method_name = "test_" + mr_task_name1
51 |         if verbose:
52 |             print
53 |         if verbose:
54 |             print "[test_method_name]", test_method_name
55 | 
56 |         setattr(
57 |             cls, test_method_name, generate_closure_function(mr_task_name1))
58 | 
59 |         if verbose:
60 |             print
61 |         if verbose:
62 |             print
63 | 
64 |     return cls
65 | 
66 | 
def run_map_reduce(task_instance_1):
    """
    Run the task's mapper and reducer in-process over `task_instance_1.lines`.

    1. Every key/value of `mrtest_attrs()` is set as an attribute on the task.
    2. Each stripped line is passed to `mapper`; emitted values are grouped
       by key.
    3. `reducer` is called once per key with an iterator over its values;
       the second element of each emitted pair is parsed as JSON.

    Returns the parsed reducer outputs, sorted.
    """
    # 1. bind attrs
    # `.items()` (not `.iteritems()`) so this runs on Python 2 and 3 alike.
    for k1, v1 in task_instance_1.mrtest_attrs().items():
        setattr(task_instance_1, k1, v1)

    # 2. map it!
    mapper_key_to_vals = defaultdict(list)
    for line1 in task_instance_1.lines:
        for key_1, val_1 in task_instance_1.mapper(line1.strip()):
            mapper_key_to_vals[key_1].append(val_1)

    # 3. reduce it!
    result_list = list()
    for key_1, vals_1 in mapper_key_to_vals.items():
        # Reducers receive an iterator over the values, not a list.
        vals_generator = iter(vals_1)
        for _, val_2 in task_instance_1.reducer(key_1, vals_generator):
            result_list.append(json.loads(val_2))
    return sorted(result_list)
85 | 
86 | 
def read_json_from_mrtest_output(line, num):
    """
    Parse `line` as JSON and return the result.

    On failure, print the offending line together with its number `num`
    and re-raise the original exception.
    """
    try:
        return json.loads(line)
    except Exception:
        print(u"[line#%s] %s" % (num, line))
        # Bare `raise` keeps the original traceback (`raise e` resets it on
        # Python 2).
        raise
94 | 


--------------------------------------------------------------------------------
/luiti/tests/setup_luiti_packages.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | __all__ = ['SetupLuitiPackages']
 4 | 
 5 | import os
 6 | import sys
 7 | from etl_utils import cached_property, singleton
 8 | 
 9 | 
@singleton()
class SetupLuitiPackagesClass(object):
    """ Puts the test packages on `sys.path` and configures luiti for tests. """

    @cached_property
    def config(self):
        """
        Prepend the test package directories to `sys.path`, point
        `luiti.config.curr_project_dir` at the `luiti_summary` test package
        and return the `luiti.config` object.
        """
        # Repository root is three directory levels above this file.
        this_file = os.path.abspath(__file__)
        root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
        assert os.path.exists(root_dir), root_dir
        parent = os.path.join(root_dir, "tests/webui_packages")

        luiti_package_names = "dump clean middle summary".split(" ")
        for project_name in luiti_package_names + ["webui_tests"]:
            sys.path.insert(0, os.path.join(parent, "luiti_" + project_name))

        # Each entry is inserted at the front, so later entries end up first.
        extra_test_dirs = (
            "tests",
            "tests/project_A",
            "tests/project_B",
            "tests/zip_package_by_luiti",
        )
        for relative_dir in extra_test_dirs:
            sys.path.insert(0, os.path.join(root_dir, relative_dir))

        # setup env
        from luiti import config
        config.curr_project_dir = os.path.join(root_dir, "tests/webui_packages/luiti_summary")

        return config
34 | 
35 | SetupLuitiPackages = SetupLuitiPackagesClass()
36 | 


--------------------------------------------------------------------------------
/luiti/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = [
 4 |     "IOUtils",
 5 |     "TargetUtils",
 6 |     "MRUtils",
 7 |     "MathUtils",
 8 |     "HDFSUtils",
 9 |     "CommandUtils",
10 |     "CompressUtils",
11 |     "DateUtils",
12 |     "ExtUtils",
13 |     "VisualiserEnvTemplate"
14 | ]
15 | 
16 | from .io_utils import IOUtils
17 | from .target_utils import TargetUtils
18 | from .mr_utils import MRUtils
19 | from .math_utils import MathUtils
20 | from .hdfs_utils import HDFSUtils
21 | from .command_utils import CommandUtils
22 | from .date_utils import DateUtils
23 | from .compress_utils import CompressUtils
24 | from .ext_utils import ExtUtils
25 | from .visualiser_env_template import VisualiserEnvTemplate
26 | 


--------------------------------------------------------------------------------
/luiti/utils/command_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | class CommandUtils:
 7 | 
 8 |     @staticmethod
 9 |     def execute(command_str, dry=False, verbose=True):
10 |         if verbose:
11 |             print "[command]", command_str
12 |         if dry:
13 |             return False
14 | 
15 |         # return commands.getstatusoutput(command_str)
16 |         return os.system(command_str)  # print logs in realtime.
17 | 


--------------------------------------------------------------------------------
/luiti/utils/compress_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import os
 4 | import glob
 5 | from .command_utils import CommandUtils
 6 | from .hdfs_utils import HDFSUtils
 7 | 
 8 | 
 9 | class CompressUtils:
10 | 
11 |     @staticmethod
12 |     def unzip_with_upload(orig_filepath, hdfs_filepath,
13 |                           tmp_dir=NotImplementedError, tmp_name=NotImplementedError):
14 |         """
15 |         1. Download zip file from HDFS
16 |         2. Unzip it
17 |         3. Reupload to the same place on HDFS
18 |         """
19 |         # 1. check
20 |         if not HDFSUtils.exists(orig_filepath):
21 |             raise ValueError("[hdfs] %s not exists!" % orig_filepath)
22 | 
23 |         # 2. pull file from hdfs
24 |         tmp_local_target = os.path.join(tmp_dir, tmp_name)
25 |         HDFSUtils.copyToLocal(orig_filepath, tmp_local_target)
26 | 
27 |         # 3. unzip
28 |         unzip_dir = tmp_dir + "/unzip"
29 |         CommandUtils.execute("mkdir -p %s" % unzip_dir)
30 |         CommandUtils.execute(
31 |             "tar xzvf %s -C %s" % (tmp_local_target, unzip_dir))
32 | 
33 |         unzip_file = unzip_dir
34 |         # 兼容 zip 文件是多层级目录
35 |         while (os.path.isdir(unzip_file)):
36 |             next_dirs = glob.glob(unzip_file + "/*")
37 |             if len(next_dirs) > 1:
38 |                 raise ValueError(
39 |                     "%s should only one dir in a zip file!" % unzip_file)
40 |             if len(next_dirs) == 0:
41 |                 raise ValueError(
42 |                     "%s must always exists one file or one dir in a zip file, "
43 |                     "but there are %s ." % (unzip_file, str(next_dirs)))
44 |             unzip_file = next_dirs[0]
45 | 
46 |         # 4. push file to hdfs
47 |         HDFSUtils.copyFromLocal(unzip_file, hdfs_filepath)
48 |         CommandUtils.execute("rm -rf %s" % unzip_dir)
49 |         CommandUtils.execute("rm -rf %s" % tmp_local_target)
50 |         return True
51 | 


--------------------------------------------------------------------------------
/luiti/utils/date_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ['DateUtils']
 4 | 
 5 | import arrow
 6 | 
 7 | 
 8 | class DateUtils:
 9 |     arrow = arrow
10 | 
11 |     @staticmethod
12 |     def arrow_str(arrow1):
13 |         return arrow.get(arrow1).datetime.strftime("%Y-%m-%d")
14 | 
15 |     @staticmethod
16 |     def days_in_week(arrow1):
17 |         arrow1 = arrow.get(arrow1)
18 |         return arrow.Arrow.range(
19 |             'day',
20 |             arrow1.floor('week'),
21 |             arrow1.ceil('week'),)
22 | 
23 |     @staticmethod
24 |     def weeks_in_range(arrow1, arrow2):
25 |         return arrow.Arrow.range(
26 |             'week',
27 |             arrow.get(arrow1).floor('week'),
28 |             arrow.get(arrow2).ceil('week'),)
29 | 
30 |     @staticmethod
31 |     def fixed_weeks_in_range(date_range_str):
32 |         """ 修复 一个范围内所有全部覆盖的weeks，即最坏情况是掐头去尾。"""
33 |         # 兼容如果date_range的最后一个不是星期天，那该周日志就不完整。
34 |         assert len(date_range_str) == 21  # e.g. "2014-09-01-2014-11-19"
35 |         first_date = arrow.get(date_range_str[0:10])
36 |         last_date = arrow.get(date_range_str[11:21])
37 |         dates = DateUtils.weeks_in_range(first_date, last_date)
38 |         if len(dates) > 0:
39 |             if last_date.weekday() != 6:    # 6 index is Sunday
40 |                 dates = dates[:-1]
41 |             if first_date.weekday() != 0:  # 0 index is Monday
42 |                 dates = dates[1:]
43 |         return dates
44 | 
45 |     @staticmethod
46 |     def date_value_by_type_in_last(date_value_1, date_type_1):
47 |         val1 = arrow.get(date_value_1).replace(**{(date_type_1 + 's'): -1}) \
48 |                                       .floor(date_type_1)
49 |         return val1
50 | 


--------------------------------------------------------------------------------
/luiti/utils/ext_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from etl_utils import cached_property
 4 | 
 5 | 
 6 | class ExtUtils(object):
 7 | 
 8 |     class ExtendClass(object):
 9 | 
10 |         """
11 |         Extend a class dynamically, and compact with `property` and
12 |         `cached_property` in a unified call mechanism.
13 |         """
14 | 
15 |         @classmethod
16 |         def extend(cls, attrs):
17 |             assert isinstance(attrs, dict), attrs
18 | 
19 |             for attr_k1, attr_v1 in attrs.iteritems():
20 |                 orig_attr = getattr(cls, attr_k1, None)
21 | 
22 |                 # convert input to original value type
23 |                 if isinstance(orig_attr, property) and \
24 |                         (not isinstance(attr_v1, property)):
25 |                     new_v1 = property(attr_v1)
26 |                 elif isinstance(orig_attr, cached_property) and \
27 |                         (not isinstance(attr_v1, cached_property)):
28 |                     new_v1 = cached_property(attr_v1)
29 |                 else:
30 |                     new_v1 = attr_v1
31 | 
32 |                 setattr(cls, attr_k1, new_v1)
33 | 


--------------------------------------------------------------------------------
/luiti/utils/hdfs_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .command_utils import CommandUtils
 4 | from .target_utils import TargetUtils
 5 | 
 6 | 
 7 | class HDFSUtils:
 8 | 
 9 |     hdfs_cli = NotImplemented
10 | 
11 |     @staticmethod
12 |     def exists(path1):
13 |         return TargetUtils.exists(path1)
14 | 
15 |     @staticmethod
16 |     def copy(path1, path2):
17 |         command1 = HDFSUtils.hdfs_cli + " -cp %s %s" % (path1, path2)
18 |         print "[command]", command1
19 |         CommandUtils.execute(command1)
20 | 
21 |     @staticmethod
22 |     def copyFromLocal(path1, path2):
23 |         command1 = HDFSUtils.hdfs_cli + \
24 |             " -copyFromLocal %s %s" % (path1, path2)
25 |         print "[command]", command1
26 |         CommandUtils.execute(command1)
27 | 
28 |     @staticmethod
29 |     def copyToLocal(path1, path2):
30 |         command1 = HDFSUtils.hdfs_cli + " -copyToLocal %s %s" % (path1, path2)
31 |         print "[command]", command1
32 |         CommandUtils.execute(command1)
33 | 
34 |     @staticmethod
35 |     def chown(path1):
36 |         command1 = HDFSUtils.hdfs_cli + " -chown -R primary_user " + path1
37 |         print "[command]", command1
38 |         CommandUtils.execute(command1)
39 | 
40 |     @staticmethod
41 |     def mkdir_p(dir1):
42 |         command1 = HDFSUtils.hdfs_cli + " -mkdir -p " + dir1
43 |         print "[command]", command1
44 |         CommandUtils.execute(command1)
45 | 
46 |     @staticmethod
47 |     def mkdir(dir1):
48 |         command1 = HDFSUtils.hdfs_cli + " -mkdir " + dir1
49 |         print "[command]", command1
50 |         CommandUtils.execute(command1)
51 | 
52 |     @staticmethod
53 |     def mv(src, dst):
54 |         command1 = HDFSUtils.hdfs_cli + (" -mv %s %s " % (src, dst))
55 |         print "[command]", command1
56 |         CommandUtils.execute(command1)
57 | 
58 | 
# TODO: wrap the repeated print / CommandUtils.execute calls in a decorator.
60 | 


--------------------------------------------------------------------------------
/luiti/utils/io_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import json
 4 | import luigi
 5 | import luigi.hdfs
 6 | from luigi import LocalTarget
 7 | from etl_utils import JsonUtils
 8 | from .target_utils import TargetUtils
 9 | 
10 | 
class IOUtils:
    """Helpers for reading and writing JSON lines on luigi targets."""

    SQL_RANGE_LIMIT = 1000

    @staticmethod
    def json_dump(o1):
        """
        Serialize `o1` to a JSON string.

        Lists and sets go through `json.dumps(list(o1))`; anything else goes
        through `JsonUtils.unicode_dump` and is encoded as UTF-8.
        """
        if isinstance(o1, (list, set,)):
            # Per the original author's note, JsonUtils.unicode_dump does not
            # support lists, hence the plain json.dumps here.
            return json.dumps(list(o1))
        return JsonUtils.unicode_dump(o1).encode("UTF-8")

    @staticmethod
    def write_json_to_output(result, output1):
        """
        Write one JSON document per line to `output1`.

        `result` may be a single dict, a set, or a list; after normalization
        it must be a non-empty list whose first element is a dict.
        Returns 0.
        """
        if isinstance(result, dict):
            result = [result]
        if isinstance(result, set):
            result = list(result)

        assert isinstance(result, list), result
        assert len(result) > 0, result
        assert isinstance(result[0], dict), result

        with output1.open('w') as sink:
            for record in result:
                sink.write(IOUtils.json_dump(record) + "\n")
        return 0
    write_jsons_to_output = write_json_to_output  # make a alias

    @staticmethod
    def read_json_from_output(output1):
        """
        Return the single JSON item stored in `output1`, or None when it has
        no lines. Raises ValueError as soon as a second line is read.
        """
        only_item = None
        for position, parsed in enumerate(TargetUtils.json_read(output1)):
            only_item = parsed
            if position >= 1:
                raise ValueError("[multiple line error]"
                                 " %s should contain only one line!" % output1)
        return only_item

    @staticmethod
    def remove_files(*files):
        """
        Remove every given path that `luigi.hdfs.exists` reports, so that a
        write which failed half-way can be cleaned up. Returns True.
        """
        for path in files:
            if not luigi.hdfs.exists(path):
                continue
            luigi.hdfs.remove(path)
        return True

    @staticmethod
    def local_target(path1):
        """Return a `LocalTarget` for `path1`."""
        return LocalTarget(path1)
68 | 


--------------------------------------------------------------------------------
/luiti/utils/math_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | 
 4 | class MathUtils:
 5 | 
 6 |     @staticmethod
 7 |     def percent(a, b):
 8 |         # reset other False type obj to 0, e.g. None.
 9 |         if not b:
10 |             b = 0
11 |         if not a:
12 |             a = 0
13 | 
14 |         if b == 0:
15 |             return 0.0
16 |         result = a / float(b)
17 |         return result
18 | 
19 |         # 注释原因: 实际存储还是用高精度吧 from @连华
20 |         # return int(round(result * 10000)) / 10000.0
21 | 


--------------------------------------------------------------------------------
/luiti/utils/mr_utils.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*-
  2 | 
  3 | import json
  4 | from etl_utils import JsonUtils
  5 | 
  6 | 
  7 | class MRUtils:
  8 | 
  9 |     map_key_split = u"@@"              # map 多维度键 分隔符
 10 |     map_key_escape = u"\""              # map 字符串默认 JSON dump
 11 |     mr_separator = u"\t"              # map reduce 分隔符
 12 | 
 13 |     @staticmethod
 14 |     def mr_key(item1, postfix=''):
 15 |         """ example is "104017@@37771707" """
 16 | # TODO 业务代码应该剥离
 17 |         str1 = u"%s%s%s" % (
 18 |             item1.get('class_id', 0),
 19 |             MRUtils.map_key_split, item1.get('uid', 0),)
 20 |         if postfix:
 21 |             str1 += (MRUtils.map_key_split + unicode(postfix))
 22 |         return str1
 23 | 
 24 |     @staticmethod
 25 |     def json_parse(line1):
 26 |         line1 = line1.strip()
 27 |         if isinstance(line1, str):
 28 |             line1 = line1.decode("UTF-8")
 29 |         return json.loads(line1)
 30 | 
 31 |     @staticmethod
 32 |     def is_mr_line(line1):
 33 |         # 1. 目前标准的 MapReduce 输出
 34 |         head = line1[0:30]
 35 |         is_true_1 = (MRUtils.map_key_split in head) or \
 36 |             (MRUtils.mr_separator in head)
 37 |         # 2. value 必须是 } 或 ]
 38 |         is_true_2 = (line1.endswith("}") or line1.endswith("]"))
 39 |         # 3. 外部Python程序写的一行一行JSON, 没有 map key 。
 40 |         is_true_3 = (not line1.startswith("{")) and (not line1.startswith("["))
 41 |         return is_true_1 and is_true_2 and is_true_3
 42 | 
 43 |     @staticmethod
 44 |     def unicode_value(item1, key1):
 45 |         val1 = item1.get(key1, u"")
 46 |         if isinstance(val1, str):
 47 |             val1 = val1.decode("UTF-8")
 48 |         return val1
 49 | 
 50 |     @staticmethod
 51 |     def split_mr_kv(line1):
 52 |         """ 返回一个 解析好的 [k,v] 数组。 """
 53 |         if isinstance(line1, str):
 54 |             line1 = line1.decode("UTF-8")
 55 |         k_str, v_str = line1.split(MRUtils.mr_separator, 1)
 56 | 
 57 |         return [
 58 |             MRUtils.select_prefix_keys(k_str),
 59 |             json.loads(v_str),
 60 |         ]
 61 | 
 62 |     # key related
 63 |     @staticmethod
 64 |     def merge_keys_in_dict(vals_1, keys_1):
 65 |         """ 合并多个键的整数值。 """
 66 |         merge = {key_1: 0 for key_1 in keys_1}
 67 |         for v_2 in vals_1:
 68 |             for key_1 in keys_1:
 69 |                 merge[key_1] += v_2[key_1]
 70 |         return merge
 71 | 
 72 |     @staticmethod
 73 |     def concat_prefix_keys(*keys):
 74 |         items_str = map(unicode, keys)
 75 |         return MRUtils.map_key_split.join(items_str)
 76 | 
 77 |     @staticmethod
 78 |     def split_prefix_keys(line_part_a):
 79 |         """ return list """
 80 |         fixed_str = MRUtils.select_prefix_keys(line_part_a)
 81 |         return fixed_str.split(MRUtils.map_key_split)
 82 | 
 83 |     @staticmethod
 84 |     def select_prefix_keys(line_part_a, idxes=None):
 85 |         """
 86 |         根据索引数组 转化出新的 map key
 87 |         e.g. select_prefix_keys("232@@8923802@@afenti", [0,1])
 88 |                 # => "232@8923802"
 89 |         """
 90 |         if isinstance(line_part_a, str):
 91 |             line_part_a = line_part_a.decode("UTF-8")
 92 |         # 兼容解析格式错误的jsonkey
 93 |         if line_part_a.startswith(MRUtils.map_key_escape) and \
 94 |                 (not line_part_a.endswith(MRUtils.map_key_escape)):
 95 |             line_part_a = line_part_a[1:]
 96 |         if line_part_a.startswith(MRUtils.map_key_escape):  # is a json
 97 |             line_part_a = json.loads(line_part_a)
 98 | 
 99 |         if idxes is None:
100 |             return line_part_a
101 |         else:
102 |             parts = line_part_a.split(MRUtils.map_key_split)
103 |             new_parts = []
104 |             for idx_1 in idxes:
105 |                 new_parts.append(parts[idx_1])
106 |             return MRUtils.map_key_split.join(new_parts)
107 | 
108 |     @staticmethod
109 |     def str_dump(result_dict):
110 |         return JsonUtils.unicode_dump(result_dict).encode("UTF-8")
111 | 
112 |     @staticmethod
113 |     def filter_dict(d1, keys):
114 |         if not isinstance(keys, list):
115 |             keys = [keys]
116 |         return {k1: d1[k1] for k1 in keys}
117 | 


--------------------------------------------------------------------------------
/luiti/utils/target_utils.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | import json
 4 | import luigi
 5 | from etl_utils import singleton, cached_property
 6 | 
 7 | 
 8 | @singleton()
 9 | class TargetUtilsClass(object):
10 | 
11 |     def line_read(self, hdfs1):
12 |         with hdfs1.open('r') as data1:
13 |             for line1 in data1:
14 |                 line1 = line1.decode("UTF-8").strip()
15 |                 # filter blank line
16 |                 if len(line1) == 0:
17 |                     continue
18 |                 yield line1
19 | 
20 |     def json_read(self, hdfs1):
21 |         for line1 in TargetUtils.line_read(hdfs1):
22 |             yield json.loads(line1)  # as item1
23 | 
24 |     def hdfs(self, data_file1):
25 |         # [兼容] 可以判断出 data_file1 是否包含 part-00000 的目录。
26 | 
27 |         # 兼容 snakebite 对 不存在目录的 test 有bug，或者是因为从hadoop用户切换到primary_user导致。
28 |         f1 = luigi.hdfs.HdfsTarget(data_file1)
29 | 
30 |         # isdir 在 luigi/hdfs.py 没有实现哦
31 |         is_curr_dir = lambda: len(list(f1.fs.listdir(data_file1))) > 1
32 | 
33 |         if f1.exists() and is_curr_dir():
34 |             # There's no part-000 when use multiple text output in streaming
35 |             def _exists(name):
36 |                 return luigi.hdfs.HdfsTarget(data_file1 + name).exists()
37 |             is_mr_output_root = _exists("/_SUCCESS")
38 |             has_part_000000 = _exists("/part-00000")
39 |             if is_mr_output_root or has_part_000000:
40 |                 return luigi.hdfs.HdfsTarget(data_file1,
41 |                                              format=luigi.hdfs.PlainDir)
42 | 
43 |         return f1
44 | 
45 |     def hdfs_dir(self, path1):
46 |         """
47 |         Compact with someone use 000000_0 file naming style, but not the default MR part-00000。
48 |         """
49 |         return luigi.hdfs.HdfsTarget(path1, format=luigi.hdfs.PlainDir)
50 | 
51 |     def isdir(self, path1):
52 |         return self.client.get_bite().test(path1, directory=True)
53 | 
54 |     def exists(self, path1):
55 |         return self.client.exists(path1)
56 | 
57 |     @cached_property
58 |     def client(self):
59 |         return HdfsClient.client
60 | 
61 | TargetUtils = TargetUtilsClass()
62 | 
63 | 
@singleton()
class HdfsClientClass(object):
    """Lazy accessor for `luigi.hdfs.clients`."""
    # TODO use delegate

    @cached_property
    def client(self):
        """Import `luigi.hdfs` on first access and return its `clients` attribute."""
        import luigi.hdfs as hdfs_module
        return hdfs_module.clients
HdfsClient = HdfsClientClass()
# Also expose the client holder as an attribute of TargetUtils.
TargetUtils.HdfsClient = HdfsClient
74 | 


--------------------------------------------------------------------------------
/luiti/utils/visualiser_env_template.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from etl_utils import cached_property
 4 | from ..luigi_extensions import ArrowParameter
 5 | 
 6 | 
 7 | class VisualiserEnvTemplate(object):
 8 |     """
 9 |     Setup luiti webui.
10 | 
11 |     Overwrite below attributes, see keys and their examples in `data`.
12 |     """
13 |     def __init__(self, kwargs=dict()):
14 |         assert isinstance(kwargs, dict), kwargs
15 | 
16 |         for k1, v1 in kwargs.iteritems():
17 |             if not hasattr(self, k1):
18 |                 raise ValueError("%s dont has attribute \"%s\"" % self, k1)
19 |             setattr(self, k1, v1)
20 | 
21 |     @cached_property
22 |     def data(self):
23 |         def maybe_call(o1):
24 |             if callable(o1):
25 |                 o1 = o1()
26 |             return o1
27 | 
28 |         result = {
29 |             "file_web_url_prefix": maybe_call(self.file_web_url_prefix),
30 |             "date_begin": maybe_call(self.date_begin),
31 |             "additional_task_parameters": maybe_call(self.additional_task_parameters),
32 |             "package_config": maybe_call(self.package_config),
33 |         }
34 | 
35 |         # check data valid
36 |         assert isinstance(result["additional_task_parameters"], dict)
37 |         if len(result["additional_task_parameters"]) > 0:
38 |             val = result["additional_task_parameters"].values()[0]
39 |             assert "values" in val
40 |             assert "default" in val
41 | 
42 |         return result
43 | 
44 |     def __getitem__(self, k1):
45 |         return self.data[k1]
46 | 
47 |     # API list
48 |     file_web_url_prefix = ""
49 |     date_begin = ArrowParameter.now().replace(weeks=-1).format("YYYY-MM-DD")
50 | 
51 |     def additional_task_parameters(self):
52 |         """
53 |         Example is
54 | 
55 |         {
56 |             "subject": {
57 |                 "values": ["english", "math"],
58 |                 "default": "english",
59 |             }
60 |         }
61 |         """
62 |         return dict()
63 | 
64 |     def package_config(self):
65 |         return {
66 |             "default_selected": []
67 |         }
68 | 


--------------------------------------------------------------------------------
/luiti/webui/INSTALL.markdown:
--------------------------------------------------------------------------------
1 | Install by http://bower.io/
2 | ==============
3 | ```bash
4 | bower install
5 | ```
6 | 


--------------------------------------------------------------------------------
/luiti/webui/assets/javascripts/luiti.js:
--------------------------------------------------------------------------------
  1 | (function() {
  2 |   'use strict';
  3 | 
  // Colors used to mark nodes when a task is selected, separating in and out.
  var colors = {
      "requires": "lime",
      "self": "#7BE141",
      "upons": "green",
  };

  // Draw the task graph with vis.js inside `container_id` and forward click
  // events on the network to `click_event`.
  var render_network = function(nodes, edges, container_id, click_event) {
      // Nodes whose label is one of the selected task classes get the `self`
      // color, every other node the `requires` color.
      nodes = _.map(nodes, function(node) {
          var is_selected = _.contains(queryparams.selected_query.task_cls, node.label);
          node.color = is_selected ? colors.self : colors.requires;
          return node;
      });

      // NOTE: original code is http://visjs.org/examples/network/nodeStyles/customGroups.html
      var node_style = {
          shape: 'dot',
          size: 20,
          font: {
              size: 15,
              color: '#000000'
          },
          borderWidth: 2
      };
      var edge_style = {
          width: 2
      };

      var container = $(container_id)[0];  // create a network
      var network = new vis.Network(
          container,
          {nodes: nodes, edges: edges},
          {nodes: node_style, edges: edge_style});
      network.on("click", click_event);
  };
 45 | 
 46 | 
 47 |   var render_visualSearch = function(container_id, default_query, selected_query, vs_accepted_params) {
 48 |     var env_config_visualSearch = {
 49 |       "facet_values": (function() {
 50 |           var task_namespaces = _.map(["task_cls", "luiti_package"], function(param) {
 51 |             return {"label": param, "category": "Namespaces"};
 52 |           });
 53 |           var task_params= _.map(_.keys(default_query), function(param) {
 54 |             return {"label": param, "category": "Params"};
 55 |           });
 56 |         return task_params.concat(task_namespaces);
 57 |       })(),
 58 |     };
 59 | 
 60 |     var get_current_query = function(visualSearch) {
 61 |       var result = {};
 62 | 
 63 |       _.map(visualSearch.searchQuery.facets(), function(facet) {
 64 |         var kv = _.pairs(facet)[0];
 65 |         if (_.has(result, kv[0])) {
 66 |           result[kv[0]].push(kv[1]);
 67 |         } else {
 68 |           result[kv[0]] = [kv[1]];
 69 |         };
 70 |       });
 71 | 
 72 |       return result;
 73 |     }
 74 | 
 75 |     var vs_config = {
 76 |       container: $(container_id),
 77 |       query: '',
 78 |       autosearch: true,
 79 |       callbacks: {
 80 |         search: function(query, searchCollection) {
 81 |           return false;
 82 |         },
 83 |         facetMatches: function(callback) {
 84 |           callback(env_config_visualSearch["facet_values"]);
 85 |         },
 86 |         valueMatches: function(facet, searchTerm, callback) {
 87 |           // support smart match, from any position of strs.
 88 |           var orig_array = vs_accepted_params[facet];
 89 |           searchTerm = searchTerm.toLowerCase();
 90 |           var result = _.filter(orig_array , function(str) {
 91 |             return s.contains(str.toLowerCase(), searchTerm);
 92 |           });
 93 |           // dont work, see more details at search_fact.js#autocompleteValues
 94 |           return callback(result);
 95 |         },
 96 |         blur: function() {
 97 |           var result = get_current_query(visualSearch);
 98 | 
 99 |           // Update a React view.
100 |           group_summary.setState({"selected_luiti_packages": result["luiti_package"]})
101 |         },
102 |       }
103 |     };
104 | 
105 |     // Example format is: visualSearch.searchBox.value("Country: US State: \"New York\" Key: Value")
106 |     var load_params = function(query_opts) {
107 |       // support same key with multiple values.
108 |       var vs_values = [];
109 |       _.each(query_opts, function(opt_values, opt_key) {
110 |         _.each(opt_values, function(opt_value) {
111 |           vs_values = vs_values.concat(JSON.stringify(opt_key) + ": " + JSON.stringify(opt_value));
112 |         });
113 |       });
114 |       return vs_values.join(" ");
115 |     };
116 | 
117 |     // Run it!
118 |     var visualSearch = VS.init(vs_config);
119 | 
120 |     visualSearch.current_query = (function() {
121 |       var result = _.extend({}, selected_query, URI.parseQuery(URI(window.location)._parts.query));
122 |       // wrap value in a Array.
123 |       _.each(_.keys(result), function(key) {
124 |         if (!_.isArray(result[key])) {
125 |           result[key] = [result[key]];
126 |         };
127 |       });
128 |       return result;
129 |     })();
130 | 
131 |     visualSearch.setValue = function(opts) {
132 |       return visualSearch.searchBox.value(load_params(opts));
133 |     };
134 |     visualSearch.setValue(visualSearch.current_query);
135 | 
136 |     // support click query
137 |     var searchBox = visualSearch.options.container.find(".VS-icon-search");
138 |     searchBox.click(function(event) {
139 |       var result = get_current_query(visualSearch);
140 | 
141 |       // build a url query
142 |       var url = URI(window.location);
143 |       url._parts.query = "";
144 |       url.setQuery(result);
145 |       window.location = url.build();
146 | 
147 |       return false;
148 |     });
149 |     searchBox.css("cursor", "pointer");
150 | 
151 |     return visualSearch;
152 |   };
153 | 
154 | 
155 |   var render_header_title = function(title) {
156 |     $("head title").html(title);
157 |     $("body #header .title").html(title);
158 |   };
159 | 
160 |   var render_all = function(env) {
161 |     // 1. render network
162 |     render_network(nodeedge.nodes,
163 |                    nodeedge.edges,
164 |                    "#network",
165 |                    function (params) {
166 |                      console.log("[click a node on #network]", params);
167 |                      var task_id = params["nodes"][0]; // only one task can be clicked.
168 |                      // Delegate to show TaskDetailView
169 |                      $("#nodes_groups").find('.nodes_group li[data-task-id="' + task_id + '"]').click();
170 |                    });
171 | 
172 |     // 2. render visualSearch
173 |     env.visualSearch = render_visualSearch(".visual_search", queryparams.default_query, queryparams.selected_query, queryparams.accepted);
174 | 
175 |     // Other views.
176 |     render_header_title(title);
177 |   };
178 | 
179 |   var init_data_url = "init_data.json" + location.search;
180 | 
181 |   $.getJSON(init_data_url, function(data) {
182 |     // bind env's first level key to global `window` object.
183 |     _.each(data, function(value, key) {
184 |       window[key] = value;
185 |     });
186 |     window.env = data;
187 |     console.log("load data", env);
188 | 
189 |     // transform data
190 |     nodeedge.nodeid_to_node_dict = _.reduce(nodeedge.nodes, function(dict, node) {
191 |       dict[node.id] = node;
192 |       return dict;
193 |     }, {});
194 | 
195 |     render_all(env);
196 | 
197 |     // orig is <script type="text/jsx">, but we want to load jsx scripts manually here, iteract with Ajax loading JSON data.
198 |     $.get("assets/jsx/luiti.jsx", function(jsx_orig) {
199 |         var jsx_js = JSXTransformer.transform(jsx_orig).code;
200 |         window.renders = eval(jsx_js).renders;
201 | 
202 |         if (errors.load_tasks.length) {
203 |           renders.LoadTasksErrors(errors);
204 |         };
205 | 
206 |         renders.TaskGroupsSummary(ptm.task_package_names, ptm.package_to_task_clsnames, queryparams.selected_query.luiti_package);
207 |         renders.TaskGroups(nodeedge.nodes_groups);
208 | 
209 |         // Select first task instance.
210 |         var lis = $("#nodes_groups").find(".nodes_group ul li");
211 |         var selector_attrs_task_cls = _.map((queryparams.selected_query.task_cls || []), function(task_cls) {
212 |             return "[data-task_cls=" + task_cls + "]";
213 |           });
214 |         // e.g. "[data-task_cls*=Profile], [data-task_cls*=Dump]"
215 |         var selected_lis = lis.filter(selector_attrs_task_cls.join(", "));
216 |         if (selected_lis.length == 0) {
217 |           selected_lis = lis;
218 |         };
219 |         selected_lis.first().click();
220 |     });
221 |   });
222 | })(window);
223 | 


--------------------------------------------------------------------------------
/luiti/webui/assets/stylesheets/luiti.css:
--------------------------------------------------------------------------------
 1 | #header {
 2 |   margin-left: 15px;
 3 | }
 4 | #header .selected_params {
 5 |   padding: 20px;
 6 |   min-width: 400px;
 7 |   max-width: 800px;
 8 | }
 9 | #network_left {
10 |   width: 70%;
11 | }
12 | #network {
13 |   width: 100%;
14 |   height: 800px;
15 |   border: 0px solid #444444;
16 |   background-color: white;
17 | }
18 | #task_groups {
19 |   width: 30%;
20 | }
21 | #nodes_groups .nodes_group ul li, #task_groups_summary ul li {
22 |   cursor: pointer;
23 | }
24 | #nodes_groups .nodes_group ul li:hover {
25 |   background-color: yellow;
26 | }
27 | #nodes_groups .nodes_group ul li.highlighted {
28 |   background-color: yellow;
29 | }
30 | #task_groups_summary ul li {
31 |   list-style-type: none;
32 | }
33 | #task_groups_summary ul li input {
34 |   margin-right: 10px;
35 | }
36 | 
37 | #task_detail table.table {
38 |   table-layout: fixed;
39 | }
40 | #task_detail table.table td:first-child {
41 |   width: 15%;
42 | }
43 | 
44 | #task_detail table td .task-link {
45 |   margin: 5px;
46 | }
47 | #task_detail table td .task-link a {
48 |   color: white;
49 | }
50 | 


--------------------------------------------------------------------------------
/luiti/webui/bower.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "luiti",
 3 |   "version": "0.1.4",
 4 |   "homepage": "https://github.com/luiti/luiti",
 5 |   "authors": [
 6 |     "David Chen <mvjome@gmail.com>"
 7 |   ],
 8 |   "description": "luiti server's web ui.",
 9 |   "main": "luiti.js",
10 |   "moduleType": [
11 |     "amd"
12 |   ],
13 |   "dependencies": {
14 |     "bootstrap": "~= 3.3",
15 |     "vis": "~= 4.2",
16 |     "uri.js": "~= 1.15",
17 |     "underscore.string": "~= 3.1",
18 |     "visualsearch": "~= 0.5",
19 |     "react": "~= 0.13"
20 |   },
21 |   "devDependencies": {
22 |   },
23 |   "keywords": [
24 |     "luiti",
25 |     "luigi",
26 |     "DAG",
27 |     "visualiser"
28 |   ],
29 |   "license": "MIT",
30 |   "ignore": [
31 |     "**/.*",
32 |     "node_modules",
33 |     "bower_components",
34 |     "test",
35 |     "tests"
36 |   ]
37 | }
38 | 


--------------------------------------------------------------------------------
/luiti/webui/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 |     <head>
 5 |         <title></title>
 6 | 
 7 |         <script src="bower_components/visualsearch/build-min/dependencies.js"></script> <!-- jQuery & jQuery UI & underscore & Backbone -->
 8 | 
 9 |         <link href="bower_components/bootstrap/dist/css/bootstrap.min.css" rel="stylesheet">
10 |         <script src="bower_components/bootstrap/dist/js/bootstrap.min.js"></script>
11 | 
12 |         <script src="bower_components/vis/dist/vis.min.js"></script>
13 |         <link href="bower_components/vis/dist/vis.min.css" rel="stylesheet" >
14 | 
15 |         <link media="screen" type="text/css" rel="stylesheet" href="bower_components/visualsearch/build-min/visualsearch.css">
16 |         <link media="screen" type="text/css" rel="stylesheet" href="bower_components/visualsearch/build-min/visualsearch-datauri.css">
17 | 
18 |         <script src="bower_components/visualsearch/build-min/visualsearch.js"></script>
19 |         <script src="bower_components/uri.js/src/URI.min.js"></script>
20 |         <script src="bower_components/underscore.string/dist/underscore.string.js"></script>
21 | 
22 |         <script src="bower_components/react/react-with-addons.min.js"></script>
23 |         <script src="bower_components/react/JSXTransformer.js"></script>
24 | 
25 |         <link media="screen" type="text/css" rel="stylesheet" href="assets/stylesheets/luiti.css">
26 | 
27 |         <meta name="viewport" content="width=device-width, initial-scale=1.0">
28 |     </head>
29 | 
30 |     <body>
31 | 
32 |       <div id="load_tasks_errors"></div>
33 | 
34 |       <div id="header">
35 |         <h1 class="title pull-left"></h1>
36 |         <div class="visual_search selected_params pull-right"></div>
37 |         <div class="clearfix"></div>
38 |       </div>
39 | 
40 |       <div id="network_wrap">
41 |         <div id="network_left" class="pull-left">
42 |           <div id="task_detail" class=""></div>
43 |           <div id="network"></div>
44 |         </div>
45 |         <div id="task_groups" class="pull-right">
46 |           <div id="task_groups_summary"></div>
47 |           <div id="nodes_groups"></div>
48 |         </div>
49 |         <div class="clearfix"></div>
50 |       </div>
51 | 
52 |       <script src="assets/javascripts/luiti.js" type="text/javascript"></script>
53 | 
54 |     </body>
55 | </html>
56 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/requirements.txt


--------------------------------------------------------------------------------
/screenshots/README.markdown:
--------------------------------------------------------------------------------
 1 | Luiti WebUI screenshots
 2 | ===========================
 3 | 
 4 | 
 5 | reduce PNG size
 6 | ---------------------------
 7 | ```bash
 8 | brew install pngquant
 9 | pngquant --quality 20-70 ~/Desktop/luiti\ screenshots\ copy/*.png
10 | ```
11 | 


--------------------------------------------------------------------------------
/screenshots/luiti_code_show.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/screenshots/luiti_code_show.png


--------------------------------------------------------------------------------
/screenshots/luiti_webui_list.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/screenshots/luiti_webui_list.png


--------------------------------------------------------------------------------
/screenshots/luiti_webui_show.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/screenshots/luiti_webui_show.png


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | 
 4 | import os
 5 | from setuptools import setup
 6 | 
 7 | 
 8 | def get_static_files(root):
 9 |     return [os.path.join(path, name)
10 |             for path, subdirs, files in os.walk(root)
11 |             for name in files]
# ``package_data`` entries are resolved relative to the directory of the
# package they are attached to (``luiti/`` here), not relative to setup.py.
# The files are discovered from the repository root, so the leading package
# directory has to be stripped again; otherwise every entry would point at a
# non-existent ``luiti/luiti/...`` path.
_package_dir = "luiti"
package_data = [
    os.path.relpath(_static_file, _package_dir)
    for _static_root in ("luiti/java/",
                         "luiti/webui/assets/",
                         "luiti/webui/bower_components/")
    for _static_file in get_static_files(_static_root)
]
package_data.append("webui/index.html")
18 | 
19 | 
# Read the long description through a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("README.markdown") as readme_file:
    long_description = readme_file.read()


setup(
    name='luiti',
    version='0.2.2',
    url='http://github.com/luiti/luiti/',
    license='MIT',
    author='David Chen',
    # The address is stored reversed and flipped back at build time.
    author_email=''.join(reversed("moc.liamg@emojvm")),
    description='Luiti = Luigi + time',
    long_description=long_description,
    # Package names are dotted module paths, not file-system paths; the
    # former slash-separated entries (some with a trailing slash) are not
    # valid package names.
    packages=[
        'luiti',
        'luiti.daemon',
        'luiti.daemon.query_engine',
        'luiti.daemon.utils',
        'luiti.daemon.web',
        'luiti.luigi_decorators',
        'luiti.luigi_extensions',
        'luiti.manager',
        'luiti.schedule',
        'luiti.task_templates',
        'luiti.task_templates.time',
        'luiti.task_templates.other',
        'luiti.tests',
        'luiti.utils',
    ],
    scripts=[
        'bin/luiti',
    ],

    package_data={'luiti': package_data},
    include_package_data=True,

    zip_safe=False,
    platforms='any',
    install_requires=[
        # 1. luigi related
        "luigi         >=2.0,<2.2",
        "snakebite>=2.5,<2.6",
        "protobuf>=2.6,<2.7",
        "tornado>=4.0,<4.1",
        "mechanize>=0.2,<0.3",
        "python-daemon>=1.6,<1.7",
        "MySQL-python>=1.2,<1.3",
        "pymongo>=3.0",

        # 2. luiti self
        "etl_utils>=0.1,<0.2",
        "arrow>=0.4,<0.5",
        "inflector>=2.0,<2.1",
        "pygments>=2.0,<2.1",
        "ujson",
        "jsonpickle",
        "six",
        "tabulate",
        "toposort>=1.0,<1.1",
    ],
    classifiers=[
        'Intended Audience :: Developers',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules'
    ],
)
82 | 


--------------------------------------------------------------------------------
/tests/client.cfg:
--------------------------------------------------------------------------------
1 | [hdfs]
2 | client: snakebite
3 | namenode_host: localhost
4 | namenode_port: 8020
5 | 
6 | 


--------------------------------------------------------------------------------
/tests/jsons_data/mr_local.json:
--------------------------------------------------------------------------------
1 | {"uid": 1}
2 | {"uid": 1}
3 | {"uid": 1}
4 | {"uid": 2}
5 | {"uid": 3}
6 | 


--------------------------------------------------------------------------------
/tests/project_A/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/project_A/__init__.py


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/project_A/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | __all__ = ["luigi", "TaskDay", "cached_property", "TaskDayHadoop",
 4 |            "json", "MRUtils", ]
 5 | 
 6 | import os
 7 | import sys
 8 | root_dir = os.path.dirname(
 9 |     os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10 | sys.path.insert(0, root_dir)
11 | os.environ['LUIGI_CONFIG_PATH'] = root_dir + '/tests/client.cfg'
12 | 
13 | 
14 | from luiti import luigi, TaskDay, cached_property, TaskDayHadoop, json, MRUtils
15 | luigi.plug_packages(
16 |     "project_B",        # dep project
17 |     "etl_utils==0.1.10",  # just for test import
18 |     "zip_package_by_luiti",  # zip file package
19 | )
20 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/a_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import luigi, TaskDay, cached_property
 4 | 
 5 | 
 6 | @luigi.ref_tasks("BDay", "CDay")
 7 | class ADay(TaskDay):
 8 | 
 9 |     root_dir = "/foobar"
10 | 
11 |     def requires(self):
12 |         return [self.BDay_task, self.CDay_task]
13 | 
14 |     @cached_property
15 |     def count(self):
16 |         return 1
17 | 
18 |     @cached_property
19 |     def total_count(self):
20 |         return self.count + self.BDay_task.count + self.CDay_task.count
21 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/b_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import cached_property, TaskDay
 4 | 
 5 | 
class BDay(TaskDay):
    """Fixture task without a ``requires`` override; exposes a constant count."""

    # Root directory under which this task's output path is built.
    root_dir = "/foobar"

    @cached_property
    def count(self):
        # Constant value that ADay adds into its ``total_count``.
        return 2
13 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/c_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import luigi, cached_property, TaskDay
 4 | 
 5 | 
 6 | @luigi.ref_tasks("FoobarDay")
 7 | class CDay(TaskDay):
 8 | 
 9 |     root_dir = "/foobar"
10 | 
11 |     def requires(self):
12 |         self.FoobarDay_task
13 | 
14 |     @cached_property
15 |     def count(self):
16 |         return 3
17 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/d_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import cached_property, TaskDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("HDay")
 7 | class DDay(TaskDay):
 8 | 
 9 |     root_dir = "/foobar"
10 | 
11 |     def requires(self):
12 |         return [self.HDay_task]
13 | 
14 |     @cached_property
15 |     def count(self):
16 |         return 4
17 | 
18 |     @cached_property
19 |     def total_count(self):
20 |         return self.count + self.HDay_task.count
21 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/foobar_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import TaskDayHadoop, MRUtils
 4 | 
 5 | 
 6 | class FoobarDay(TaskDayHadoop):
 7 |     """
 8 |     A MapReduce Python Program written in Luiti Task Style, including test case.
 9 |     """
10 | 
11 |     root_dir = "/foobar"
12 | 
13 |     def mapper(self, line1):
14 |         d2 = MRUtils.json_parse(line1)
15 |         yield d2['uid'], d2
16 | 
17 |     def reducer(self, uid1, d1):
18 |         yield '', MRUtils.str_dump({
19 |             "uid": uid1,
20 |             "total": sum([i2['count'] for i2 in d1]),
21 |             "ref": self.ref,
22 |         })
23 | 
24 |     ref = NotImplementedError
25 | 
26 |     def mrtest_input(self):
27 |         return u"""
28 | {"uid": 1, "count": 2}
29 | {"uid": 1, "count": 3}
30 | {"uid": 2, "count": 1}
31 | """
32 | 
33 |     def mrtest_output(self):
34 |         return u"""
35 | {"uid": 1, "total": 5, "ref": "foobar"}
36 | {"uid": 2, "total": 1, "ref": "foobar"}
37 | """
38 | 
39 |     def mrtest_attrs(self):
40 |         return {
41 |             "ref": "foobar",
42 |         }
43 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/import_packages_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import TaskDay, cached_property
 4 | 
 5 | 
class ImportPackagesDay(TaskDay):
    """Fixture task used to check that ``zip_package_by_luiti`` can be imported."""

    root_dir = "/foobar"

    @cached_property
    def egg_library(self):
        # Imported lazily, on first attribute access, and returned as the
        # module object itself.
        import zip_package_by_luiti  # test import library from zip file
        return zip_package_by_luiti
14 | 


--------------------------------------------------------------------------------
/tests/project_A/luiti_tasks/multiple_dependent_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import luigi, TaskDay
 4 | 
 5 | 
@luigi.ref_tasks("FoobarDay")
class MultipleDependentDay(TaskDay):
    """Fixture task whose only requirement is FoobarDay."""

    root_dir = "/foobar"

    def requires(self):
        # Returns the single referenced task directly rather than a list.
        return self.FoobarDay_task
13 | 


--------------------------------------------------------------------------------
/tests/project_B/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/project_B/__init__.py


--------------------------------------------------------------------------------
/tests/project_B/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/project_B/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/project_B/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | __all__ = ["luigi", "TaskDay", "cached_property"]
4 | 
5 | from luiti import luigi, TaskDay, cached_property
6 | 


--------------------------------------------------------------------------------
/tests/project_B/luiti_tasks/h_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from luiti import TaskDay, cached_property, luigi
 4 | 
 5 | 
@luigi.ref_tasks("MultipleDependentDay")
class HDay(TaskDay):
    """project_B fixture task that requires MultipleDependentDay."""

    root_dir = "/foobar"

    def requires(self):
        # Returns the single referenced task directly rather than a list.
        return self.MultipleDependentDay_task

    @cached_property
    def count(self):
        # Constant value that project_A's DDay adds into its ``total_count``.
        return 8
17 | 


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | os.environ['LUIGI_CONFIG_PATH'] = root_dir + '/tests/client.cfg'
 8 | 
 9 | import unittest
10 | 
11 | 
class TestLuiti(unittest.TestCase):

    def test_check_date_range(self):
        from luiti import luigi, TaskHour, arrow

        @luigi.check_date_range()
        class CheckDateRangeExampleHour(TaskHour):
            root_dir = "/foobar"

            def run(self):
                return "data"

        prev_hour = arrow.now().replace(hours=-1)

        # A period's data may only be processed once that period is over
        # (original note: this week's data has to be run next week): the
        # previous hour runs, the current hour is refused.
        self.assertEqual(CheckDateRangeExampleHour(prev_hour).run(), 'data')
        self.assertEqual(CheckDateRangeExampleHour(arrow.now()).run(), False)

    def test_check_runtime_range(self):
        from luiti import luigi, TaskWeek, arrow

        @luigi.check_runtime_range(hour_num=[5, 6, 7, 8], weekday_num=[1], )
        class CheckRuntimeRangeExampleWeek(TaskWeek):
            root_dir = "/foobar"

            def run(self):
                return "data"

        day_1 = arrow.get("2014-09-01 06:28")  # valid
        self.assertTrue(day_1)

        # ``func`` below replaces ``arrow.now``. Register the restore first so
        # the patched clock cannot leak into tests that run afterwards, even
        # when an assertion in this test fails.
        self.addCleanup(setattr, arrow, "now", arrow.now)

        def func(d1):
            # overwrite arrow's method directly.
            arrow.now = lambda: arrow.get(d1)
            return CheckRuntimeRangeExampleWeek(d1).run()

        self.assertEqual(func("2014-09-01 09:00"), False)
        self.assertEqual(func("2014-09-02 06:28"), False)
        self.assertEqual(func("2014-09-01 04:28"), False)
        self.assertEqual(func("2014-09-01 05:00"), "data")
        self.assertEqual(func("2014-09-01 06:28"), "data")
        self.assertEqual(func("2014-09-01 08:59"), "data")
54 | 
55 | 
56 | if __name__ == '__main__':
57 |     unittest.main()
58 | 


--------------------------------------------------------------------------------
/tests/test_manager.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os
  4 | import sys
  5 | RootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  6 | sys.path.insert(0, RootDir)
  7 | os.environ['LUIGI_CONFIG_PATH'] = RootDir + '/tests/client.cfg'
  8 | 
  9 | import unittest
 10 | import mock
 11 | 
 12 | from luiti import manager
 13 | from luiti.tests import date_begin
 14 | 
 15 | sys.path.insert(0, os.path.join(
 16 |     RootDir, "tests/zip_package_by_luiti"))
 17 | 
 18 | 
 19 | class TestManager(unittest.TestCase):
 20 | 
 21 |     def setUp(self):
 22 |         # change work dir
 23 |         os.chdir(os.path.join(RootDir, "tests/project_A"))
 24 | 
 25 |     def test_Loader(self):
 26 |         self.assertEqual(
 27 |             manager.load_a_task_by_name("ADay"),
 28 |             manager.load_a_task_by_name("a_day"),
 29 |         )
 30 | 
 31 |         self.assertRaises(
 32 |             AssertionError,
 33 |             lambda: manager.load_a_task_by_name("not_exists_day"),
 34 |         )
 35 |         os.chdir(RootDir)
 36 | 
 37 |     def test_get_all_date_file_to_task_instances(self):
 38 |         ADay = manager.load_a_task_by_name("ADay")
 39 |         BDay = manager.load_a_task_by_name("BDay")
 40 |         files = manager.get_all_date_file_to_task_instances("20140901-20140903", [ADay, BDay])
 41 |         self.assertEqual(['/foobar/2014-09-01/a_day.json',
 42 |                           '/foobar/2014-09-01/b_day.json',
 43 |                           '/foobar/2014-09-02/a_day.json',
 44 |                           '/foobar/2014-09-02/b_day.json',
 45 |                           '/foobar/2014-09-03/a_day.json',
 46 |                           '/foobar/2014-09-03/b_day.json'],
 47 |                          sorted(files.keys()))
 48 | 
 49 |     def test_load_all_tasks(self):
 50 |         all_tasks = manager.load_all_tasks()
 51 |         self.assertEqual(manager.ld.result, all_tasks)  # cause they'are linked.
 52 | 
 53 |         HDay = manager.load_a_task_by_name("HDay")
 54 |         self.assertTrue(HDay in manager.ld.all_task_classes, "project B is also loaded.")
 55 | 
 56 |     def test_find_dep_on_tasks(self):
 57 |         # simple case
 58 |         # ADay is dep on BDay, ADay is inputed into BDay.
 59 |         BDay = manager.load_a_task_by_name("BDay")
 60 |         dep_tasks_by_BDay = manager.find_dep_on_tasks(BDay, manager.ld.all_task_classes)
 61 |         self.assertEqual(len(dep_tasks_by_BDay), 1)
 62 |         self.assertEqual(dep_tasks_by_BDay[0].__name__, "ADay")
 63 | 
 64 |         # complex case
 65 |         #   MultipleDependentDay => HDay => DDay
 66 |         #   delete MultipleDependentDay, and delete HDay and DDay.
 67 |         MultipleDependentDay = manager.load_a_task_by_name("MultipleDependentDay")
 68 |         dep_tasks_by_MultipleDependentDay = manager.find_dep_on_tasks(MultipleDependentDay, manager.ld.all_task_classes)
 69 |         self.assertEqual(len(dep_tasks_by_MultipleDependentDay), 2)
 70 |         self.assertEqual(sorted(map(lambda i1: i1.__name__, dep_tasks_by_MultipleDependentDay)), ["DDay", "HDay"])
 71 | 
 72 |     def test_generate_a_task(self):
 73 |         dir1 = "/tmp/test_generate_a_task/"
 74 |         os.system("rm -rf %s" % dir1)  # clean prev error
 75 |         os.system("mkdir -p %s/luiti_tasks" % dir1)
 76 |         os.chdir(dir1)
 77 | 
 78 |         content_a = manager.generate_a_task("ADay")
 79 |         self.assertTrue("ADay" in content_a)
 80 |         self.assertTrue("TaskDay" in content_a)
 81 | 
 82 |         content_b = manager.generate_a_task("b_week")
 83 |         self.assertTrue("BWeek" in content_b)
 84 |         self.assertTrue("TaskWeek" in content_b)
 85 | 
 86 |         os.system("rm -rf %s" % dir1)
 87 |         os.chdir(RootDir)
 88 | 
 89 |     def test_new_a_project(self):
 90 |         os.chdir("/")  # fix chdir err
 91 |         dir1 = "/tmp/test_new_a_project/"
 92 |         os.system("rm -rf %s" % dir1)  # clean prev error
 93 |         os.system("mkdir -p %s" % dir1)
 94 |         os.chdir(dir1)
 95 | 
 96 |         files = manager.new_a_project("project_c")
 97 | 
 98 |         self.assertTrue("Project C" in file(files[0]).read())
 99 |         self.assertTrue("zip_safe" in file(files[1]).read())
100 |         self.assertTrue("luigi.plug_packages" in file(files[2]).read())
101 |         self.assertTrue("@MrTestCase" in file(files[3]).read())
102 | 
103 |         os.chdir("project_c")
104 |         os.system("python tests/test_main.py")
105 |         os.chdir("..")
106 | 
107 |         os.system("rm -rf %s" % dir1)
108 |         os.chdir(RootDir)
109 | 
110 |     def test_CLI(self):
111 |         from luiti.manager.cli import Cli
112 | 
113 |         cli = Cli(["luiti", "ls"])
114 |         self.assertTrue("ArgumentParser" in repr(cli.parser))
115 |         self.assertTrue(callable(cli.load_a_task_by_name))
116 | 
117 |         self.assertTrue(cli.executor)
118 | 
119 |         for subcommand in cli.subparsers.choices.keys():
120 |             # Dumb test, just test function exists.
121 |             # TODO but dont works
122 |             self.assertTrue(callable(getattr(cli.executor, subcommand)))
123 | 
124 |         from luiti.manager.cli import bool_type
125 |         self.assertEqual(bool_type("False"), False)
126 |         self.assertEqual(bool_type("false"), False)
127 | 
128 |     def test_SysArgv(self):
129 |         from luiti.manager.sys_argv import SysArgv
130 |         from luiti.manager.cli import Cli
131 | 
132 |         def func(argv_in, argv_ou):
133 |             cli = Cli(argv_in)
134 |             self.assertEqual(SysArgv.convert_to_luigi_accepted_argv(cli.subparsers, argv_in), argv_ou)
135 | 
136 |         func(["luiti", "info", "--task-name", "HelloDay", "--date-value", date_begin], ['luiti', '--date-value', date_begin])
137 |         func(["luiti", "info", "--task-name=HelloDay"], ['luiti'])
138 | 
139 |     def test_Table(self):
140 |         # TODO add more tests
141 |         from luiti.manager.table import Table
142 |         ADay = manager.load_a_task_by_name("ADay")
143 |         self.assertEqual(Table.print_task_info(ADay), ([['Tasks self dep on', "['BDay', 'CDay']"], ['Tasks dep on self', '[]']], ['Task name', 'ADay']))
144 | 
145 |         from luiti.manager.lazy_data import ld
146 |         self.assertTrue(len(Table.print_all_tasks(ld.result)[0]) > 6, """Example data is ([[1, 'ADay', 'project_A'], [2, 'BDay', 'project_A'], [3, 'CDay', 'project_A'], [4, 'DDay', 'project_A'], [5, 'FoobarDay', 'project_A'], [6, 'HDay', 'project_B'], [7, 'ImportPackagesDay', 'project_A'], [8, 'MultipleDependentDay', 'project_A'], ['total', 8, '']], ['', 'All Tasks', 'luiti_package'])""")
147 | 
148 |     @mock.patch("luigi.hdfs.clients.rename")
149 |     @mock.patch("luigi.hdfs.clients.exists")
150 |     def test_Files(self, exists, rename):
151 |         from luiti.manager.files import Files
152 | 
153 |         exists.return_value = True
154 |         rename.return_value = True
155 |         self.assertEqual(Files.soft_delete_files("hello", "world"), 0)
156 | 
157 |     def test_ManageDecorators(self):
158 |         from luiti.luigi_extensions.manage_decorators import ManageDecorators
159 |         from luiti import luigi
160 |         luigi = ManageDecorators.bind_to(luigi)  # actually it's already runned by luiti.luigi_extensions.__init__
161 |         self.assertTrue("as_a_luiti_task" in dir(luigi))
162 | 
163 | if __name__ == '__main__':
164 |     unittest.main()
165 | 


--------------------------------------------------------------------------------
/tests/test_mr_test_case.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | RootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, RootDir)
 7 | os.environ['LUIGI_CONFIG_PATH'] = RootDir + '/tests/client.cfg'
 8 | 
 9 | from luiti.tests import SetupLuitiPackages
10 | config = SetupLuitiPackages.config
11 | 
12 | import unittest
13 | from luiti import MrTestCase
14 | 
15 | 
# ``MrTestCase`` is a class decorator imported from luiti. Presumably it
# generates the actual test methods from ``mr_task_names`` -- confirm in luiti.
@MrTestCase
class TestMrTestCase(unittest.TestCase):

    # Names of the MapReduce task classes handed to the decorator.
    mr_task_names = [
        'FoobarDay',
    ]
22 | 
23 | if __name__ == '__main__':
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/tests/test_schedule.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | 
 8 | import unittest
 9 | 
10 | from luiti.tests import SetupLuitiPackages
11 | config = SetupLuitiPackages.config
12 | 
13 | from luiti.schedule import SensorSchedule
14 | from luiti import luigi, TaskDay, manager
15 | 
16 | 
class TestSensorSchedule(unittest.TestCase):

    def test_read_all_required_tasks(self):
        BetaReportDay = manager.load_a_task_by_name("BetaReportDay")
        ss = SensorSchedule(BetaReportDay, "2014-09-01", False)

        # Build a real list: ``map`` returns a lazy iterator on Python 3,
        # which never compares equal to the expected list below.
        result = [task.task_clsname for task in ss.ordered_task_instances_list]
        self.assertEqual(result, ['DumpBrowserMapDay', 'DumpWebLogDay', 'CleanWebLogDay', 'CounterVisitorByBrowserDay', 'CounterVisitorByRegionDay', 'CounterVisitorDay', 'BetaReportDay'])

    def test_is_external(self):
        # A plain luigi ExternalTask is reported as external.
        class ExampleExternalTask(luigi.ExternalTask):
            pass
        self.assertTrue(SensorSchedule.is_external(ExampleExternalTask()))

        # So is a luiti task that sets ``is_external = True`` itself.
        class LuitiTaskDay(TaskDay):
            is_external = True
            root_dir = "/foobar"
        self.assertTrue(SensorSchedule.is_external(LuitiTaskDay(date_value="2014-09-01")))
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     unittest.main()
39 | 


--------------------------------------------------------------------------------
/tests/test_task.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | os.environ['LUIGI_CONFIG_PATH'] = root_dir + '/tests/client.cfg'
 8 | 
 9 | import unittest
10 | 
11 | 
class TestLuitiUtils(unittest.TestCase):

    def test_main(self):
        from luiti import TaskWeek, ArrowParameter

        class HelloWorldWeek(TaskWeek):
            root_dir = "/foobar"

        # 2014-09-02 is a Tuesday; the task reports the Monday of that week.
        week_task = HelloWorldWeek("2014-09-02")
        monday = ArrowParameter.get("2014-09-01")
        self.assertEqual(week_task.date_value, monday)

        # Values derived from the normalized date.
        self.assertEqual(week_task.data_dir, "/foobar/2014-09-01")
        expected_file = "/foobar/2014-09-01/hello_world_week.json"
        self.assertEqual(week_task.data_file, expected_file)
        self.assertEqual(week_task.date_str, "2014-09-01")
        self.assertEqual(week_task.date_type, "week")

        # The previous period starts one week earlier.
        previous_monday = ArrowParameter.get("2014-08-25")
        self.assertEqual(week_task.date_value_by_type_in_last, previous_monday)
        self.assertEqual(week_task.task_class, HelloWorldWeek)

    def test_RootTask(self):
        from luiti import RootTask
        target = RootTask().output()
        output_path = target.path
        # The path itself is the failure message, to ease debugging.
        self.assertTrue("luiti/luigi_extensions/root_task.py" in output_path, output_path)
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     unittest.main()
42 | 


--------------------------------------------------------------------------------
/tests/test_task_templates.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os
  4 | import sys
  5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  6 | sys.path.insert(0, root_dir)
  7 | os.environ['LUIGI_CONFIG_PATH'] = root_dir + '/tests/client.cfg'
  8 | 
  9 | import mock
 10 | import unittest
 11 | 
 12 | from luiti.tests import date_begin
 13 | from etl_utils import cached_property
 14 | 
 15 | 
 16 | class TestLuitiUtils(unittest.TestCase):
 17 | 
 18 |     @mock.patch("os.system")
 19 |     def test_MongoImportTask(self, os_system, ):
 20 |         os_system.return_value = 0
 21 | 
 22 |         from luiti import MongoImportTask
 23 | 
 24 |         class AnotherMongoDay(MongoImportTask):
 25 |             root_dir = "/tmp"
 26 | 
 27 |             mongodb_connection_address = ('192.168.20.111', 37001)
 28 |             database_name = "17zuoye_crm"
 29 |             collection_name = "teacher_report"
 30 |             tmp_filepath = "/foobar.json"
 31 |             data_file_collection_model = "MongoCollection(foobar)"
 32 | 
 33 |             is_collection_exists = lambda self: True
 34 | 
 35 |         mongo_task = AnotherMongoDay(date_value=date_begin)
 36 | 
 37 |         self.assertEqual(mongo_task.mongodb_connection_host, "192.168.20.111")
 38 |         self.assertEqual(mongo_task.mongodb_connection_port, 37001)
 39 |         self.assertEqual(mongo_task.mongoimport_command, "/usr/bin/mongoimport --host 192.168.20.111 --port 37001 --db 17zuoye_crm --collection teacher_report --file /foobar.json")
 40 |         self.assertEqual(mongo_task.tmp_dir, "/tmp/AnotherMongoDay")
 41 | 
 42 |         self.assertFalse(mongo_task.run())
 43 | 
 44 |     def test_StaticFile(self):
 45 |         from luiti import StaticFile, luigi
 46 | 
 47 |         class FoobarFileDay(StaticFile):
 48 |             data_file = "/foobar"
 49 |             IODevice = luigi.LocalTarget
 50 |         self.assertEqual(FoobarFileDay().output().path, "/foobar")
 51 | 
 52 |         class OldFoobarFileDay(StaticFile):
 53 |             filepath = "/foobar"
 54 |             IODevice = luigi.LocalTarget
 55 |         self.assertEqual(OldFoobarFileDay().output().path, "/foobar")
 56 |         self.assertTrue(OldFoobarFileDay().complete())
 57 |         self.assertFalse(OldFoobarFileDay().run())
 58 | 
 59 |     def test_TaskDate(self):
 60 |         from luiti.task_templates import TaskMonth, TaskDay
 61 | 
 62 |         class AnotherMonthDay(TaskMonth):
 63 |             root_dir = "/tmp"
 64 | 
 65 |         class AnotherDay(TaskDay):
 66 |             root_dir = "/tmp"
 67 | 
 68 |         m1 = AnotherMonthDay(date_value=date_begin)
 69 |         self.assertEqual(len(m1.days_in_month), 30)
 70 | 
 71 |         m2 = AnotherDay(date_value="2015-07-20")
 72 |         self.assertEqual(m2.latest_30_days[0].format('YYYY-MM-DD'), '2015-06-21')
 73 |         self.assertEqual(m2.latest_30_days[-1].format('YYYY-MM-DD'), '2015-07-20')
 74 |         self.assertEquals(len(m2.latest_30_days), 30)
 75 | 
 76 |         m3 = AnotherDay(date_value="2015-07-20")
 77 |         self.assertEquals(m3.latest_7_days[0].format('YYYY-MM-DD'), '2015-07-14')
 78 |         self.assertEqual(m3.latest_7_days[-1].format('YYYY-MM-DD'), '2015-07-20')
 79 |         self.assertEquals(len(m3.latest_7_days), 7)
 80 | 
 81 |     def test_HiveTask(self):
 82 |         from luiti.task_templates import HiveTask
 83 | 
 84 |         class AnotherHiveDay(HiveTask):
 85 |             run_mode = "local"  # dont print when run unit test
 86 |             root_dir = "/another/hive/result/"
 87 |             use_hive_db = "main_hive_database"
 88 | 
 89 |             @cached_property
 90 |             def sql_main(self):
 91 |                 return "select * from example_table where dt=%s;" % self.date_str
 92 | 
 93 |         h1 = AnotherHiveDay(date_value=date_begin)
 94 |         self.assertEqual(h1.sql_main, "select * from example_table where dt=2014-09-01;")
 95 |         self.assertEqual(h1.query(), "USE main_hive_database; INSERT OVERWRITE DIRECTORY \"/another/hive/result/2014-09-01/another_hive_day.json\" select * from example_table where dt=2014-09-01;")
 96 | 
 97 |         class CompatibilityHiveDay(HiveTask):
 98 |             """ test old API """
 99 |             data_root = "/foobar"
100 |             hive_db = "foobar"
101 | 
102 |         h2 = CompatibilityHiveDay(date_value=date_begin)
103 |         self.assertEqual(h2.root_dir, "/foobar")
104 |         self.assertEqual(h2.use_hive_db, "foobar")
105 | 
106 |     def test_requires_with_prev_week(self):
107 |         from luiti.task_templates import TaskDay, TaskWeek
108 | 
109 |         class OneDay(TaskDay):
110 |             root_dir = "/tmp"
111 | 
112 |         class AnotherWeek(TaskWeek):
113 |             root_dir = "/tmp"
114 | 
115 |         w1 = AnotherWeek(date_value=date_begin)
116 |         tasks = w1.requires_with_prev_week(OneDay)
117 |         self.assertEqual(len(tasks), 8)
118 | 
119 | 
120 | if __name__ == '__main__':
121 |     unittest.main()
122 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os
  4 | import sys
  5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  6 | sys.path.insert(0, root_dir)
  7 | os.environ['LUIGI_CONFIG_PATH'] = root_dir + '/tests/client.cfg'
  8 | 
  9 | import unittest
 10 | import mock
 11 | from luigi.mock import MockTarget
 12 | 
 13 | 
class HdfsFile(MockTarget):
    # Plain subclass of luigi's ``MockTarget``: adds no attributes or methods.
    pass
 16 | 
 17 | 
 18 | class TestLuitiUtils(unittest.TestCase):
 19 | 
    def test_mr(self):
        # Walks through the MRUtils helpers one by one.
        from luiti import MRUtils

        # mr_key joins the item's values, plus an optional postfix, with "@@".
        item1 = {"class_id": 3, "uid": 7}
        self.assertEqual(MRUtils.mr_key(item1), "3@@7")
        self.assertEqual(MRUtils.mr_key(item1, "hid009"), "3@@7@@hid009")

        # A JSON string with non-ASCII content parses into unicode keys/values.
        self.assertEqual(MRUtils.json_parse("{\"你好\":\"世界\"}"), {u"你好": u"世界"})

        # The lines accepted here are of the form "<key>\t<value>"; a bare
        # JSON array is rejected.
        self.assertFalse(MRUtils.is_mr_line("[1,2]"))
        self.assertTrue(MRUtils.is_mr_line("hello\t{framework:luigi}"))
        self.assertTrue(MRUtils.is_mr_line("1@@" + "2" * 40 + "\t[world]"))

        self.assertEqual(
            MRUtils.unicode_value({u"hello": u"世界"}, "hello"), u"世界")

        # split_mr_kv returns the key plus the parsed JSON value.
        self.assertEqual(
            MRUtils.split_mr_kv("hello\t[1,2,3,4]"), ["hello", [1, 2, 3, 4]])

        # Values of the listed keys are summed across the dicts.
        self.assertEqual(
            MRUtils.merge_keys_in_dict([{"a": 1}, {"a": 2}], ["a"]), {"a": 3})

        self.assertEqual(
            MRUtils.split_prefix_keys("1@@2@@other"), ["1", "2", "other"])

        # select_prefix_keys gives the same result with or without a leading
        # double quote on the input.
        prefix_str1 = "232@@8923802@@afenti"
        prefix_str2 = "\"" + prefix_str1
        self.assertEqual(
            MRUtils.select_prefix_keys(prefix_str1, [0, 1]), "232@@8923802")
        self.assertEqual(
            MRUtils.select_prefix_keys(prefix_str2, [0, 1]), "232@@8923802")

        # str_dump keeps non-ASCII characters unescaped in its output.
        self.assertEqual(
            MRUtils.str_dump({"hello": u"世界"}), """{"hello": "世界"}""")

        # filter_dict keeps only the requested key.
        self.assertEqual(
            MRUtils.filter_dict(
                {"hello": "world", "foobar": "barfoo"}, "hello"),
            {"hello": "world"})
 59 | 
 60 |     def test_math(self):
 61 |         from luiti import MathUtils
 62 | 
 63 |         self.assertEqual(MathUtils.percent(5, 10), 0.5)
 64 |         self.assertEqual(MathUtils.percent(5, 0), 0)
 65 |         self.assertEqual(MathUtils.percent(5, None), 0)
 66 |         self.assertEqual(MathUtils.percent(None, 1), 0)
 67 | 
 68 |     def test_date(self):
 69 |         from luiti import DateUtils
 70 |         import arrow
 71 | 
 72 |         arrow1 = DateUtils.arrow.get("2014-10-01 12:01:01")
 73 |         arrow2 = DateUtils.arrow.get("2014-10-15 12:01:01")
 74 | 
 75 |         self.assertEqual(DateUtils.arrow_str(arrow1), "2014-10-01")
 76 | 
 77 |         self.assertEqual(len(DateUtils.days_in_week(arrow1)), 7)
 78 |         self.assertTrue(arrow1.floor('day') in DateUtils.days_in_week(arrow1))
 79 | 
 80 |         self.assertEqual(len(DateUtils.weeks_in_range(arrow1, arrow2)), 3)
 81 | 
 82 |         self.assertEqual(
 83 |             len(DateUtils.fixed_weeks_in_range("2014-10-01-2014-10-15")), 1)
 84 |         self.assertEqual(
 85 |             len(DateUtils.fixed_weeks_in_range("2014-09-29-2014-10-15")), 2)
 86 | 
 87 |         self.assertEqual(
 88 |             DateUtils.date_value_by_type_in_last("2014-09-01", "week"),
 89 |             arrow.get("2014-08-25"))
 90 | 
 91 |     def test_ext(self):
 92 |         from etl_utils import cached_property
 93 |         from luiti.utils import ExtUtils
 94 |         import inspect
 95 | 
 96 |         class Foobar(ExtUtils.ExtendClass):
 97 | 
 98 |             def method_1(self):
 99 |                 return "method_1"
100 | 
101 |             @property
102 |             def property_1(self):
103 |                 return "property_1"
104 | 
105 |             @cached_property
106 |             def cached_property_1(self):
107 |                 return "cached_property_1"
108 | 
109 |         fb1 = Foobar()
110 |         self.assertEqual(fb1.method_1(), "method_1")
111 |         self.assertEqual(fb1.property_1, "property_1")
112 |         self.assertEqual(fb1.cached_property_1, "cached_property_1")
113 | 
114 |         self.assertTrue(inspect.ismethod(Foobar.method_1))
115 |         self.assertTrue(isinstance(Foobar.property_1, property))
116 |         self.assertTrue(isinstance(Foobar.cached_property_1, cached_property), Foobar.cached_property_1)
117 | 
118 |         Foobar.extend({
119 |             'not_exist_str': "not_exist_str",
120 |             'method_1': lambda self: "method_2",
121 |             'property_1': lambda self: "property_2",
122 |             'cached_property_1': lambda self: "cached_property_2",
123 |         })
124 | 
125 |         fb2 = Foobar()
126 |         self.assertEqual(fb2.method_1(), "method_2")
127 |         self.assertEqual(fb2.property_1, "property_2")
128 |         self.assertEqual(fb2.cached_property_1, "cached_property_2")
129 | 
130 |         self.assertTrue(isinstance(Foobar.not_exist_str, str))
131 |         self.assertTrue(inspect.ismethod(Foobar.method_1))
132 |         self.assertTrue(isinstance(Foobar.property_1, property))
133 |         self.assertTrue(isinstance(Foobar.cached_property_1, cached_property), Foobar.cached_property_1)
134 | 
135 |     @mock.patch("luigi.hdfs.exists")
136 |     @mock.patch("luigi.hdfs.remove")
137 |     def test_IOUtils(self, remove, exists):
138 |         remove.return_value = True
139 |         exists.return_value = True
140 | 
141 |         from luiti.utils import IOUtils
142 | 
143 |         self.assertEqual(IOUtils.json_dump({}), "{}")
144 |         self.assertEqual(IOUtils.json_dump([{}]), "[{}]")
145 | 
146 |         f1 = HdfsFile("writor")
147 |         self.assertEqual(IOUtils.write_json_to_output({}, f1), 0)
148 | 
149 |         f2 = HdfsFile("writor")
150 |         with f2.open("w") as w2:
151 |             w2.write("""{"foo":"bar"}""")
152 |         self.assertEqual(IOUtils.read_json_from_output(f2), {"foo": "bar"})
153 | 
154 |         f3 = HdfsFile("writor_error")
155 |         with f3.open("w") as w3:
156 |             w3.write("""{"foo":"bar"}\n{}""")  # two lines
157 |         self.assertRaises(ValueError, lambda: IOUtils.read_json_from_output(f3))
158 | 
159 |         self.assertTrue(IOUtils.remove_files("f1", "f2"), True)
160 | 
161 |     def test_TargetUtils(self):
162 |         from luiti.utils import TargetUtils
163 | 
164 |         def mock_test_file(filename, data):
165 |             f = HdfsFile(filename)
166 |             with f.open("w") as w:
167 |                 w.write(data)
168 | 
169 |             return f
170 |         g1 = TargetUtils.line_read(mock_test_file("g1", """\nline one\nline two\n   \n"""))
171 |         self.assertTrue(list(g1), [u"line one", u"line two"])
172 | 
173 |         g2 = TargetUtils.json_read(mock_test_file("g1", """\n{"a": 1}\n[1, "b"]  \n \n"""))
174 |         self.assertTrue(list(g2), [{"a": 1}, [1, "b"]])
175 | 
176 |     @mock.patch("luiti.utils.HDFSUtils.hdfs_cli")
177 |     @mock.patch("luiti.utils.CommandUtils.execute")
178 |     @mock.patch("luiti.utils.HDFSUtils.copyToLocal")
179 |     @mock.patch("os.path.isdir")
180 |     @mock.patch("luiti.utils.HDFSUtils.exists")
181 |     def test_CompressUtils(self, hdfs_exists, os_path_isdir, copyToLocal, execute, hdfs_cli):
182 |         """ a rough test ... """
183 |         hdfs_exists.return_value = True
184 |         os_path_isdir.return_value = False
185 |         copyToLocal.return_value = True
186 |         execute.return_value = True
187 |         hdfs_cli.return_value = "hdfs"
188 | 
189 |         from luiti.utils import CompressUtils
190 |         self.assertTrue(CompressUtils.unzip_with_upload(
191 |             "orig", "dist",
192 |             tmp_dir="/tmp",
193 |             tmp_name="foobar"))
194 | 
195 |     @mock.patch("os.system")
196 |     def test_CommandUtils(self, os_system):
197 |         os_system.return_value = 0
198 | 
199 |         from luiti.utils import CommandUtils
200 |         self.assertEqual(CommandUtils.execute("ls"), 0)
201 |         self.assertEqual(CommandUtils.execute("ls", dry=True), 0)
202 | 
203 | 
204 | if __name__ == '__main__':
205 |     unittest.main()
206 | 


--------------------------------------------------------------------------------
/tests/webui_packages/README.markdown:
--------------------------------------------------------------------------------
 1 | Test webui visualizer.
 2 | =======================
 3 | 
 4 | 
 5 | Package relations.
 6 | -----------------------
 7 | ```text
 8 |       Hierarchical data warehouse
 9 | 
10 |               /   dump
11 |              |     ||
12 |              |     \/
13 |             /    clean
14 |    Data Flow       ||
15 |             \      \/
16 |              |   middle
17 |              |     ||
18 |              |     \/
19 |               \  summary
20 | 
21 | ```
22 | 
23 | 
24 | Some overwritten configuration.
25 | -----------------------
26 | See `luiti_webui_tests/luiti_webui_tests/__init__.py`.
27 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/README.markdown:
--------------------------------------------------------------------------------
1 | Luiti Clean
2 | =======================
3 | 
4 | TODO ...


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/luiti_clean/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_clean/luiti_clean/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/luiti_clean/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_clean/luiti_clean/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/luiti_clean/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["WebuiDay", "luigi"]
4 | 
5 | 
6 | from luiti_webui_tests import WebuiDay, luigi
7 | luigi.plug_packages("luiti_dump", "luiti_clean", "luiti_middle", "luiti_summary")
8 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/luiti_clean/luiti_tasks/clean_web_log_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("DumpWebLogDay")
 7 | class CleanWebLogDay(WebuiDay):
 8 |     """
 9 |     Clean web log
10 |     """
11 | 
12 |     def requires(self):
13 |         return self.DumpWebLogDay_task
14 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | setup(
 6 |     name="luiti_clean",
 7 |     version="0.0.1",
 8 |     packages=[
 9 |         "luiti_clean",
10 |         "luiti_clean/luiti_tasks", ],
11 |     zip_safe=False,
12 | )


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_clean/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | 
 8 | import unittest
 9 | from luiti import MrTestCase
10 | 
11 | 
12 | @MrTestCase
13 | class TestMapReduce(unittest.TestCase):
14 |     mr_task_names = [
15 |             ]
16 | 
17 | if __name__ == '__main__':
18 |     unittest.main()


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/README.markdown:
--------------------------------------------------------------------------------
1 | Luiti Dump
2 | =======================
3 | 
4 | TODO ...


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/luiti_dump/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_dump/luiti_dump/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/luiti_dump/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_dump/luiti_dump/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/luiti_dump/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["WebuiDay", "luigi"]
4 | 
5 | 
6 | from luiti_webui_tests import WebuiDay, luigi
7 | luigi.plug_packages("luiti_dump", "luiti_clean", "luiti_middle", "luiti_summary")
8 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/luiti_dump/luiti_tasks/dump_browser_map_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay
 4 | from etl_utils import cached_property
 5 | 
 6 | 
 7 | class DumpBrowserMapDay(WebuiDay):
 8 |     """
 9 |     Mimic dump {int: name} format data from MySQL relational database.
10 |     """
11 | 
12 |     @cached_property
13 |     def cached_data(self):
14 |         """
15 |         Actually need to read data from self.output().
16 |         """
17 |         return {
18 |             "Google Chrome": 1,
19 |             "Mozilla Firefox": 2,
20 |             "IE": 3,
21 |         }
22 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/luiti_dump/luiti_tasks/dump_web_log_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay
 4 | 
 5 | 
 6 | class DumpWebLogDay(WebuiDay):
 7 |     """
 8 |     Dump web log from other database/storage.
 9 |     """
10 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | setup(
 6 |     name="luiti_dump",
 7 |     version="0.0.1",
 8 |     packages=[
 9 |         "luiti_dump",
10 |         "luiti_dump/luiti_tasks", ],
11 |     zip_safe=False,
12 | )


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_dump/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | 
 8 | import unittest
 9 | from luiti import MrTestCase
10 | 
11 | 
12 | @MrTestCase
13 | class TestMapReduce(unittest.TestCase):
14 |     mr_task_names = [
15 |             ]
16 | 
17 | if __name__ == '__main__':
18 |     unittest.main()


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/README.markdown:
--------------------------------------------------------------------------------
1 | Luiti Middle
2 | =======================
3 | 
4 | TODO ...


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_middle/luiti_middle/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | 
3 | __all__ = ["WebuiDay", "luigi"]
4 | 
5 | 
6 | from luiti_webui_tests import WebuiDay, luigi
7 | luigi.plug_packages("luiti_dump", "luiti_clean", "luiti_middle", "luiti_summary")
8 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/counter_visitor_by_browser_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("CleanWebLogDay", "DumpBrowserMapDay")
 7 | class CounterVisitorByBrowserDay(WebuiDay):
 8 |     """
 9 |     I'm
10 |     Counter
11 |     Visitor
12 |     By
13 |     Browser
14 |     Day.
15 |     """
16 | 
17 |     def requires(self):
18 |         return [self.CleanWebLogDay_task, self.DumpBrowserMapDay_task]
19 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/counter_visitor_by_region_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("CleanWebLogDay")
 7 | class CounterVisitorByRegionDay(WebuiDay):
 8 | 
 9 |     def requires(self):
10 |         return self.CleanWebLogDay_task
11 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/luiti_middle/luiti_tasks/counter_visitor_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("CounterVisitorByBrowserDay")
 7 | class CounterVisitorDay(WebuiDay):
 8 | 
 9 |     def requires(self):
10 |         return self.CounterVisitorByBrowserDay_task
11 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | setup(
 6 |     name="luiti_middle",
 7 |     version="0.0.1",
 8 |     packages=[
 9 |         "luiti_middle",
10 |         "luiti_middle/luiti_tasks", ],
11 |     zip_safe=False,
12 | )


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_middle/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | 
 8 | import unittest
 9 | from luiti import MrTestCase
10 | 
11 | 
12 | @MrTestCase
13 | class TestMapReduce(unittest.TestCase):
14 |     mr_task_names = [
15 |             ]
16 | 
17 | if __name__ == '__main__':
18 |     unittest.main()


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/README.markdown:
--------------------------------------------------------------------------------
1 | Luiti Summary
2 | =======================
3 | 
4 | TODO ...


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/luiti_summary/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_summary/luiti_summary/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/luiti_summary/luiti_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/webui_packages/luiti_summary/luiti_summary/luiti_tasks/__init__.py


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/luiti_summary/luiti_tasks/__init_luiti.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | __all__ = ["WebuiDay", "luigi"]
 4 | 
 5 | 
 6 | from luiti_webui_tests import WebuiDay, luigi, VisualiserEnvTemplate
 7 | luigi.plug_packages("luiti_dump", "luiti_clean", "luiti_middle", "luiti_summary")
 8 | 
 9 | 
10 | # Plug more packages to stay compatible with old tests, without fully migrating them to webui_packages.
11 | luigi.plug_packages("project_A", "project_B", "zip_package_by_luiti")
12 | 
13 | 
14 | luiti_visualiser_env = VisualiserEnvTemplate({
15 |     "file_web_url_prefix": lambda: "http://HUE/filebrowser/#/",
16 |     "date_begin": "2014-09-01",
17 |     "additional_task_parameters": {
18 |         "language": {
19 |             "values": ["Chinese", "English"],
20 |             "default": "English",
21 |         }
22 |     },
23 |     "package_config": {
24 |         "defaults": ["luiti_dump", "luiti_clean", "luiti_middle", "luiti_summary", ],
25 |     }
26 | })
27 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/luiti_summary/luiti_tasks/beta_report_day.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from .__init_luiti import WebuiDay, luigi
 4 | 
 5 | 
 6 | @luigi.ref_tasks("CounterVisitorByBrowserDay", "CounterVisitorByRegionDay", "CounterVisitorDay")
 7 | class BetaReportDay(WebuiDay):
 8 |     """
 9 |     Beta report day's document.
10 |     """
11 | 
12 |     def requires(self):
13 |         return [self.CounterVisitorByBrowserDay_task,
14 |                 self.CounterVisitorByRegionDay_task,
15 |                 self.CounterVisitorDay_task]
16 | 


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | setup(
 6 |     name="luiti_summary",
 7 |     version="0.0.1",
 8 |     packages=[
 9 |         "luiti_summary",
10 |         "luiti_summary/luiti_tasks", ],
11 |     zip_safe=False,
12 | )


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_summary/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import sys
 5 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.insert(0, root_dir)
 7 | 
 8 | import unittest
 9 | from luiti import MrTestCase
10 | 
11 | 
12 | @MrTestCase
13 | class TestMapReduce(unittest.TestCase):
14 |     mr_task_names = [
15 |             ]
16 | 
17 | if __name__ == '__main__':
18 |     unittest.main()


--------------------------------------------------------------------------------
/tests/webui_packages/luiti_webui_tests/luiti_webui_tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | """
 4 | Provide test environment for webui_packages.
 5 | """
 6 | 
 7 | import os
 8 | from etl_utils import cached_property
 9 | from luiti import luigi, TaskDay, VisualiserEnvTemplate
10 | from luigi.mock import MockTarget
11 | 
12 | 
13 | @cached_property
14 | def root_dir(self):
15 |     return os.path.join("/webui_packages", self.package_name)
16 | 
17 | 
18 | def data_file(self):
19 |     return os.path.join(self.root_dir, self.task_clsname, self.date_str)
20 | 
21 | 
22 | def mock_output(self):
23 |     """ Return a luigi.mock MockTarget constructed from self.data_file. """
24 |     return MockTarget(self.data_file)
25 | 
26 | 
27 | class WebuiDay(TaskDay):
28 |     """
29 |     Don't overwrite TaskDay or TaskBase directly, or other test files will fail.
30 |     """
31 |     pass
32 | 
33 | 
34 | WebuiDay.extend({
35 |     "root_dir": root_dir,
36 |     "data_file": data_file,
37 |     "output": mock_output,
38 | })
39 | 
40 | 
41 | __all__ = ["luigi", "WebuiDay", "VisualiserEnvTemplate"]
42 | 


--------------------------------------------------------------------------------
/tests/zip_package_by_luiti/setup.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf-8-*-
 2 | 
 3 | 
 4 | from setuptools import setup
 5 | 
 6 | setup(
 7 |     name='zip_package_by_luiti',
 8 |     version='0.0.1',
 9 |     packages=[
10 |         'zip_package_by_luiti',
11 |         'zip_package_by_luiti/subfold', ],
12 |     zip_safe=True,
13 | )
14 | 


--------------------------------------------------------------------------------
/tests/zip_package_by_luiti/zip_package_by_luiti/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/zip_package_by_luiti/zip_package_by_luiti/__init__.py


--------------------------------------------------------------------------------
/tests/zip_package_by_luiti/zip_package_by_luiti/subfold/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dchentech/luiti/11a5c62b265a92910a1d4c82431e3697b8b06814/tests/zip_package_by_luiti/zip_package_by_luiti/subfold/__init__.py


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox (http://tox.testrun.org/) is a tool for running tests
 2 | # in multiple virtualenvs. This configuration file will run the
 3 | # test suite on all supported python versions. To use it, "pip install tox"
 4 | # and then run "tox" from this directory.
 5 | 
 6 | 
 7 | [tox]
 8 | envlist = py{27}-{cdh}, pep8
 9 | skipsdist = True
10 | 
11 | [testenv]
12 | usedevelop = True
13 | deps=
14 |   coverage>=3.6,<3.999
15 |   coveralls
16 |   nose
17 |   mock
18 | setenv =
19 |   COVERAGE_PROCESS_START={toxinidir}/.coveragerc
20 |   FULL_COVERAGE=true
21 | commands =
22 |   python --version
23 |   python setup.py install
24 |   nosetests --with-coverage --cover-inclusive --cover-package=luiti
25 |   coverage combine
26 |   coveralls
27 | 
28 | [testenv:clean]
29 | commands=
30 |   coverage erase
31 | 
32 | [testenv:stats]
33 | commands=
34 |   coverage report
35 |   coverage html
36 | 


--------------------------------------------------------------------------------