├── .gitignore
├── .travis.yml
├── LICENSE.txt
├── README.md
├── build.gradle
├── config
└── checkstyle
│ ├── checkstyle.xml
│ └── default.xml
├── gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── lib
└── embulk
│ └── filter
│ └── mask.rb
└── src
├── main
└── java
│ └── org
│ └── embulk
│ └── filter
│ └── mask
│ ├── MaskFilterPlugin.java
│ └── MaskPageOutput.java
└── test
└── java
└── org
└── embulk
└── filter
└── mask
└── TestMaskFilterPlugin.java
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /pkg/
3 | /tmp/
4 | *.gemspec
5 | .gradle/
6 | /classpath/
7 | build/
8 | .idea
9 | /.settings/
10 | /.metadata/
11 | .classpath
12 | .project
13 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | jdk:
3 | # Tentatively ignore these environments as we face SSL errors
4 | # https://github.com/gradle/gradle/issues/2421
5 | #
6 | # - openjdk7
7 | # - oraclejdk7
8 | - oraclejdk8
9 | script:
10 | - ./gradlew test
11 | after_success:
12 | - ./gradlew jacocoTestReport coveralls
13 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | MIT License
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining
5 | a copy of this software and associated documentation files (the
6 | "Software"), to deal in the Software without restriction, including
7 | without limitation the rights to use, copy, modify, merge, publish,
8 | distribute, sublicense, and/or sell copies of the Software, and to
9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Mask filter plugin for Embulk
2 |
3 | [![Coverage Status](https://coveralls.io/repos/github/beniyama/embulk-filter-mask/badge.svg)](https://coveralls.io/github/beniyama/embulk-filter-mask)
4 |
5 | Mask columns with asterisks in a variety of patterns. (This plugin is still in its initial development phase and lacks some basic features needed for production use.)
6 |
7 | ## Overview
8 |
9 | * **Plugin type**: filter
10 |
11 | ## Configuration
12 |
13 | *Caution*: `type` is now used to specify the mask type (such as `all` or `email`), instead of `pattern`, which was used for this purpose in version 0.1.1 and earlier.
14 |
15 | - **columns**: target columns whose values are replaced with asterisks (array, required)
16 | - **name**: name of the column (string, required)
17 | - **type**: mask type, `all`, `email`, `regex` or `substring` (string, default: `all`)
18 | - **paths**: list of JSON path and type, works if the column type is JSON
19 | - `[{key: $.json_path1}, {key: $.json_path2}]` would mask both `$.json_path1` and `$.json_path2` nodes
20 | - Elements under the nodes would be converted to string and then masked (e.g., `[0,1,2]` -> `*******`)
21 | - **length**: if specified, this filter masks the value with a fixed number of asterisks (integer, optional; supported only for the `all`, `email` and `substring` types)
22 | - **pattern**: Regex pattern such as "[0-9]+" (string, required for `regex` type)
23 | - **start**: The beginning index for `substring` type. The value starts from 0 and inclusive (integer, default: 0)
24 | - **end**: The ending index for `substring` type. The value is exclusive (integer, default: length of the target column)
25 |
26 | ## Example
27 |
28 |
29 |
30 | If you have the following data in a CSV file (or any other input format),
31 |
32 | |first_name | last_name | gender | age | contact |
33 | |---|---|---|---|---|
34 | | Benjamin | Bell | male | 30 | bell.benjamin_dummy@example.com |
35 | | Lucas | Duncan | male | 20 | lucas.duncan_dummy@example.com |
36 | | Elizabeth | May | female | 25 | elizabeth.may_dummy@example.com |
37 | | Christian | Reid | male | 15 | christian.reid_dummy@example.com |
38 | | Amy | Avery | female | 40 | amy.avercy_dummy@example.com |
39 |
40 | the filter configuration below
41 |
42 | ```yaml
43 | filters:
44 | - type: mask
45 | columns:
46 | - { name: last_name}
47 | - { name: age}
48 | - { name: contact, type: email, length: 5}
49 | ```
50 |
51 | would produce
52 |
53 | |first_name | last_name | gender | age | contact |
54 | |---|---|---|---|---|
55 | | Benjamin | **** | male | ** | *****@example.com |
56 | | Lucas | ****** | male | ** | *****@example.com |
57 | | Elizabeth | *** | female | ** | *****@example.com |
58 | | Christian | **** | male | ** | *****@example.com |
59 | | Amy | ***** | female | ** | *****@example.com |
60 |
61 | With the `regex` and/or `substring` types, the configuration
62 |
63 | ```yaml
64 | filters:
65 | - type: mask
66 | columns:
67 | - { name: first_name, type: regex, pattern: "[a-z]"}
68 | - { name: contact, type: substring, start: 5, length: 5}
69 | ```
70 |
71 | would produce
72 |
73 | |first_name | last_name | gender | age | contact |
74 | |---|---|---|---|---|
75 | | B******* | Bell | male | 30 | bell.***** |
76 | | L**** | Duncan | male | 20 | lucas***** |
77 | | E******** | May | female | 25 | eliza***** |
78 | | C******** | Reid | male | 15 | chris***** |
79 | | A** | Avery | female | 40 | amy.a***** |
80 |
81 | JSON type column is also partially supported.
82 |
83 | If you have a `user` column with this JSON data structure
84 |
85 | ```json
86 | {
87 | "full_name": {
88 | "first_name": "Benjamin",
89 | "last_name": "Bell"
90 | },
91 | "gender": "male",
92 | "age": 30,
93 | "email": "test_mail@example.com"
94 | }
95 | ```
96 |
97 | the filter configuration below
98 |
99 | ```yaml
100 | filters:
101 | - type: mask
102 | columns:
103 | - { name: user, paths: [{key: $.full_name.first_name}, {key: $.email, type: email}]}
104 | ```
105 |
106 | would produce
107 |
108 | ```json
109 | {
110 | "full_name": {
111 | "first_name": "********",
112 | "last_name": "Bell"
113 | },
114 | "gender": "male",
115 | "age": 30,
116 | "email": "*********@example.com"
117 | }
118 | ```
119 |
120 |
121 | ## Build
122 |
123 | ```
124 | $ ./gradlew gem # -t to watch change of files and rebuild continuously
125 | ```
126 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id "com.jfrog.bintray" version "1.1"
3 | id "com.github.jruby-gradle.base" version "0.1.5"
4 | id "com.github.kt3k.coveralls" version "2.8.1"
5 | id "java"
6 | id "checkstyle"
7 | id "jacoco"
8 | }
9 | import com.github.jrubygradle.JRubyExec
10 | repositories {
11 | mavenCentral()
12 | jcenter()
13 | }
14 | configurations {
15 | provided
16 | }
17 |
18 | version = "0.2.1"
19 |
20 | sourceCompatibility = 1.7
21 | targetCompatibility = 1.7
22 |
23 | dependencies {
24 | compile "org.embulk:embulk-core:0.8.29"
25 | provided "org.embulk:embulk-core:0.8.29"
26 | compile "com.jayway.jsonpath:json-path:2.+"
27 | testCompile "junit:junit:4.+"
28 | testCompile "org.embulk:embulk-core:0.8.29:tests"
29 | }
30 |
31 | jacocoTestReport {
32 | reports {
33 | xml.enabled = true // coveralls plugin depends on xml format report
34 | html.enabled = true
35 | }
36 | }
37 |
38 | task classpath(type: Copy, dependsOn: ["jar"]) {
39 | doFirst { file("classpath").deleteDir() }
40 | from (configurations.runtime - configurations.provided + files(jar.archivePath))
41 | into "classpath"
42 | }
43 | clean { delete "classpath" }
44 |
45 | checkstyle {
46 | configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
47 | toolVersion = '6.14.1'
48 | }
49 | checkstyleMain {
50 | configFile = file("${project.rootDir}/config/checkstyle/default.xml")
51 | ignoreFailures = true
52 | }
53 | checkstyleTest {
54 | configFile = file("${project.rootDir}/config/checkstyle/default.xml")
55 | ignoreFailures = true
56 | }
57 | task checkstyle(type: Checkstyle) {
58 | classpath = sourceSets.main.output + sourceSets.test.output
59 | source = sourceSets.main.allJava + sourceSets.test.allJava
60 | }
61 |
62 | task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
63 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
64 | script "${project.name}.gemspec"
65 | doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
66 | }
67 |
68 | task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
69 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
70 | script "pkg/${project.name}-${project.version}.gem"
71 | }
72 |
73 | task "package"(dependsOn: ["gemspec", "classpath"]) << {
74 | println "> Build succeeded."
75 | println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
76 | }
77 |
78 | task gemspec {
79 | ext.gemspecFile = file("${project.name}.gemspec")
80 | inputs.file "build.gradle"
81 | outputs.file gemspecFile
82 | doLast { gemspecFile.write($/
83 | Gem::Specification.new do |spec|
84 | spec.name = "${project.name}"
85 | spec.version = "${project.version}"
86 | spec.authors = ["Tetsuo Yamabe"]
87 | spec.summary = %[Mask filter plugin for Embulk]
88 | spec.description = %[Mask]
89 | spec.email = ["tetsuo.yamabe@gmail.com"]
90 | spec.licenses = ["MIT"]
91 | spec.homepage = "https://github.com/beniyama/embulk-filter-mask"
92 |
93 | spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
94 | spec.test_files = spec.files.grep(%r"^(test|spec)/")
95 | spec.require_paths = ["lib"]
96 |
97 | #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
98 | spec.add_development_dependency 'bundler', ['~> 1.0']
99 | spec.add_development_dependency 'rake', ['>= 10.0']
100 | end
101 | /$)
102 | }
103 | }
104 | clean { delete "${project.name}.gemspec" }
105 |
--------------------------------------------------------------------------------
/config/checkstyle/checkstyle.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/config/checkstyle/default.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/beniyama/embulk-filter-mask/b1b545087d0fa50ecdc2d72773b3c58c4e20dc0f/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Tue Jul 12 16:30:09 JST 2016
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-all.zip
7 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ##############################################################################
4 | ##
5 | ## Gradle start up script for UN*X
6 | ##
7 | ##############################################################################
8 |
9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10 | DEFAULT_JVM_OPTS=""
11 |
12 | APP_NAME="Gradle"
13 | APP_BASE_NAME=`basename "$0"`
14 |
15 | # Use the maximum available, or set MAX_FD != -1 to use that value.
16 | MAX_FD="maximum"
17 |
18 | warn ( ) {
19 | echo "$*"
20 | }
21 |
22 | die ( ) {
23 | echo
24 | echo "$*"
25 | echo
26 | exit 1
27 | }
28 |
29 | # OS specific support (must be 'true' or 'false').
30 | cygwin=false
31 | msys=false
32 | darwin=false
33 | case "`uname`" in
34 | CYGWIN* )
35 | cygwin=true
36 | ;;
37 | Darwin* )
38 | darwin=true
39 | ;;
40 | MINGW* )
41 | msys=true
42 | ;;
43 | esac
44 |
45 | # Attempt to set APP_HOME
46 | # Resolve links: $0 may be a link
47 | PRG="$0"
48 | # Need this for relative symlinks.
49 | while [ -h "$PRG" ] ; do
50 | ls=`ls -ld "$PRG"`
51 | link=`expr "$ls" : '.*-> \(.*\)$'`
52 | if expr "$link" : '/.*' > /dev/null; then
53 | PRG="$link"
54 | else
55 | PRG=`dirname "$PRG"`"/$link"
56 | fi
57 | done
58 | SAVED="`pwd`"
59 | cd "`dirname \"$PRG\"`/" >/dev/null
60 | APP_HOME="`pwd -P`"
61 | cd "$SAVED" >/dev/null
62 |
63 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
64 |
65 | # Determine the Java command to use to start the JVM.
66 | if [ -n "$JAVA_HOME" ] ; then
67 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
68 | # IBM's JDK on AIX uses strange locations for the executables
69 | JAVACMD="$JAVA_HOME/jre/sh/java"
70 | else
71 | JAVACMD="$JAVA_HOME/bin/java"
72 | fi
73 | if [ ! -x "$JAVACMD" ] ; then
74 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
75 |
76 | Please set the JAVA_HOME variable in your environment to match the
77 | location of your Java installation."
78 | fi
79 | else
80 | JAVACMD="java"
81 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
82 |
83 | Please set the JAVA_HOME variable in your environment to match the
84 | location of your Java installation."
85 | fi
86 |
87 | # Increase the maximum file descriptors if we can.
88 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
89 | MAX_FD_LIMIT=`ulimit -H -n`
90 | if [ $? -eq 0 ] ; then
91 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
92 | MAX_FD="$MAX_FD_LIMIT"
93 | fi
94 | ulimit -n $MAX_FD
95 | if [ $? -ne 0 ] ; then
96 | warn "Could not set maximum file descriptor limit: $MAX_FD"
97 | fi
98 | else
99 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
100 | fi
101 | fi
102 |
103 | # For Darwin, add options to specify how the application appears in the dock
104 | if $darwin; then
105 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
106 | fi
107 |
108 | # For Cygwin, switch paths to Windows format before running java
109 | if $cygwin ; then
110 | APP_HOME=`cygpath --path --mixed "$APP_HOME"`
111 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112 | JAVACMD=`cygpath --unix "$JAVACMD"`
113 |
114 | # We build the pattern for arguments to be converted via cygpath
115 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
116 | SEP=""
117 | for dir in $ROOTDIRSRAW ; do
118 | ROOTDIRS="$ROOTDIRS$SEP$dir"
119 | SEP="|"
120 | done
121 | OURCYGPATTERN="(^($ROOTDIRS))"
122 | # Add a user-defined pattern to the cygpath arguments
123 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then
124 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
125 | fi
126 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
127 | i=0
128 | for arg in "$@" ; do
129 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
130 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
131 |
132 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
133 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
134 | else
135 | eval `echo args$i`="\"$arg\""
136 | fi
137 | i=$((i+1))
138 | done
139 | case $i in
140 | (0) set -- ;;
141 | (1) set -- "$args0" ;;
142 | (2) set -- "$args0" "$args1" ;;
143 | (3) set -- "$args0" "$args1" "$args2" ;;
144 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
145 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
146 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
147 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
148 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
149 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
150 | esac
151 | fi
152 |
153 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
154 | function splitJvmOpts() {
155 | JVM_OPTS=("$@")
156 | }
157 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
158 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
159 |
160 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
161 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12 | set DEFAULT_JVM_OPTS=
13 |
14 | set DIRNAME=%~dp0
15 | if "%DIRNAME%" == "" set DIRNAME=.
16 | set APP_BASE_NAME=%~n0
17 | set APP_HOME=%DIRNAME%
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windowz variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | if "%@eval[2+2]" == "4" goto 4NT_args
53 |
54 | :win9xME_args
55 | @rem Slurp the command line arguments.
56 | set CMD_LINE_ARGS=
57 | set _SKIP=2
58 |
59 | :win9xME_args_slurp
60 | if "x%~1" == "x" goto execute
61 |
62 | set CMD_LINE_ARGS=%*
63 | goto execute
64 |
65 | :4NT_args
66 | @rem Get arguments from the 4NT Shell from JP Software
67 | set CMD_LINE_ARGS=%$
68 |
69 | :execute
70 | @rem Setup the command line
71 |
72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73 |
74 | @rem Execute Gradle
75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76 |
77 | :end
78 | @rem End local scope for the variables with windows NT shell
79 | if "%ERRORLEVEL%"=="0" goto mainEnd
80 |
81 | :fail
82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83 | rem the _cmd.exe /c_ return code!
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85 | exit /b 1
86 |
87 | :mainEnd
88 | if "%OS%"=="Windows_NT" endlocal
89 |
90 | :omega
91 |
--------------------------------------------------------------------------------
/lib/embulk/filter/mask.rb:
--------------------------------------------------------------------------------
1 | Embulk::JavaPlugin.register_filter(
2 | "mask", "org.embulk.filter.mask.MaskFilterPlugin",
3 | File.expand_path('../../../../classpath', __FILE__))
4 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java:
--------------------------------------------------------------------------------
1 | package org.embulk.filter.mask;
2 |
3 | import com.google.common.base.Optional;
4 | import com.google.common.collect.ImmutableList;
5 | import org.embulk.config.Config;
6 | import org.embulk.config.ConfigDefault;
7 | import org.embulk.config.ConfigSource;
8 | import org.embulk.config.Task;
9 | import org.embulk.config.TaskSource;
10 | import org.embulk.spi.*;
11 | import org.embulk.spi.type.Type;
12 | import org.embulk.spi.type.Types;
13 | import org.slf4j.Logger;
14 |
15 | import java.util.HashMap;
16 | import java.util.List;
17 | import java.util.Map;
18 |
19 | public class MaskFilterPlugin implements FilterPlugin {
20 | private final Logger logger = Exec.getLogger(MaskFilterPlugin.class);
21 |
22 | public interface PluginTask extends Task {
23 | @Config("columns")
24 | List getColumns();
25 |
26 | }
27 |
28 | public interface MaskColumn extends Task {
29 | @Config("name")
30 | String getName();
31 |
32 | @Config("type")
33 | @ConfigDefault("\"all\"")
34 | Optional getType();
35 |
36 | @Config("pattern")
37 | @ConfigDefault("\"all\"")
38 | Optional getPattern();
39 |
40 | @Config("length")
41 | @ConfigDefault("null")
42 | Optional getLength();
43 |
44 | @Config("start")
45 | @ConfigDefault("null")
46 | Optional getStart();
47 |
48 | @Config("end")
49 | @ConfigDefault("null")
50 | Optional getEnd();
51 |
52 | @Config("paths")
53 | @ConfigDefault("null")
54 | Optional>> getPaths();
55 | }
56 |
57 | @Override
58 | public void transaction(ConfigSource config, Schema inputSchema,
59 | FilterPlugin.Control control) {
60 | PluginTask task = config.loadConfig(PluginTask.class);
61 | Schema outputSchema = buildOutputSchema(task, inputSchema);
62 | control.run(task.dump(), outputSchema);
63 | }
64 |
65 |
66 | private Schema buildOutputSchema(PluginTask task, Schema inputSchema) {
67 | ImmutableList.Builder builder = ImmutableList.builder();
68 |
69 | Map maskColumnMap = getMaskColumnMap(task);
70 | int i = 0;
71 | for (Column inputColumn : inputSchema.getColumns()) {
72 | String name = inputColumn.getName();
73 | Type type = (maskColumnMap.containsKey(name) && inputColumn.getType() != Types.JSON) ? Types.STRING : inputColumn.getType();
74 | Column outputColumn = new Column(i++, inputColumn.getName(), type);
75 | builder.add(outputColumn);
76 | }
77 |
78 | Schema outputSchema = new Schema(builder.build());
79 | return outputSchema;
80 | }
81 |
82 | public static Map getMaskColumnMap(PluginTask task) {
83 | Map maskColumnMap = new HashMap<>();
84 | for (MaskColumn maskColumn : task.getColumns()) {
85 | maskColumnMap.put(maskColumn.getName(), maskColumn);
86 | }
87 | return maskColumnMap;
88 | }
89 |
90 | @Override
91 | public PageOutput open(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
92 | return new MaskPageOutput(taskSource, inputSchema, outputSchema, output);
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/filter/mask/MaskPageOutput.java:
--------------------------------------------------------------------------------
1 | package org.embulk.filter.mask;
2 |
3 | import com.fasterxml.jackson.databind.node.TextNode;
4 | import com.jayway.jsonpath.*;
5 | import org.apache.commons.lang3.StringUtils;
6 | import org.embulk.config.TaskSource;
7 | import org.embulk.spi.*;
8 | import org.embulk.spi.json.JsonParser;
9 | import org.embulk.spi.time.Timestamp;
10 | import org.embulk.spi.type.Type;
11 | import org.embulk.spi.type.Types;
12 | import org.embulk.filter.mask.MaskFilterPlugin.*;
13 | import org.msgpack.value.Value;
14 | import org.slf4j.Logger;
15 |
16 | import java.util.ArrayList;
17 | import java.util.HashMap;
18 | import java.util.List;
19 | import java.util.Map;
20 | import java.util.regex.Matcher;
21 | import java.util.regex.Pattern;
22 |
23 | public class MaskPageOutput implements PageOutput {
24 | private final MaskFilterPlugin.PluginTask task;
25 | private final Map outputColumnMap;
26 | private final List inputColumns;
27 | private final Map maskColumnMap;
28 | private final PageReader reader;
29 | private final PageBuilder builder;
30 | private final ParseContext parseContext;
31 | private final JsonParser jsonParser;
32 | private final Logger logger = Exec.getLogger(MaskPageOutput.class);
33 |
34 | public MaskPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) {
35 |     this.task = taskSource.loadTask(MaskFilterPlugin.PluginTask.class);
36 |     this.maskColumnMap = MaskFilterPlugin.getMaskColumnMap(this.task);
37 |     this.inputColumns = inputSchema.getColumns();
38 |     this.reader = new PageReader(inputSchema);
39 |     this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
40 |     this.parseContext = initializeParseContext();
41 |     this.jsonParser = new JsonParser();
42 |     this.outputColumnMap = new HashMap<>(); // output columns indexed by name
43 |     for (Column outputColumn : outputSchema.getColumns()) {
44 |         outputColumnMap.put(outputColumn.getName(), outputColumn);
45 |     }
46 | }
47 |
48 | private ParseContext initializeParseContext() {
49 |     // Default JsonPath configuration plus DEFAULT_PATH_LEAF_TO_NULL and SUPPRESS_EXCEPTIONS.
50 |     return JsonPath.using(Configuration.defaultConfiguration()
51 |             .addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL)
52 |             .addOptions(Option.SUPPRESS_EXCEPTIONS));
53 | }
54 |
55 | @Override
56 | public void add(Page page) {
57 |     reader.setPage(page);
58 |     for (boolean more = reader.nextRecord(); more; more = reader.nextRecord()) {
59 |         setValue(); // fill the builder's current record from the reader's current record
60 |         builder.addRecord();
61 |     }
62 | }
63 |
64 | private void setValue() {
65 |     // Copies the current record column by column. Null cells stay null; a column listed in
66 |     // maskColumnMap is written masked (as a string, or as JSON for JSON columns); any other
67 |     // column is written unchanged with its original type.
68 |     for (Column column : inputColumns) {
69 |         if (reader.isNull(column)) {
70 |             builder.setNull(column);
71 |             continue;
72 |         }
73 |
74 |         final String columnName = column.getName();
75 |         final Type columnType = column.getType();
76 |         final boolean masked = maskColumnMap.containsKey(columnName);
77 |
78 |         if (Types.STRING.equals(columnType)) {
79 |             final String text = reader.getString(column);
80 |             builder.setString(column, masked ? maskAsString(columnName, text) : text);
81 |         } else if (Types.BOOLEAN.equals(columnType)) {
82 |             final boolean flag = reader.getBoolean(column);
83 |             if (masked) {
84 |                 builder.setString(column, maskAsString(columnName, flag));
85 |             } else {
86 |                 builder.setBoolean(column, flag);
87 |             }
88 |         } else if (Types.DOUBLE.equals(columnType)) {
89 |             final double number = reader.getDouble(column);
90 |             if (masked) {
91 |                 builder.setString(column, maskAsString(columnName, number));
92 |             } else {
93 |                 builder.setDouble(column, number);
94 |             }
95 |         } else if (Types.LONG.equals(columnType)) {
96 |             final long number = reader.getLong(column);
97 |             if (masked) {
98 |                 builder.setString(column, maskAsString(columnName, number));
99 |             } else {
100 |                 builder.setLong(column, number);
101 |             }
102 |         } else if (Types.TIMESTAMP.equals(columnType)) {
103 |             final Timestamp instant = reader.getTimestamp(column);
104 |             if (masked) {
105 |                 builder.setString(column, maskAsString(columnName, instant));
106 |             } else {
107 |                 builder.setTimestamp(column, instant);
108 |             }
109 |         } else if (Types.JSON.equals(columnType)) {
110 |             final Value json = reader.getJson(column);
111 |             builder.setJson(column, masked ? maskAsJson(columnName, json) : json);
112 |         } else {
113 |             throw new DataException("Unexpected type:" + columnType);
114 |         }
115 |     }
116 | }
121 |
122 | private String maskAsString(String name, Object value) {
123 | MaskColumn maskColumn = maskColumnMap.get(name);
124 | String type = maskColumn.getType().get();
125 | String pattern = maskColumn.getPattern().or("");
126 | Integer length = maskColumn.getLength().or(-1);
127 | Integer start = maskColumn.getStart().or(-1);
128 | Integer end = maskColumn.getEnd().or(-1);
129 |
130 | return mask(type, value, pattern, length, start, end);
131 | }
132 |
133 | private Value maskAsJson(String name, Value value) {
134 | MaskColumn maskColumn = maskColumnMap.get(name);
135 | DocumentContext context = parseContext.parse(value.toJson());
136 | List