├── .circleci
└── config.yml
├── .gitignore
├── CHANGES.md
├── LICENSE.txt
├── README.md
├── build.gradle
├── circle.yml
├── gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── lib
└── embulk
│ ├── guess
│ └── apache-custom-log.rb
│ └── parser
│ └── apache-custom-log.rb
└── src
├── main
└── java
│ └── org
│ └── embulk
│ └── parser
│ ├── ApacheCustomLogParserPlugin.java
│ └── apache
│ └── log
│ ├── LogElement.java
│ ├── LogElementFactory.java
│ ├── LogFormats.java
│ ├── LongLogElement.java
│ ├── LongLogElementFactory.java
│ ├── Patterns.java
│ ├── Replacement.java
│ ├── SimpleDateFormatTimestampLogElement.java
│ ├── StringLogElement.java
│ ├── StringLogElementFactory.java
│ ├── TimestampLogElement.java
│ └── TimestampLogElementFactory.java
└── test
├── java
└── org
│ └── embulk
│ ├── parser
│ ├── TestApacheLogParserPlugin.java
│ └── apache
│ │ └── log
│ │ ├── LogFormatsTest.java
│ │ ├── PatternsTest.java
│ │ ├── StringLogElementFactoryTest.java
│ │ └── StringLogElementTest.java
│ └── tester
│ ├── DummyConfigSource.java
│ ├── EmbulkPluginTester.java
│ └── TestExtension.java
└── resources
├── META-INF
└── services
│ └── org.embulk.spi.Extension
├── data
├── access_log_2_combined
├── access_log_combined
├── access_log_common
└── access_log_custom_time_format
├── resource.txt
├── temp
└── dummy
└── yml
├── test_combined.yml
├── test_combined2.yml
├── test_common.yml
└── test_custom_time_format.yml
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | build:
4 | docker:
5 | - image: circleci/openjdk:8-jdk
6 | working_directory: ~/repo
7 | environment:
8 | GRADLE_USER_HOME: ~/repo/.gradle
9 | TERM: dumb
10 | steps:
11 | - checkout
12 | - restore_cache:
13 | key: jar-{{ checksum "build.gradle" }}
14 | - run:
15 | command: ./gradlew -s package
16 | - save_cache:
17 | key: jar-{{ checksum "build.gradle" }}
18 | paths:
19 | - .gradle
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /pkg/
3 | /tmp/
4 | *.gemspec
5 | .gradle/
6 | /classpath/
7 | build/
8 | .idea
9 | /*.gem
10 | /*.iml
11 | /out/
12 |
--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
1 | 0.2.0 (2015-10-01)
2 | ------------------
3 | * allow customlog format configuration.
4 |
5 |
6 |
7 | 0.1.0 (2015-05-30)
8 | ------------------
9 | * First release.
10 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | MIT License
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining
5 | a copy of this software and associated documentation files (the
6 | "Software"), to deal in the Software without restriction, including
7 | without limitation the rights to use, copy, modify, merge, publish,
8 | distribute, sublicense, and/or sell copies of the Software, and to
9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Apache **CustomLog** parser plugin for Embulk
2 |
3 | 
4 |
5 | Embulk parser plugin for apache **CustomLog**.
6 |
7 | The parser configuration is based on the [Apache HTTPD 2.2 custom log formats](http://httpd.apache.org/docs/2.2/en/mod/mod_log_config.html#formats).
8 |
9 | ## Overview
10 |
11 | * **Plugin type**: parser
12 | * **Guess supported**: no
13 |
14 | ## Configuration
15 |
16 | - **format**: Apache CustomLog Format (string, required)
17 |
18 | see: http://httpd.apache.org/docs/2.2/en/mod/mod_log_config.html#customlog
19 |
20 | For each supported format key, see the "Field / Column mappings" section below.
21 |
22 | ## Example
23 |
24 | ```yaml
25 | in:
26 | type: any file input plugin type
27 | parser:
28 | type: apache-custom-log
29 | format: "%v %{X-Forwarded-For}i %l %u %t \"%m %U%q %H\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O %D"
30 | ```
31 |
32 | ```
33 | $ embulk gem install embulk-parser-apache-custom-log
34 | ```
35 |
36 | ## Build
37 |
38 | ```
39 | $ ./gradlew gem
40 | ```
41 |
42 |
43 | ## Field / Column mappings
44 |
45 | see: [LogFormats.java](https://github.com/jami-i/embulk-parser-apache-custom-log/blob/develop/src/main/java/org/embulk/parser/apache/log/LogFormats.java)
46 |
47 | |flag| type | column-name |parameter|
48 | |:--:|-----------|-------------------------|:---------|
49 | | a | String | remote-ip | |
50 | | A | String | local-ip | |
51 | | b | Long | response-bytes | |
52 | | B | Long | response-bytes | |
53 | | C | String | request-cookie | cookie name ex: ```%{SESSIONID}C``` => request-cookie-SESSIONID|
54 | | D | Long | request-process-time-us | |
55 | | e | String | env | environment variable name ex: ```%{UNIQUE_ID}e``` => env-UNIQUE_ID|
56 | | f | String | file-name | |
57 | | h | String | remote-host | |
58 | | H | String | request-protocol | |
59 | | i | String | request-header | request header name ex: ```%{User-Agent}i``` => request-header-User-Agent|
60 | | l | String | remote-log-name | |
61 | | m | String | request-method | |
62 | | n | String | module-note | |
63 | | o | String | response-header | response header name ex: ```%{Location}o``` => response-header-Location|
64 | | p | Long | request-port | |
65 | | P | Long | request-process | |
66 | | q | String | request-query | |
67 | | r | String | request-line | |
68 | | s | Long | response-status | |
69 | | t | Timestamp | request-time | timestamp format defined in [strptime](http://docs.ruby-lang.org/en/2.0.0/DateTime.html#method-c-_strptime). If the format is not present, the DateTimeFormatter class will be used. |
70 | | T | Long | request-process-time-s | |
71 | | u | String | request-user | |
72 | | U | String | request-path | |
73 | | v | String | request-server-name | |
74 | | V | String | canonical-server-name | |
75 | | X | String | connection-status | |
76 | | I | Long | request-total-bytes | |
77 | | O | Long | response-total-bytes | |
78 | | % | String | % | |
79 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id "com.jfrog.bintray" version "1.1"
3 | id "com.github.jruby-gradle.base" version "0.1.5"
4 | id "java"
5 | }
6 | import com.github.jrubygradle.JRubyExec
7 | repositories {
8 | mavenCentral()
9 | jcenter()
10 | }
11 | configurations {
12 | provided
13 | }
14 |
15 | version = "0.4.1"
16 |
17 | dependencies {
18 | compile "org.embulk:embulk-core:0.7.4"
19 | provided "org.embulk:embulk-core:0.7.4"
20 | // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
21 | testCompile "junit:junit:4.+"
22 | testCompile "org.embulk:embulk-standards:0.7.4"
23 | testCompile "org.embulk:embulk-core:0.7.4:tests"
24 | }
25 |
26 | task classpath(type: Copy, dependsOn: ["jar"]) {
27 | doFirst { file("classpath").deleteDir() }
28 | from (configurations.runtime - configurations.provided + files(jar.archivePath))
29 | into "classpath"
30 | }
31 | clean { delete "classpath" }
32 |
33 | task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
34 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
35 | script "${project.name}.gemspec"
36 | doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
37 | }
38 |
39 | task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
40 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
41 | script "pkg/${project.name}-${project.version}.gem"
42 | }
43 |
44 | task "package"(dependsOn: ["gemspec", "classpath"]) << {
45 | println "> Build succeeded."
46 | println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
47 | }
48 |
49 | task gemspec {
50 | ext.gemspecFile = file("${project.name}.gemspec")
51 | inputs.file "build.gradle"
52 | outputs.file gemspecFile
53 | doLast { gemspecFile.write($/
54 | Gem::Specification.new do |spec|
55 | spec.name = "${project.name}"
56 | spec.version = "${project.version}"
57 | spec.authors = ["Hiroyuki Sato", "Osamu Ishikawa"]
58 | spec.summary = %[Apache Custom Log parser plugin for Embulk]
59 | spec.description = %[Parses Apache Custom Log files read by other file input plugins.]
60 | spec.email = ["hiroysato@gmail.com", "bass.duo@gmail.com"]
61 | spec.licenses = ["MIT"]
62 | spec.homepage = "https://github.com/jami-i/embulk-parser-apache-custom-log"
63 |
64 | spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
65 | spec.test_files = spec.files.grep(%r"^(test|spec)/")
66 | spec.require_paths = ["lib"]
67 |
68 | #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
69 | spec.add_development_dependency 'bundler', ['~> 1.0']
70 | spec.add_development_dependency 'rake', ['>= 10.0']
71 | end
72 | /$)
73 | }
74 | }
75 | clean { delete "${project.name}.gemspec" }
76 |
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | machine:
2 | java:
3 | version: oraclejdk8
4 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jami-i/embulk-parser-apache-custom-log/2ca9bdbead7217a97be015fcffe6798f86587f52/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Tue Aug 11 00:26:20 PDT 2015
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
7 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ##############################################################################
4 | ##
5 | ## Gradle start up script for UN*X
6 | ##
7 | ##############################################################################
8 |
9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10 | DEFAULT_JVM_OPTS=""
11 |
12 | APP_NAME="Gradle"
13 | APP_BASE_NAME=`basename "$0"`
14 |
15 | # Use the maximum available, or set MAX_FD != -1 to use that value.
16 | MAX_FD="maximum"
17 |
18 | warn ( ) {
19 | echo "$*"
20 | }
21 |
22 | die ( ) {
23 | echo
24 | echo "$*"
25 | echo
26 | exit 1
27 | }
28 |
29 | # OS specific support (must be 'true' or 'false').
30 | cygwin=false
31 | msys=false
32 | darwin=false
33 | case "`uname`" in
34 | CYGWIN* )
35 | cygwin=true
36 | ;;
37 | Darwin* )
38 | darwin=true
39 | ;;
40 | MINGW* )
41 | msys=true
42 | ;;
43 | esac
44 |
45 | # For Cygwin, ensure paths are in UNIX format before anything is touched.
46 | if $cygwin ; then
47 | [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48 | fi
49 |
50 | # Attempt to set APP_HOME
51 | # Resolve links: $0 may be a link
52 | PRG="$0"
53 | # Need this for relative symlinks.
54 | while [ -h "$PRG" ] ; do
55 | ls=`ls -ld "$PRG"`
56 | link=`expr "$ls" : '.*-> \(.*\)$'`
57 | if expr "$link" : '/.*' > /dev/null; then
58 | PRG="$link"
59 | else
60 | PRG=`dirname "$PRG"`"/$link"
61 | fi
62 | done
63 | SAVED="`pwd`"
64 | cd "`dirname \"$PRG\"`/" >&-
65 | APP_HOME="`pwd -P`"
66 | cd "$SAVED" >&-
67 |
68 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69 |
70 | # Determine the Java command to use to start the JVM.
71 | if [ -n "$JAVA_HOME" ] ; then
72 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73 | # IBM's JDK on AIX uses strange locations for the executables
74 | JAVACMD="$JAVA_HOME/jre/sh/java"
75 | else
76 | JAVACMD="$JAVA_HOME/bin/java"
77 | fi
78 | if [ ! -x "$JAVACMD" ] ; then
79 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80 |
81 | Please set the JAVA_HOME variable in your environment to match the
82 | location of your Java installation."
83 | fi
84 | else
85 | JAVACMD="java"
86 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87 |
88 | Please set the JAVA_HOME variable in your environment to match the
89 | location of your Java installation."
90 | fi
91 |
92 | # Increase the maximum file descriptors if we can.
93 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94 | MAX_FD_LIMIT=`ulimit -H -n`
95 | if [ $? -eq 0 ] ; then
96 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97 | MAX_FD="$MAX_FD_LIMIT"
98 | fi
99 | ulimit -n $MAX_FD
100 | if [ $? -ne 0 ] ; then
101 | warn "Could not set maximum file descriptor limit: $MAX_FD"
102 | fi
103 | else
104 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105 | fi
106 | fi
107 |
108 | # For Darwin, add options to specify how the application appears in the dock
109 | if $darwin; then
110 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111 | fi
112 |
113 | # For Cygwin, switch paths to Windows format before running java
114 | if $cygwin ; then
115 | APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117 |
118 | # We build the pattern for arguments to be converted via cygpath
119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120 | SEP=""
121 | for dir in $ROOTDIRSRAW ; do
122 | ROOTDIRS="$ROOTDIRS$SEP$dir"
123 | SEP="|"
124 | done
125 | OURCYGPATTERN="(^($ROOTDIRS))"
126 | # Add a user-defined pattern to the cygpath arguments
127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129 | fi
130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
131 | i=0
132 | for arg in "$@" ; do
133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135 |
136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138 | else
139 | eval `echo args$i`="\"$arg\""
140 | fi
141 | i=$((i+1))
142 | done
143 | case $i in
144 | (0) set -- ;;
145 | (1) set -- "$args0" ;;
146 | (2) set -- "$args0" "$args1" ;;
147 | (3) set -- "$args0" "$args1" "$args2" ;;
148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154 | esac
155 | fi
156 |
157 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158 | function splitJvmOpts() {
159 | JVM_OPTS=("$@")
160 | }
161 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163 |
164 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
165 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12 | set DEFAULT_JVM_OPTS=
13 |
14 | set DIRNAME=%~dp0
15 | if "%DIRNAME%" == "" set DIRNAME=.
16 | set APP_BASE_NAME=%~n0
17 | set APP_HOME=%DIRNAME%
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windowz variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | if "%@eval[2+2]" == "4" goto 4NT_args
53 |
54 | :win9xME_args
55 | @rem Slurp the command line arguments.
56 | set CMD_LINE_ARGS=
57 | set _SKIP=2
58 |
59 | :win9xME_args_slurp
60 | if "x%~1" == "x" goto execute
61 |
62 | set CMD_LINE_ARGS=%*
63 | goto execute
64 |
65 | :4NT_args
66 | @rem Get arguments from the 4NT Shell from JP Software
67 | set CMD_LINE_ARGS=%$
68 |
69 | :execute
70 | @rem Setup the command line
71 |
72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73 |
74 | @rem Execute Gradle
75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76 |
77 | :end
78 | @rem End local scope for the variables with windows NT shell
79 | if "%ERRORLEVEL%"=="0" goto mainEnd
80 |
81 | :fail
82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83 | rem the _cmd.exe /c_ return code!
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85 | exit /b 1
86 |
87 | :mainEnd
88 | if "%OS%"=="Windows_NT" endlocal
89 |
90 | :omega
91 |
--------------------------------------------------------------------------------
/lib/embulk/guess/apache-custom-log.rb:
--------------------------------------------------------------------------------
1 | module Embulk
2 | module Guess
3 |
4 | # TODO implement guess plugin to make this command work:
5 | # $ embulk guess -g "apache-log" partial-config.yml
6 | #
7 | # Depending on the file format the plugin uses, you can choose
8 | # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9 | # or line guess (LineGuessPlugin).
10 |
11 | #class ApacheCustomLogParserGuessPlugin < GuessPlugin
12 | # Plugin.register_guess("apache-log", self)
13 | #
14 | # def guess(config, sample_buffer)
15 | # if sample_buffer[0,2] == GZIP_HEADER
16 | # guessed = {}
17 | # guessed["type"] = "apache-log"
18 | # guessed["property1"] = "guessed-value"
19 | # return {"parser" => guessed}
20 | # else
21 | # return {}
22 | # end
23 | # end
24 | #end
25 |
26 | #class ApacheLogParserGuessPlugin < TextGuessPlugin
27 | # Plugin.register_guess("apache-log", self)
28 | #
29 | # def guess_text(config, sample_text)
30 | # js = JSON.parse(sample_text) rescue nil
31 | # if js && js["mykeyword"] == "keyword"
32 | # guessed = {}
33 | # guessed["type"] = "apache-log"
34 | # guessed["property1"] = "guessed-value"
35 | # return {"parser" => guessed}
36 | # else
37 | # return {}
38 | # end
39 | # end
40 | #end
41 |
42 | #class ApacheLogParserGuessPlugin < LineGuessPlugin
43 | # Plugin.register_guess("apache-log", self)
44 | #
45 | # def guess_lines(config, sample_lines)
46 | # all_line_matched = sample_lines.all? do |line|
47 | # line =~ /mypattern/
48 | # end
49 | # if all_line_matched
50 | # guessed = {}
51 | # guessed["type"] = "apache-log"
52 | # guessed["property1"] = "guessed-value"
53 | # return {"parser" => guessed}
54 | # else
55 | # return {}
56 | # end
57 | # end
58 | #end
59 |
60 | end
61 | end
62 |
--------------------------------------------------------------------------------
/lib/embulk/parser/apache-custom-log.rb:
--------------------------------------------------------------------------------
1 | Embulk::JavaPlugin.register_parser(
2 | "apache-custom-log", "org.embulk.parser.ApacheCustomLogParserPlugin",
3 | File.expand_path('../../../../classpath', __FILE__))
4 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/ApacheCustomLogParserPlugin.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser;
2 |
3 | import com.google.common.collect.Lists;
4 | import org.embulk.config.Config;
5 | import org.embulk.config.ConfigSource;
6 | import org.embulk.config.Task;
7 | import org.embulk.config.TaskSource;
8 | import org.embulk.parser.apache.log.LogElement;
9 | import org.embulk.parser.apache.log.LogFormats;
10 | import org.embulk.parser.apache.log.Replacement;
11 | import org.embulk.spi.*;
12 | import org.embulk.spi.time.TimestampParser;
13 | import org.embulk.spi.util.LineDecoder;
14 | import org.slf4j.Logger;
15 | import org.slf4j.LoggerFactory;
16 |
17 | import java.util.ArrayList;
18 | import java.util.List;
19 | import java.util.regex.Matcher;
20 | import java.util.regex.Pattern;
21 |
22 | public class ApacheCustomLogParserPlugin
23 | implements ParserPlugin
24 | {
25 |
26 | private static final Logger logger = LoggerFactory.getLogger(ApacheCustomLogParserPlugin.class);
27 |
28 | public interface PluginTask
29 | extends Task, LineDecoder.DecoderTask, TimestampParser.Task
30 | {
31 |
32 | @Config("format")
33 | String getFormat();
34 |
35 | }
36 |
37 | @Override
38 | public void transaction(ConfigSource config, ParserPlugin.Control control)
39 | {
40 | PluginTask task = config.loadConfig(PluginTask.class);
41 | ArrayList columns = Lists.newArrayList();
42 | final String format = task.getFormat();
43 |
44 | List replacements = new LogFormats(task).getReplacements(format);
45 |
46 | for (Replacement replacement : replacements) {
47 | LogElement> logElement = replacement.getLogElement();
48 | columns.add(logElement.getColumnConfig(config));
49 | }
50 |
51 | Schema schema = new SchemaConfig(columns).toSchema();
52 | control.run(task.dump(), schema);
53 | }
54 |
55 | @Override
56 | public void run(TaskSource taskSource, Schema schema,
57 | FileInput input, PageOutput output)
58 | {
59 | PluginTask task = taskSource.loadTask(PluginTask.class);
60 | LineDecoder lineDecoder = new LineDecoder(input,task);
61 | PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output);
62 | String line;
63 | final String format = task.getFormat();
64 | LogFormats logFormats = new LogFormats(task);
65 |
66 | List replacements = logFormats.getReplacements(format);
67 |
68 | String regexp = logFormats.logFormat2RegexpString(format);
69 |
70 | logger.info("LogFormat : " + format);
71 | logger.info("RegExp : " + regexp);
72 |
73 | Pattern accessLogPattern = Pattern.compile("^" + regexp + "$", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
74 | Matcher accessLogEntryMatcher;
75 |
76 | int replacementSize = replacements.size();
77 |
78 | logger.info("replacement : " + replacementSize);
79 |
80 | while( input.nextFile() ){
81 | while(true){
82 | line = lineDecoder.poll();
83 |
84 | if (line == null) {
85 | break;
86 | }
87 |
88 | accessLogEntryMatcher = accessLogPattern.matcher(line);
89 |
90 | if(replacementSize != accessLogEntryMatcher.groupCount()){
91 | logger.warn("group count mismatch. + expected : " + replacementSize);
92 | }
93 |
94 | while(accessLogEntryMatcher.find()){
95 | for (int i = 0; i < replacementSize; i++) {
96 | LogElement> logElement = replacements.get(i).getLogElement();
97 | String value = accessLogEntryMatcher.group(i + 1);
98 |
99 | logElement.setToPageBuilder(pageBuilder, i, value);
100 | }
101 | }
102 |
103 | pageBuilder.addRecord();
104 | }
105 | }
106 | pageBuilder.finish();
107 | }
108 |
109 | }
110 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/LogElement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.embulk.config.ConfigSource;
5 | import org.embulk.spi.ColumnConfig;
6 | import org.embulk.spi.PageBuilder;
7 | import org.embulk.spi.type.Type;
8 |
9 | /**
10 |  * A single Apache log format key (for example %h or %{User-Agent}i) bound to an
11 |  * output column.
12 |  *
13 |  * @param <T> Java type produced by {@link #parse(String)}
14 |  */
15 | public abstract class LogElement<T> {
16 |
17 |     /** Name of the output column. */
18 |     protected String name;
19 |     /** Regular expression fragment substituted for this key when the line pattern is built. */
20 |     protected String regexp;
21 |     /** Embulk type of the output column. */
22 |     protected final Type outputType;
23 |
24 |     /**
25 |      * @param name       output column name
26 |      * @param regex      regular expression fragment for this element
27 |      * @param outputType Embulk column type
28 |      */
29 |     public LogElement(String name, String regex, Type outputType){
30 |         this.name = name;
31 |         this.regexp = regex;
32 |         this.outputType = outputType;
33 |     }
34 |
35 |     public String getName(){
36 |         return name;
37 |     }
38 |
39 |     public String getRegexp() {
40 |         return regexp;
41 |     }
42 |
43 |     public Type getOutputType() {
44 |         return outputType;
45 |     }
46 |
47 |     /** Converts the captured text into a value of type T. */
48 |     public abstract T parse(String s);
49 |
50 |     /** Writes the captured text {@code value} into column {@code i} of the record being built. */
51 |     public abstract void setToPageBuilder(PageBuilder pageBuilder, int i, String value);
52 |
53 |     /** Builds the column definition for this element, passing {@code config} as the column option. */
54 |     public ColumnConfig getColumnConfig(ConfigSource config){
55 |         return new ColumnConfig(name, outputType, config);
56 |     }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/LogElementFactory.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | /**
5 |  * Creates the LogElement for one log format key.
6 |  *
7 |  * @param <T> concrete element type produced by this factory
8 |  */
9 | public interface LogElementFactory<T extends LogElement> {
10 |     /**
11 |      * @param parameter text between the braces of the format key (e.g. "User-Agent"
12 |      *                  for %{User-Agent}i), or null when the key has no braces
13 |      * @return a new element for the key
14 |      */
15 |     T create(String parameter);
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/LogFormats.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import com.google.common.collect.Lists;
4 | import org.embulk.spi.time.TimestampParser;
5 |
6 | import java.util.*;
7 | import java.util.regex.Matcher;
8 | import java.util.regex.Pattern;
9 |
10 |
11 | /**
12 |  * Translates an Apache CustomLog format string into the LogElements it contains and
13 |  * into a regular expression string in which every format key is replaced by its regexp.
14 |  */
15 | public class LogFormats implements Patterns {
16 |
17 |     TimestampParser.Task task;
18 |
19 |     public LogFormats(TimestampParser.Task task) {
20 |         this.task = task;
21 |     }
22 |
23 |     /**
24 |      * Builds the table that maps a format key (the letter following '%') to the factory
25 |      * creating its LogElement. A new map is built on every call.
26 |      *
27 |      * @return map from format key to element factory
28 |      */
29 |     public Map<String, LogElementFactory<? extends LogElement>> getLogElementMappings(){
30 |
31 |         Map<String, LogElementFactory<? extends LogElement>> mapping = new HashMap<>();
32 |
33 |         mapping.put("a", new StringLogElementFactory("remote-ip", IP_ADDRESS));
34 |         mapping.put("A", new StringLogElementFactory("local-ip", IP_ADDRESS));
35 |         mapping.put("b", new LongLogElementFactory("response-bytes"));
36 |         mapping.put("B", new LongLogElementFactory("response-bytes"));
37 |         mapping.put("C", new StringLogElementFactory("request-cookie"));
38 |         mapping.put("D", new LongLogElementFactory("request-process-time-us"));
39 |         mapping.put("e", new StringLogElementFactory("env"));
40 |         mapping.put("f", new StringLogElementFactory("file-name"));
41 |         mapping.put("h", new StringLogElementFactory("remote-host"));
42 |         mapping.put("H", new StringLogElementFactory("request-protocol", NON_SPACE));
43 |         mapping.put("i", new StringLogElementFactory("request-header"));
44 |         mapping.put("l", new StringLogElementFactory("remote-log-name", NON_SPACE));
45 |         mapping.put("m", new StringLogElementFactory("request-method", METHOD));
46 |
47 |         mapping.put("n", new StringLogElementFactory("module-note"));
48 |         mapping.put("o", new StringLogElementFactory("response-header"));
49 |
50 |         mapping.put("p", new LongLogElementFactory("request-port"));
51 |
52 |         mapping.put("P", new LongLogElementFactory("request-process"));
53 |
54 |         mapping.put("q", new StringLogElementFactory("request-query", QUERY));
55 |
56 |         mapping.put("r", new StringLogElementFactory("request-line"));
57 |         mapping.put("s", new LongLogElementFactory("response-status", STATUS));
58 |
59 |         mapping.put("t", new TimestampLogElementFactory(task, "request-time"));
60 |
61 |         mapping.put("T", new LongLogElementFactory("request-process-time-s"));
62 |
63 |         mapping.put("u", new StringLogElementFactory("request-user"));
64 |         mapping.put("U", new StringLogElementFactory("request-path", PATH));
65 |         mapping.put("v", new StringLogElementFactory("request-server-name", NON_SPACE));
66 |         mapping.put("V", new StringLogElementFactory("canonical-server-name", NON_SPACE));
67 |         mapping.put("X", new StringLogElementFactory("connection-status", CONN_STATUS));
68 |         mapping.put("I", new LongLogElementFactory("request-total-bytes"));
69 |         mapping.put("O", new LongLogElementFactory("response-total-bytes"));
70 |
71 |         // NOTE: the key group of logFormatExtractor is [A-z], which excludes '%', so a
72 |         // "%%" in the format is never looked up here.
73 |         mapping.put("%", new StringLogElementFactory("%", "(\\%)"));
74 |
75 |         return mapping;
76 |     }
77 |
78 |     /**
79 |      * RegExp pattern that extracts log format keys.
80 |      *
81 |      * The pattern has 9 groups:
82 |      *
83 |      *   (%((!)?(\d{3}(,\d{3})*))?(<|>)?(\{([^\}]+)\})?([A-z]))
84 |      *
85 |      *   group(0), group(1)  the whole format key
86 |      *   group(2)            http status specifier
87 |      *   group(3)            inverse http status specifier
88 |      *   group(4)            http status(es)
89 |      *   group(5)            additional http status(es)
90 |      *   group(6)            logging timing parameter
91 |      *   group(7)            optional parameter wrapper group
92 |      *   group(8)            optional parameter
93 |      *   group(9)            key
94 |      */
95 |     public static final Pattern logFormatExtractor =
96 |             Pattern.compile("(%((!)?(\\d{3}(,\\d{3})*))?(<|>)?(\\{([^\\}]+)\\})?([A-z]))",
97 |                     Pattern.DOTALL);
98 |
99 |     /**
100 |      * Convert logFormat String to Regexp String
101 |      * @param logFormat apache custom log format
102 |      * @return The pattern that matches CustomLog Configuration.
103 |      *
104 |      */
105 |     public String logFormat2RegexpString(String logFormat){
106 |         List<Replacement> replacements = getReplacements(logFormat);
107 |         return replace(logFormat, replacements);
108 |     }
109 |
110 |     /**
111 |      * Substitutes each format key in logFormat with the regexp of its LogElement.
112 |      * Replacement offsets refer to the original string, so {@code offset} tracks how much
113 |      * the text has grown or shrunk through the substitutions done so far.
114 |      */
115 |     private String replace(String logFormat, List<Replacement> replacements) {
116 |         int offset = 0;
117 |
118 |         for (Replacement replacement : replacements) {
119 |             String left = logFormat.substring(0, offset + replacement.getStart());
120 |             String right = logFormat.substring(offset + replacement.getEnd(), logFormat.length());
121 |             int originalLength = logFormat.length() - left.length() - right.length();
122 |
123 |             String regexp = replacement.getLogElement().getRegexp();
124 |             logFormat = left + regexp + right;
125 |             offset += regexp.length() - originalLength;
126 |         }
127 |         return logFormat;
128 |     }
129 |
130 |     /**
131 |      * Finds every format key in logFormat and creates its LogElement.
132 |      *
133 |      * @param logFormat apache custom log format
134 |      * @return one Replacement per key, in order of appearance, carrying the key's
135 |      *         start/end offsets in logFormat
136 |      * @throws IllegalStateException if a key has no entry in getLogElementMappings()
137 |      */
138 |     public List<Replacement> getReplacements(String logFormat) {
139 |         Matcher matcher = logFormatExtractor.matcher(logFormat);
140 |
141 |         // Build the key table once per call rather than once per matched key.
142 |         Map<String, LogElementFactory<? extends LogElement>> mappings = getLogElementMappings();
143 |
144 |         List<Replacement> replacements = Lists.newArrayList();
145 |
146 |         while(matcher.find()){
147 |             if(matcher.groupCount() != 9){
148 |                 throw new IllegalArgumentException("invalid regexp pattern");
149 |             }
150 |             String all = empty(matcher.group(1));
151 |
152 |             //TODO implement
153 |             //String ignoreStatus = empty(matcher.group(3));
154 |             //Object[] statuses = Arrays.stream(empty(matcher.group(4)).split(",")).toArray();
155 |             //String position = empty(matcher.group(6));
156 |
157 |             String parameter = matcher.group(8);
158 |             String key = empty(matcher.group(9));
159 |
160 |             LogElementFactory<? extends LogElement> factory = mappings.get(key);
161 |
162 |             if(factory == null){
163 |                 throw new IllegalStateException("unknown log format key " + all);
164 |             }
165 |
166 |             LogElement logElement = factory.create(parameter);
167 |             replacements.add(new Replacement(matcher.start(), matcher.end(), logElement));
168 |         }
169 |         return replacements;
170 |     }
171 |
172 |     /** Maps null to the empty string. */
173 |     private String empty(String s){
174 |         return s == null ? "" : s;
175 |     }
176 |
177 | }
178 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/LongLogElement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.embulk.spi.PageBuilder;
5 | import org.embulk.spi.type.Types;
6 |
7 | public class LongLogElement extends LogElement {
8 |
9 | public LongLogElement(String name, String regex) {
10 | super(name, regex, Types.LONG);
11 | }
12 |
13 | @Override
14 | public Long parse(String s) {
15 | try{
16 | if("-".equals(s)){
17 | return 0L;
18 | }
19 | return Long.parseLong(s);
20 | }catch (NumberFormatException e){
21 | return 0L;
22 | }
23 | }
24 |
25 | @Override
26 | public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
27 | pageBuilder.setLong(i, parse(value));
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.apache.commons.lang3.StringUtils;
5 |
6 | public class LongLogElementFactory implements LogElementFactory, Patterns {
7 |
8 | private String name;
9 | private String regexp;
10 |
11 | public LongLogElementFactory(String name, String regexp) {
12 | this.name = name;
13 | this.regexp = regexp;
14 | }
15 |
16 | public LongLogElementFactory(String name) {
17 | this.name = name;
18 | this.regexp = LONG;
19 | }
20 |
21 | @Override
22 | public LongLogElement create(String parameter) {
23 | if(StringUtils.isEmpty(parameter)){
24 | return new LongLogElement(name, regexp);
25 | }else {
26 | return new LongLogElement(name + "-" + parameter, regexp);
27 | }
28 |
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/Patterns.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | public interface Patterns {
4 |
5 | String NON_SPACE = "([^\\s]*)";
6 |
7 | String IP_ADDRESS = "(\\d+(?:\\.\\d+){3})";
8 |
9 | String LONG = "(-?\\d+|-)";
10 |
11 | String ANY = "(.*)";
12 |
13 | String PATH = "(/[^\\?]*)";
14 |
15 | String QUERY = "(\\?.*)?";
16 |
17 | String STATUS = "([1-9]\\d{2})";
18 |
19 | String METHOD = "(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|CONNECT)";
20 |
21 | String CONN_STATUS = "([X+\\-])";
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/Replacement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | public class Replacement {
5 | private final int start;
6 | private final int end;
7 | private final LogElement> logElement;
8 |
9 | public Replacement(int start, int end, LogElement> logElement) {
10 | this.logElement = logElement;
11 | this.end = end;
12 | this.start = start;
13 | }
14 |
15 | public int getStart() {
16 | return start;
17 | }
18 |
19 | public int getEnd() {
20 | return end;
21 | }
22 |
23 | public LogElement> getLogElement() {
24 | return logElement;
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import org.embulk.spi.PageBuilder;
4 | import org.embulk.spi.time.Timestamp;
5 | import org.embulk.spi.time.TimestampParser;
6 | import org.joda.time.format.DateTimeFormat;
7 | import org.joda.time.format.DateTimeFormatter;
8 |
9 | import java.util.Locale;
10 |
11 |
12 | public class SimpleDateFormatTimestampLogElement extends TimestampLogElement {
13 |
14 | static final DateTimeFormatter formatter =
15 | DateTimeFormat
16 | .forPattern("dd/MMM/yyyy:HH:mm:ss Z")
17 | .withLocale(Locale.US);
18 |
19 | public SimpleDateFormatTimestampLogElement(TimestampParser.Task task, String name) {
20 | super(task, name, "\\[([^\\]]+)\\]", "");
21 | }
22 |
23 | @Override
24 | public Timestamp parse(String s) {
25 | try{
26 | long epoch = formatter.parseDateTime(s).getMillis();
27 | return Timestamp.ofEpochMilli(epoch);
28 | }catch (Exception e){
29 | return null;
30 | }
31 | }
32 |
33 | @Override
34 | public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
35 | Timestamp parse = parse(value);
36 | if(parse != null){
37 | pageBuilder.setTimestamp(i, parse);
38 | }else{
39 | pageBuilder.setNull(i);
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/StringLogElement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.embulk.spi.PageBuilder;
5 | import org.embulk.spi.type.Types;
6 |
7 | public class StringLogElement extends LogElement {
8 |
9 | public StringLogElement(String name, String regex) {
10 | super(name, regex, Types.STRING);
11 | }
12 |
13 | @Override
14 | public String parse(String s) {
15 | if("-".equals(s)){
16 | return null;
17 | }else{
18 | return s;
19 | }
20 |
21 | }
22 |
23 | @Override
24 | public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
25 | String parsed = parse(value);
26 | if(parsed != null){
27 | pageBuilder.setString(i, parsed);
28 | }else{
29 | pageBuilder.setNull(i);
30 | }
31 |
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.apache.commons.lang3.StringUtils;
5 |
6 | public class StringLogElementFactory implements LogElementFactory, Patterns {
7 |
8 | private String name;
9 | private String regexp;
10 |
11 | public StringLogElementFactory(String name, String regexp) {
12 | this.name = name;
13 | this.regexp = regexp;
14 | }
15 |
16 | public StringLogElementFactory(String name) {
17 | this.name = name;
18 | this.regexp = ANY;
19 | }
20 |
21 | @Override
22 | public StringLogElement create(String parameter) {
23 | if(StringUtils.isEmpty(parameter)){
24 | return new StringLogElement(name, regexp);
25 | }else{
26 | return new StringLogElement(name + "-" + parameter, regexp);
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import org.embulk.config.ConfigSource;
4 | import org.embulk.config.Task;
5 | import org.embulk.spi.Exec;
6 | import org.embulk.spi.PageBuilder;
7 | import org.embulk.spi.time.Timestamp;
8 | import org.embulk.spi.time.TimestampParser;
9 |
10 |
11 | import static org.embulk.spi.type.Types.TIMESTAMP;
12 |
13 |
14 | public class TimestampLogElement extends LogElement {
15 |
16 | private final TimestampParser parser;
17 |
18 | public TimestampLogElement(TimestampParser.Task task, String name, String regex) {
19 | this(task, name, regex, "%d/%b/%Y:%T %z");
20 | }
21 |
22 | private static interface ParserIntlTask extends Task, TimestampParser.Task {}
23 | private static interface ParserIntlColumnOption extends Task, TimestampParser.TimestampColumnOption {}
24 |
25 | public TimestampLogElement(TimestampParser.Task task, String name, String regex, String pattern) {
26 | super(name, regex, TIMESTAMP);
27 | // TODO: Switch to a newer TimestampParser constructor after a reasonable interval.
28 | // Traditional constructor is used here for compatibility.
29 | final ConfigSource configSource = Exec.newConfigSource();
30 | configSource.set("format", pattern);
31 | configSource.set("timezone", task.getDefaultTimeZone());
32 | this.parser = new TimestampParser(
33 | Exec.newConfigSource().loadConfig(ParserIntlTask.class),
34 | configSource.loadConfig(ParserIntlColumnOption.class));
35 | }
36 |
37 | @Override
38 | public Timestamp parse(String s) {
39 | try{
40 | return parser.parse(s);
41 | }catch (Exception e){
42 | return null;
43 | }
44 | }
45 |
46 | @Override
47 | public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
48 | Timestamp parse = parse(value);
49 | if(parse != null){
50 | pageBuilder.setTimestamp(i, parse);
51 | }else{
52 | pageBuilder.setNull(i);
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 |
4 | import org.apache.commons.lang3.StringUtils;
5 | import org.embulk.spi.time.TimestampParser;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | public class TimestampLogElementFactory implements LogElementFactory, Patterns {
10 |
11 | private static final Logger logger = LoggerFactory.getLogger(TimestampLogElementFactory.class);
12 |
13 | private TimestampParser.Task task;
14 | private String name;
15 |
16 | public TimestampLogElementFactory(TimestampParser.Task task, String name) {
17 | this.task = task;
18 | this.name = name;
19 | }
20 |
21 | @Override
22 | public TimestampLogElement create(String parameter) {
23 | if(StringUtils.isEmpty(parameter)){
24 | logger.info("since format parameter is not given, use DateTimeFormatter.");
25 | return new SimpleDateFormatTimestampLogElement(task, name);
26 | }else{
27 | String regex = toTimestampRegex(parameter);
28 | return new TimestampLogElement(task, name, regex, parameter);
29 | }
30 | }
31 |
32 | private String toTimestampRegex(String parameter) {
33 | String regex = "(" + parameter + ")";
34 | regex = regex.replaceAll("\\[|\\]","\\\\$0");
35 |
36 | regex = regex.replaceAll("%[abhpABPZ]","[A-z]+");
37 | regex = regex.replaceAll("%c","[A-z]{3} [A-z]{3} \\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2} \\\\d{4}");
38 | regex = regex.replaceAll("%[dgmyCHIMSUVW]","\\\\d{2}");
39 | regex = regex.replaceAll("%[Dx]","\\\\d{2}/\\\\d{2}/\\\\d{2}");
40 | regex = regex.replaceAll("%[ekl]","[1-9 ]\\\\d");
41 | regex = regex.replaceAll("%F","\\\\d{4}-\\\\d{2}-\\\\d{2}");
42 | regex = regex.replaceAll("%[GY]","\\\\d{4}");
43 | regex = regex.replaceAll("%j","\\\\d{3}");
44 | regex = regex.replaceAll("%r","\\\\d{2}:\\\\d{2}:\\\\d{2} [A-z]+");
45 | regex = regex.replaceAll("%R","\\\\d{2}:\\\\d{2}");
46 | regex = regex.replaceAll("%s","\\\\d+");
47 | regex = regex.replaceAll("%[TX]","\\\\d{2}:\\\\d{2}:\\\\d{2}");
48 | regex = regex.replaceAll("%[uw]","\\\\d");
49 | regex = regex.replaceAll("%z","\\\\+\\\\d{4}");
50 |
51 | return regex;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser;
2 |
3 | import org.embulk.spi.ParserPlugin;
4 | import org.embulk.tester.EmbulkPluginTester;
5 | import org.junit.Test;
6 |
7 | import java.io.BufferedReader;
8 | import java.io.File;
9 | import java.io.FileReader;
10 | import java.io.IOException;
11 | import java.net.URISyntaxException;
12 | import java.util.Arrays;
13 | import java.util.function.Consumer;
14 |
15 | import static org.hamcrest.CoreMatchers.is;
16 | import static org.junit.Assert.assertThat;
17 |
18 | public class TestApacheLogParserPlugin {
19 |
20 | private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log", ApacheCustomLogParserPlugin.class);
21 |
22 | @Test
23 | public void test_common() throws Exception {
24 | tester.run("/yml/test_common.yml");
25 |
26 | assertResult(
27 | "/temp/result_common.000.00.tsv",
28 | cols -> {
29 | String[] expected = new String[]{
30 | "remote-host",
31 | "remote-log-name",
32 | "request-user",
33 | "request-time",
34 | "request-line",
35 | "response-status",
36 | "response-bytes"
37 | };
38 | assertThat(cols, is(expected));
39 | },
40 | cols -> {
41 | String[] expected = new String[]{
42 | "127.0.0.1",
43 | "",
44 | "frank",
45 | "2000-10-10 20:55:36.000000 +0000",
46 | "GET /apache_pb.gif HTTP/1.0",
47 | "200",
48 | "2326"
49 | };
50 | assertThat(cols, is(expected));
51 | }
52 | );
53 |
54 | }
55 |
56 | @Test
57 | public void test_custom_time_format() throws Exception {
58 | tester.run("/yml/test_custom_time_format.yml");
59 |
60 | assertResult(
61 | "/temp/result_custom_time_format.000.00.tsv",
62 | cols -> {
63 | String[] expected = new String[]{
64 | "remote-host",
65 | "remote-log-name",
66 | "request-user",
67 | "request-time",
68 | "request-line",
69 | "response-status",
70 | "response-bytes"
71 | };
72 | assertThat(cols, is(expected));
73 | },
74 | cols -> {
75 | String[] expected = new String[]{
76 | "127.0.0.1",
77 | "",
78 | "frank",
79 | "2015-11-20 13:55:36.000000 +0000",
80 | "GET /apache_pb.gif HTTP/1.0",
81 | "200",
82 | "2326"
83 | };
84 | assertThat(cols, is(expected));
85 | }
86 | );
87 |
88 | }
89 |
90 | @Test
91 | public void test_combined() throws Exception {
92 | tester.run("/yml/test_combined.yml");
93 |
94 | assertResult(
95 | "/temp/result_combined.000.00.tsv",
96 | cols -> {
97 | String[] expected = new String[]{
98 | "remote-host",
99 | "remote-log-name",
100 | "request-user",
101 | "request-time",
102 | "request-line",
103 | "response-status",
104 | "response-bytes",
105 | "request-header-Referer",
106 | "request-header-User-agent"
107 | };
108 | assertThat(cols, is(expected));
109 | },
110 | cols -> {
111 | String[] expected = new String[]{
112 | "127.0.0.1",
113 | "",
114 | "frank",
115 | "2000-10-10 20:55:36.000000 +0000",
116 | "GET /apache_pb.gif HTTP/1.0",
117 | "200",
118 | "2326",
119 | "http://www.example.com/start.html",
120 | "Mozilla/4.08 [en] (Win98; I ;Nav)"
121 | };
122 | assertThat(cols, is(expected));
123 | }
124 | );
125 |
126 | }
127 |
128 | @Test
129 | public void test_combined2() throws Exception {
130 | tester.run("/yml/test_combined2.yml");
131 |
132 | assertResult(
133 | "/temp/result_2_combined.000.00.tsv",
134 | cols -> {
135 | String[] expected = new String[]{
136 | "remote-host",
137 | "remote-log-name",
138 | "request-user",
139 | "request-time",
140 | "request-method",
141 | "request-path",
142 | "request-query",
143 | "request-protocol",
144 | "response-status",
145 | "response-bytes",
146 | "request-header-Referer",
147 | "request-header-User-agent"
148 | };
149 | assertThat(cols, is(expected));
150 | },
151 | cols -> {
152 | String[] expected = new String[]{
153 | "24.93.39.209",
154 | "",
155 | "",
156 | "2015-07-25 06:31:32.000000 +0000",
157 | "POST",
158 | "/search/",
159 | "?c=Computers",
160 | "HTTP/1.1",
161 | "200",
162 | "88",
163 | "/category/health",
164 | "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C)"
165 | };
166 | assertThat(cols, is(expected));
167 | }
168 | );
169 |
170 | }
171 |
172 | @SafeVarargs
173 | private final void assertResult(String path, Consumer headerAssert, Consumer... bodyHeadAsserts) throws URISyntaxException, IOException {
174 |
175 | File resultFile = new File(TestApacheLogParserPlugin.class.getResource(path).toURI());
176 |
177 | try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
178 |
179 | String[] headerLine = reader.readLine().split("\t");
180 |
181 | for (Consumer bodyHeadAssert : bodyHeadAsserts) {
182 | String[] bodyHeadLine = reader.readLine().split("\t");
183 |
184 | assertThat("body column length mismatch.", bodyHeadLine.length, is(headerLine.length));
185 |
186 | headerAssert.accept(headerLine);
187 |
188 | bodyHeadAssert.accept(bodyHeadLine);
189 | }
190 |
191 | }
192 |
193 |
194 | }
195 |
196 |
197 | }
198 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/apache/log/LogFormatsTest.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import org.embulk.EmbulkTestRuntime;
4 | import org.embulk.spi.time.TimestampParser;
5 | import org.joda.time.DateTimeZone;
6 | import org.jruby.embed.ScriptingContainer;
7 | import org.junit.Rule;
8 | import org.junit.Test;
9 |
10 | import java.util.regex.Pattern;
11 |
12 | public class LogFormatsTest {
13 |
14 | @Rule
15 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
16 |
17 | @Test
18 | public void testLogFormat2Regexp() throws Exception {
19 |
20 | String format = "%!100v %!100,200,300>{hogeHoge}v %v %{X-Forwarded-For}i %t %{%D}t %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O %D";
21 |
22 | LogFormats logFormats = new LogFormats(new TimestampParser.Task() {
23 | @Override
24 | public DateTimeZone getDefaultTimeZone() {
25 | return DateTimeZone.UTC;
26 | }
27 |
28 | @Override
29 | public String getDefaultTimestampFormat() {
30 | return "\"%Y-%m-%d %H:%M:%S.%N %z\"";
31 | }
32 |
33 | @Override
34 | public ScriptingContainer getJRuby() {
35 | return new ScriptingContainer();
36 | }
37 | });
38 |
39 | String s = logFormats.logFormat2RegexpString(format);
40 |
41 | System.out.println(Pattern.quote(s));
42 |
43 |
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/apache/log/PatternsTest.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import junit.framework.TestCase;
4 | import org.junit.Test;
5 | import org.junit.experimental.runners.Enclosed;
6 | import org.junit.runner.RunWith;
7 |
8 | import java.util.regex.Pattern;
9 |
10 | import static org.hamcrest.CoreMatchers.is;
11 | import static org.junit.Assert.assertThat;
12 |
13 | @RunWith(Enclosed.class)
14 | public class PatternsTest extends TestCase {
15 |
16 | public static class Test_IP_ADDRESS {
17 | Pattern pattern = Pattern.compile(Patterns.IP_ADDRESS);
18 | @Test
19 | public void test_match_with_ipv4(){
20 | assertThat(pattern.matcher("127.0.0.1").matches(), is(true));
21 | assertThat(pattern.matcher("255.255.255.255").matches(), is(true));
22 | }
23 | }
24 |
25 | public static class Test_LONG {
26 | Pattern pattern = Pattern.compile(Patterns.LONG);
27 | @Test
28 | public void test_match_with_long(){
29 | assertThat(pattern.matcher("1").matches(), is(true));
30 | assertThat(pattern.matcher("-1").matches(), is(true));
31 | assertThat(pattern.matcher("-").matches(), is(true));
32 |
33 | assertThat(pattern.matcher("a").matches(), is(false));
34 | }
35 | }
36 |
37 | public static class Test_ANY {
38 | Pattern pattern = Pattern.compile(Patterns.ANY);
39 | @Test
40 | public void test_match_with_long(){
41 | assertThat(pattern.matcher("1").matches(), is(true));
42 | assertThat(pattern.matcher("-1").matches(), is(true));
43 | assertThat(pattern.matcher("-").matches(), is(true));
44 |
45 | assertThat(pattern.matcher("a").matches(), is(true));
46 | assertThat(pattern.matcher("").matches(), is(true));
47 | }
48 | }
49 |
50 | public static class Test_PATH {
51 | Pattern pattern = Pattern.compile(Patterns.PATH);
52 | @Test
53 | public void test_match_with_long(){
54 | assertThat(pattern.matcher("/1").matches(), is(true));
55 | assertThat(pattern.matcher("/abc/123").matches(), is(true));
56 |
57 | assertThat(pattern.matcher("").matches(), is(false));
58 | }
59 | }
60 |
61 | public static class Test_QUERY {
62 | Pattern pattern = Pattern.compile(Patterns.QUERY);
63 | @Test
64 | public void test_match_with_long(){
65 | assertThat(pattern.matcher("?1").matches(), is(true));
66 | assertThat(pattern.matcher("?abc=123&p=v#hash").matches(), is(true));
67 |
68 | assertThat(pattern.matcher("").matches(), is(true));
69 | }
70 | }
71 |
72 | public static class Test_STATUS {
73 | Pattern pattern = Pattern.compile(Patterns.STATUS);
74 | @Test
75 | public void test_match_with_long(){
76 | assertThat(pattern.matcher("100").matches(), is(true));
77 | assertThat(pattern.matcher("200").matches(), is(true));
78 | assertThat(pattern.matcher("302").matches(), is(true));
79 | assertThat(pattern.matcher("404").matches(), is(true));
80 | assertThat(pattern.matcher("500").matches(), is(true));
81 | assertThat(pattern.matcher("999").matches(), is(true));
82 |
83 | assertThat(pattern.matcher("99").matches(), is(false));
84 | assertThat(pattern.matcher("099").matches(), is(false));
85 | assertThat(pattern.matcher("1000").matches(), is(false));
86 | }
87 | }
88 |
89 | public static class Test_METHOD {
90 | Pattern pattern = Pattern.compile(Patterns.METHOD);
91 | @Test
92 | public void test_match_with_long(){
93 | assertThat(pattern.matcher("HEAD").matches(), is(true));
94 | assertThat(pattern.matcher("GET").matches(), is(true));
95 | assertThat(pattern.matcher("POST").matches(), is(true));
96 | assertThat(pattern.matcher("PUT").matches(), is(true));
97 | assertThat(pattern.matcher("OPTIONS").matches(), is(true));
98 | assertThat(pattern.matcher("TRACE").matches(), is(true));
99 | assertThat(pattern.matcher("CONNECT").matches(), is(true));
100 |
101 | assertThat(pattern.matcher("").matches(), is(false));
102 | assertThat(pattern.matcher("OTHER").matches(), is(false));
103 | }
104 | }
105 |
106 | public static class Test_CONN_STATUS {
107 | Pattern pattern = Pattern.compile(Patterns.CONN_STATUS);
108 | @Test
109 | public void test_match_with_long(){
110 | assertThat(pattern.matcher("X").matches(), is(true));
111 | assertThat(pattern.matcher("-").matches(), is(true));
112 | assertThat(pattern.matcher("+").matches(), is(true));
113 |
114 | assertThat(pattern.matcher("").matches(), is(false));
115 | assertThat(pattern.matcher("foo").matches(), is(false));
116 | }
117 | }
118 |
119 |
120 | }
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/apache/log/StringLogElementFactoryTest.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import junit.framework.TestCase;
4 | import org.junit.Test;
5 | import org.junit.experimental.runners.Enclosed;
6 | import org.junit.runner.RunWith;
7 |
8 | import static org.hamcrest.CoreMatchers.is;
9 | import static org.junit.Assert.assertThat;
10 |
11 | @RunWith(Enclosed.class)
12 | public class StringLogElementFactoryTest extends TestCase {
13 |
14 | public static class WithName{
15 |
16 | @Test
17 | public void testCreateWithNull() throws Exception {
18 |
19 | StringLogElementFactory factory = new StringLogElementFactory("test-name");
20 |
21 | StringLogElement logElement = factory.create(null);
22 |
23 | assertThat(logElement.getName(), is("test-name"));
24 | assertThat(logElement.getRegexp(), is("(.*)"));
25 |
26 | }
27 |
28 | @Test
29 | public void testCreateWithEmptyString() throws Exception {
30 |
31 | StringLogElementFactory factory = new StringLogElementFactory("test-name");
32 |
33 | StringLogElement logElement = factory.create(null);
34 |
35 | assertThat(logElement.getName(), is("test-name"));
36 | assertThat(logElement.getRegexp(), is("(.*)"));
37 |
38 | }
39 |
40 | @Test
41 | public void testCreateWithParameter() throws Exception {
42 |
43 | StringLogElementFactory factory = new StringLogElementFactory("test-name");
44 |
45 | StringLogElement logElement = factory.create("param");
46 |
47 | assertThat(logElement.getName(), is("test-name-param"));
48 | assertThat(logElement.getRegexp(), is("(.*)"));
49 |
50 | }
51 | }
52 |
53 | public static class WithNameAndRegexp{
54 |
55 | @Test
56 | public void testCreateWithNull() throws Exception {
57 |
58 | StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
59 |
60 | StringLogElement logElement = factory.create(null);
61 |
62 | assertThat(logElement.getName(), is("test-name"));
63 | assertThat(logElement.getRegexp(), is("(.+)"));
64 |
65 | }
66 |
67 | @Test
68 | public void testCreateWithEmptyString() throws Exception {
69 |
70 | StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
71 |
72 | StringLogElement logElement = factory.create(null);
73 |
74 | assertThat(logElement.getName(), is("test-name"));
75 | assertThat(logElement.getRegexp(), is("(.+)"));
76 |
77 | }
78 |
79 | @Test
80 | public void testCreateWithParameter() throws Exception {
81 |
82 | StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
83 |
84 | StringLogElement logElement = factory.create("param");
85 |
86 | assertThat(logElement.getName(), is("test-name-param"));
87 | assertThat(logElement.getRegexp(), is("(.+)"));
88 |
89 | }
90 | }
91 | }
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/apache/log/StringLogElementTest.java:
--------------------------------------------------------------------------------
1 | package org.embulk.parser.apache.log;
2 |
3 | import junit.framework.TestCase;
4 | import org.embulk.spi.PageBuilder;
5 | import org.junit.Test;
6 | import org.junit.experimental.runners.Enclosed;
7 | import org.junit.runner.RunWith;
8 |
9 | import static org.hamcrest.CoreMatchers.is;
10 | import static org.junit.Assert.assertThat;
11 |
12 | @RunWith(Enclosed.class)
13 | public class StringLogElementTest extends TestCase {
14 |
15 | public static class TestParse {
16 |
17 | StringLogElement elem = new StringLogElement("test-elem", "(.*)");
18 |
19 | @Test
20 | public void testParseWithNull() throws Exception {
21 | assertThat(elem.parse(null), is((String)null));
22 | }
23 |
24 | @Test
25 | public void testParseWithEmpty() throws Exception {
26 | assertThat(elem.parse(""), is(""));
27 | }
28 |
29 | @Test
30 | public void testParseWithNonEmptyString() throws Exception {
31 | assertThat(elem.parse("str"), is("str"));
32 | }
33 |
34 | @Test
35 | public void testParseWithCLFEmptyString() throws Exception {
36 | assertThat(elem.parse("-"), is((String)null));
37 | }
38 |
39 | }
40 |
41 | public static class TestSetToPageBuilder{
42 | @Test
43 | public void testSetToPageBuilder() throws Exception {
44 | //TODO implement
45 | }
46 | }
47 |
48 |
49 |
50 |
51 | }
--------------------------------------------------------------------------------
/src/test/java/org/embulk/tester/DummyConfigSource.java:
--------------------------------------------------------------------------------
1 | package org.embulk.tester;
2 |
3 | import com.fasterxml.jackson.databind.JsonNode;
4 | import com.fasterxml.jackson.databind.node.ObjectNode;
5 | import org.embulk.config.ConfigSource;
6 | import org.embulk.config.DataSource;
7 |
8 | import java.util.List;
9 | import java.util.Map;
10 |
11 | public class DummyConfigSource implements ConfigSource {
12 | @Override
13 | public T loadConfig(Class taskType) {
14 | return null;
15 | }
16 |
17 | @Override
18 | public List getAttributeNames() {
19 | return null;
20 | }
21 |
22 | @Override
23 | public Iterable> getAttributes() {
24 | return null;
25 | }
26 |
27 | @Override
28 | public boolean isEmpty() {
29 | return false;
30 | }
31 |
32 | @Override
33 | public boolean has(String attrName) {
34 | return false;
35 | }
36 |
37 | @Override
38 | public E get(Class type, String attrName) {
39 | return null;
40 | }
41 |
42 | @Override
43 | public E get(Class type, String attrName, E defaultValue) {
44 | return null;
45 | }
46 |
47 | @Override
48 | public ConfigSource getNested(String attrName) {
49 | return null;
50 | }
51 |
52 | @Override
53 | public ConfigSource getNestedOrSetEmpty(String attrName) {
54 | return null;
55 | }
56 |
57 | @Override
58 | public ConfigSource set(String attrName, Object v) {
59 | return null;
60 | }
61 |
62 | @Override
63 | public ConfigSource setNested(String attrName, DataSource v) {
64 | return null;
65 | }
66 |
67 | @Override
68 | public ConfigSource setAll(DataSource other) {
69 | return null;
70 | }
71 |
72 | @Override
73 | public ConfigSource remove(String attrName) {
74 | return null;
75 | }
76 |
77 | @Override
78 | public ConfigSource deepCopy() {
79 | return null;
80 | }
81 |
82 | @Override
83 | public ConfigSource merge(DataSource other) {
84 | return null;
85 | }
86 |
87 | @Override
88 | public ObjectNode getObjectNode() {
89 | return null;
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/tester/EmbulkPluginTester.java:
--------------------------------------------------------------------------------
1 | package org.embulk.tester;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.File;
6 | import java.io.FileReader;
7 | import java.io.FileWriter;
8 | import java.util.regex.Matcher;
9 | import java.util.regex.Pattern;
10 |
11 | import org.embulk.EmbulkEmbed;
12 | import org.embulk.config.ConfigLoader;
13 |
14 | public class EmbulkPluginTester {
15 |
16 | public EmbulkPluginTester(Class> iface, String name, Class> impl)
17 | {
18 | TestExtension.addPlugin(iface, name, impl);
19 | }
20 |
21 | public void run(String ymlPath) throws Exception
22 | {
23 | EmbulkEmbed.Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
24 |
25 | EmbulkEmbed embulk = bootstrap.initialize();
26 |
27 | ConfigLoader configLoader = new ConfigLoader(embulk.getModelManager());
28 | embulk.run(configLoader.fromYamlFile(new File(convert(ymlPath))));
29 |
30 |
31 | }
32 |
33 | private String convert(String yml) throws Exception
34 | {
35 | File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
36 | File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
37 | File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
38 | Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
39 | try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
40 | try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
41 | String line;
42 | while ((line = reader.readLine()) != null) {
43 | Matcher matcher = pathPrefixPattern.matcher(line);
44 | if (matcher.matches()) {
45 | int group = 2;
46 | writer.write(line.substring(0, matcher.start(group)));
47 | writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
48 | writer.write(line.substring(matcher.end(group)));
49 | } else {
50 | writer.write(line);
51 | }
52 | writer.newLine();
53 | }
54 | }
55 | }
56 | return tempYmlPath.getAbsolutePath();
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/tester/TestExtension.java:
--------------------------------------------------------------------------------
1 | package org.embulk.tester;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import org.embulk.config.ConfigSource;
7 | import org.embulk.plugin.InjectedPluginSource;
8 | import org.embulk.spi.Extension;
9 |
10 | import com.google.common.collect.ImmutableList;
11 | import com.google.inject.Binder;
12 | import com.google.inject.Module;
13 |
14 |
15 | public class TestExtension implements Extension
16 | {
17 | private static class PluginDefinition
18 | {
19 | public final Class> iface;
20 | public final String name;
21 | public final Class> impl;
22 |
23 | public PluginDefinition(Class> iface, String name, Class> impl)
24 | {
25 | this.iface = iface;
26 | this.name = name;
27 | this.impl = impl;
28 | }
29 | }
30 |
31 | private static List plugins = new ArrayList<>();
32 |
33 | public static void addPlugin(Class> iface, String name, Class> impl)
34 | {
35 | plugins.add(new PluginDefinition(iface, name, impl));
36 | }
37 |
38 | @Override
39 | public List getModules(ConfigSource configsource) {
40 | Module module = new Module() {
41 |
42 | @Override
43 | public void configure(Binder binder) {
44 | for (PluginDefinition plugin : plugins) {
45 | InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl);
46 | }
47 | }
48 | };
49 | return ImmutableList.of(module);
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/resources/META-INF/services/org.embulk.spi.Extension:
--------------------------------------------------------------------------------
1 | org.embulk.tester.TestExtension
2 |
--------------------------------------------------------------------------------
/src/test/resources/data/access_log_2_combined:
--------------------------------------------------------------------------------
1 | 24.93.39.209 - - [25/Jul/2015:15:31:32 +0900] "POST /search/?c=Computers HTTP/1.1" 200 88 "/category/health" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C)"
2 |
--------------------------------------------------------------------------------
/src/test/resources/data/access_log_combined:
--------------------------------------------------------------------------------
1 | 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"
2 |
3 |
--------------------------------------------------------------------------------
/src/test/resources/data/access_log_common:
--------------------------------------------------------------------------------
1 | 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
2 |
--------------------------------------------------------------------------------
/src/test/resources/data/access_log_custom_time_format:
--------------------------------------------------------------------------------
1 | 127.0.0.1 - frank [2015.11.20 13:55:36] "GET /apache_pb.gif HTTP/1.0" 200 2326
2 |
--------------------------------------------------------------------------------
/src/test/resources/resource.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jami-i/embulk-parser-apache-custom-log/2ca9bdbead7217a97be015fcffe6798f86587f52/src/test/resources/resource.txt
--------------------------------------------------------------------------------
/src/test/resources/temp/dummy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jami-i/embulk-parser-apache-custom-log/2ca9bdbead7217a97be015fcffe6798f86587f52/src/test/resources/temp/dummy
--------------------------------------------------------------------------------
/src/test/resources/yml/test_combined.yml:
--------------------------------------------------------------------------------
1 | in:
2 | type: file
3 | path_prefix: 'data/access_log_combined'
4 | parser:
5 | type: apache-log
6 | format: '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"'
7 | out:
8 | type: file
9 | path_prefix: '/temp/result_combined.'
10 | file_ext: tsv
11 | formatter:
12 | type: csv
13 | delimiter: "\t"
--------------------------------------------------------------------------------
/src/test/resources/yml/test_combined2.yml:
--------------------------------------------------------------------------------
1 | in:
2 | type: file
3 | path_prefix: 'data/access_log_2_combined'
4 | parser:
5 | type: apache-log
6 | format: '%h %l %u %t \"%m %U%q %H\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"'
7 | out:
8 | type: file
9 | path_prefix: '/temp/result_2_combined.'
10 | file_ext: tsv
11 | formatter:
12 | type: csv
13 | delimiter: "\t"
--------------------------------------------------------------------------------
/src/test/resources/yml/test_common.yml:
--------------------------------------------------------------------------------
1 | in:
2 | type: file
3 | path_prefix: 'data/access_log_common'
4 | parser:
5 | type: apache-log
6 | format: '%h %l %u %t \"%r\" %>s %b'
7 | out:
8 | type: file
9 | path_prefix: '/temp/result_common.'
10 | file_ext: tsv
11 | formatter:
12 | type: csv
13 | delimiter: "\t"
--------------------------------------------------------------------------------
/src/test/resources/yml/test_custom_time_format.yml:
--------------------------------------------------------------------------------
1 | in:
2 | type: file
3 | path_prefix: 'data/access_log_custom_time_format'
4 | parser:
5 | type: apache-log
6 | format: '%h %l %u %{[%Y.%m.%d %T]}t \"%r\" %>s %b'
7 | out:
8 | type: file
9 | path_prefix: '/temp/result_custom_time_format.'
10 | file_ext: tsv
11 | formatter:
12 | type: csv
13 | delimiter: "\t"
--------------------------------------------------------------------------------