├── .gitignore ├── NOTICE ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── config ├── README.md ├── checkstyle │ ├── copyright-header │ └── checkstyle.xml └── intellij │ └── codestyle.xml ├── gradle.properties ├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── main.yml ├── src ├── test │ ├── kotlin │ │ └── com │ │ │ └── amazon │ │ │ └── ionpathextraction │ │ │ ├── PathExtractorImplTest.kt │ │ │ ├── FsmPathExtractorTest.kt │ │ │ ├── ExampleTest.java │ │ │ └── PathExtractorTest.kt │ └── resources │ │ └── test-cases.ion ├── main │ └── java │ │ └── com │ │ └── amazon │ │ └── ionpathextraction │ │ ├── UnsupportedPathExpression.java │ │ ├── exceptions │ │ └── PathExtractionException.java │ │ ├── pathcomponents │ │ ├── Wildcard.java │ │ ├── Index.java │ │ ├── Text.java │ │ └── PathComponent.java │ │ ├── internal │ │ ├── PathExtractorConfig.java │ │ ├── MatchContext.java │ │ ├── Annotations.java │ │ └── Preconditions.java │ │ ├── FsmMatcher.java │ │ ├── SearchPath.java │ │ ├── PathExtractor.java │ │ ├── SearchPathParser.java │ │ ├── FsmPathExtractor.java │ │ ├── PathExtractorImpl.java │ │ ├── PathExtractorBuilder.java │ │ └── FsmMatcherBuilder.java └── jmh │ └── java │ └── com │ └── amazon │ └── ionpathextraction │ └── benchmarks │ └── PathExtractorBenchmark.java ├── CODE_OF_CONDUCT.md ├── settings.gradle ├── gradlew.bat ├── CONTRIBUTING.md ├── gradlew ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/ 2 | .idea/ 3 | build 4 | out/ 5 | ion-c 6 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Ion Java Path Extraction 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-ion/ion-java-path-extraction/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /config/README.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | Project development configuration files, for example code style and checkstyle settings used on all modules -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | signing.keyId=EMPTY 2 | signing.password=EMPTY 3 | signing.secretKeyRingFile=EMPTY 4 | 5 | ossrhUsername=EMPTY 6 | ossrhPassword=EMPTY 7 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
7 | -------------------------------------------------------------------------------- /src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt: -------------------------------------------------------------------------------- 1 | package com.amazon.ionpathextraction 2 | 3 | class PathExtractorImplTest : PathExtractorTest() { 4 | override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildLegacy() 5 | } 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Apr 08 15:47:47 PDT 2019 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | java: [8, 9, 10, 11] 16 | steps: 17 | - uses: actions/checkout@v2 18 | with: 19 | submodules: recursive 20 | - name: Use java ${{ matrix.java }} 21 | uses: actions/setup-java@v1 22 | with: 23 | java-version: ${{ matrix.java }} 24 | - name: Build with Gradle 25 | run: ./gradlew build 26 | -------------------------------------------------------------------------------- /config/checkstyle/copyright-header: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 
5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | rootProject.name = 'ion-java-path-extraction' 15 | 16 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | /** 17 | * Thrown when trying to build a "strict" PathExtractor if a SearchPath or set of paths is not supported. 18 | * A user should rewrite their extraction to match the strictness invariant or use the "legacy" PathExtractor. 
19 | */ 20 | public class UnsupportedPathExpression extends RuntimeException { 21 | public UnsupportedPathExpression(final String msg) { 22 | super(msg); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/exceptions/PathExtractionException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.exceptions; 15 | 16 | /** 17 | * Base exception. 18 | */ 19 | public class PathExtractionException extends RuntimeException { 20 | 21 | public PathExtractionException(final String message) { 22 | super(message); 23 | } 24 | 25 | public PathExtractionException(final String message, final Throwable cause) { 26 | super(message, cause); 27 | } 28 | 29 | public PathExtractionException(final Throwable cause) { 30 | super(cause); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 
5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.pathcomponents; 15 | 16 | import com.amazon.ionpathextraction.internal.Annotations; 17 | import com.amazon.ionpathextraction.internal.MatchContext; 18 | 19 | /** 20 | * Wildcard path component matches any value, example. 21 | *
<pre>
22 |  * data: {foo: [1,2,3], bar: { baz: [1] }}
23 |  *
24 |  * search path | callback invoked with reader at
25 |  * ------------|--------------------
26 |  *  (*)        | [1, 2, 3] and { baz: [1] }
27 |  *  (* *)      | 1, 2, 3 and [1]
28 |  * </pre>
29 | */ 30 | public final class Wildcard extends PathComponent { 31 | 32 | public static final String TEXT = "*"; 33 | 34 | public Wildcard(final String[] annotations) { 35 | this(new Annotations(annotations)); 36 | } 37 | 38 | public Wildcard(final Annotations annotations) { 39 | super(annotations); 40 | } 41 | 42 | public Wildcard() { 43 | this(Annotations.EMPTY); 44 | } 45 | 46 | @Override 47 | public boolean innerMatches(final MatchContext context) { 48 | return true; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.internal; 15 | 16 | /** 17 | * Internal only. Not intended for application use. 18 | */ 19 | public final class PathExtractorConfig { 20 | 21 | private final boolean matchRelativePaths; 22 | private final boolean matchCaseInsensitive; 23 | private final boolean matchFieldsCaseInsensitive; 24 | 25 | /** 26 | * Instantiate a PathExtractorConfig. 
27 | */ 28 | public PathExtractorConfig( 29 | final boolean matchRelativePaths, 30 | final boolean matchCaseInsensitive, 31 | final boolean matchFieldsCaseInsensitive) { 32 | this.matchRelativePaths = matchRelativePaths; 33 | this.matchCaseInsensitive = matchCaseInsensitive; 34 | this.matchFieldsCaseInsensitive = matchFieldsCaseInsensitive; 35 | } 36 | 37 | public boolean isMatchRelativePaths() { 38 | return matchRelativePaths; 39 | } 40 | 41 | public boolean isMatchCaseInsensitive() { 42 | return matchCaseInsensitive; 43 | } 44 | 45 | public boolean isMatchFieldsCaseInsensitive() { 46 | return matchFieldsCaseInsensitive; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.pathcomponents; 15 | 16 | import com.amazon.ionpathextraction.internal.Annotations; 17 | import com.amazon.ionpathextraction.internal.MatchContext; 18 | 19 | /** 20 | * Index path component matches collection by position, example. 21 | *
<pre>
22 |  * data: {foo: [1,2,3], bar: { baz: [1] }}
23 |  *
24 |  * search path | callback invoked with reader at
25 |  * ------------|--------------------
26 |  *  (0)        | [1, 2, 3]
27 |  *  (0 2)      | 3
28 |  * </pre>
29 | */ 30 | public final class Index extends PathComponent { 31 | 32 | private final int ordinal; 33 | 34 | /** 35 | * Constructor. 36 | * 37 | * @param ordinal component ordinal. 38 | */ 39 | public Index(final int ordinal, final String[] annotations) { 40 | super(new Annotations(annotations)); 41 | this.ordinal = ordinal; 42 | } 43 | 44 | public Index(final int ordinal) { 45 | this(ordinal, EMPTY_STRING_ARRAY); 46 | } 47 | 48 | public Integer getOrdinal() { 49 | return ordinal; 50 | } 51 | 52 | @Override 53 | public boolean innerMatches(final MatchContext context) { 54 | return ordinal == context.getReaderContainerIndex(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/internal/MatchContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.internal; 15 | 16 | import com.amazon.ion.IonReader; 17 | 18 | /** 19 | *

<p> 20 | * Context for matching search paths. 21 | * </p> 22 | * 23 | * <p> 24 | * Internal only. Not intended for application use. 25 | * </p>

26 | */ 27 | public class MatchContext { 28 | private final IonReader reader; 29 | private final int pathComponentIndex; 30 | private final int readerContainerIndex; 31 | private final String[] annotations; 32 | private final PathExtractorConfig config; 33 | 34 | /** 35 | * Constructor. 36 | */ 37 | public MatchContext(final IonReader reader, 38 | final int pathComponentIndex, 39 | final int readerContainerIndex, 40 | final PathExtractorConfig config) { 41 | this.reader = reader; 42 | this.pathComponentIndex = pathComponentIndex; 43 | this.readerContainerIndex = readerContainerIndex; 44 | this.annotations = reader.getTypeAnnotations(); 45 | this.config = config; 46 | } 47 | 48 | public IonReader getReader() { 49 | return reader; 50 | } 51 | 52 | public int getPathComponentIndex() { 53 | return pathComponentIndex; 54 | } 55 | 56 | public int getReaderContainerIndex() { 57 | return readerContainerIndex; 58 | } 59 | 60 | public String[] getAnnotations() { 61 | return annotations; 62 | } 63 | 64 | public PathExtractorConfig getConfig() { 65 | return config; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 
12 | */ 13 | 14 | package com.amazon.ionpathextraction.pathcomponents; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | 18 | import com.amazon.ion.IonReader; 19 | import com.amazon.ionpathextraction.internal.Annotations; 20 | import com.amazon.ionpathextraction.internal.MatchContext; 21 | 22 | /** 23 | * Text path component matches struct field names, example. 24 | *
<pre>
25 |  * data: {foo: [1,2,3], bar: { baz: [1] }}
26 |  *
27 |  * search path | callback invoked with reader at
28 |  * ------------|--------------------
29 |  *  (foo)      | [1, 2, 3]
30 |  *  (bar baz)  | [1]
31 |  * </pre>
32 | */ 33 | public final class Text extends PathComponent { 34 | 35 | private final String fieldName; 36 | 37 | /** 38 | * Constructor. 39 | * 40 | * @param fieldName component field name. 41 | */ 42 | public Text(final String fieldName, final String[] annotations) { 43 | super(new Annotations(annotations)); 44 | checkArgument(fieldName != null, "fieldName cannot be null"); 45 | 46 | this.fieldName = fieldName; 47 | } 48 | 49 | public Text(final String fieldName) { 50 | this(fieldName, EMPTY_STRING_ARRAY); 51 | } 52 | 53 | public String getFieldName() { 54 | return fieldName; 55 | } 56 | 57 | @Override 58 | public boolean innerMatches(final MatchContext context) { 59 | final IonReader reader = context.getReader(); 60 | if (!reader.isInStruct()) { 61 | return false; 62 | } 63 | 64 | return context.getConfig().isMatchFieldsCaseInsensitive() 65 | ? fieldName.equalsIgnoreCase(reader.getFieldName()) 66 | : fieldName.equals(reader.getFieldName()); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 
12 | */ 13 | 14 | package com.amazon.ionpathextraction.pathcomponents; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | 18 | import com.amazon.ionpathextraction.internal.Annotations; 19 | import com.amazon.ionpathextraction.internal.MatchContext; 20 | 21 | /** 22 | * A search path component, for example the path (foo * 1) has three components. 23 | * 24 | *
<ol> 25 | * <li>foo</li> 26 | * <li>*</li> 27 | * <li>1</li> 28 | * </ol>
29 | */ 30 | public abstract class PathComponent { 31 | 32 | public static final String[] EMPTY_STRING_ARRAY = new String[0]; 33 | 34 | protected final Annotations annotations; 35 | 36 | PathComponent(final Annotations annotations) { 37 | checkArgument(annotations != null, "annotations cannot be null"); 38 | 39 | this.annotations = annotations; 40 | } 41 | 42 | public Annotations getAnnotations() { 43 | return annotations; 44 | } 45 | 46 | public boolean hasAnnotations() { 47 | return annotations.hasAnnotations(); 48 | } 49 | 50 | /** 51 | * Checks if this component matches the current reader position with the given configuration. 52 | * 53 | * @return true if the component matches the current reader position false otherwise. 54 | */ 55 | public final boolean matches(final MatchContext context) { 56 | return annotations.match(context.getAnnotations(), context.getConfig().isMatchCaseInsensitive()) 57 | && innerMatches(context); 58 | } 59 | 60 | /** 61 | * Called by {@link PathComponent#matches(MatchContext)} after applying the standard matching logic. Subclasses must 62 | * implement their specific matching logic in this method. 63 | */ 64 | protected abstract boolean innerMatches(final MatchContext context); 65 | } 66 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS="-Xmx64m" 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/FsmMatcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import com.amazon.ion.IonReader; 17 | import com.amazon.ion.IonType; 18 | import java.util.function.BiFunction; 19 | import java.util.function.Supplier; 20 | 21 | /** 22 | * Base class for match states in the Finite State Machine matching implementation. 23 | */ 24 | abstract class FsmMatcher<T> { 25 | /** 26 | * Callback for match state; accepts the reader and a {@code T} context. May be null. 27 | */ 28 | BiFunction<IonReader, T, Integer> callback; 29 | 30 | enum Transitionable { 31 | TERMINAL(false, false), // correctly typed, no child transitions 32 | POSSIBLE(false, true), // child transitions may exist 33 | MISTYPED(true, false); // wrong type for the expected transitions 34 | 35 | final boolean invalid; 36 | final boolean possible; 37 | 38 | Transitionable(final boolean invalid, final boolean possible) { 39 | this.invalid = invalid; 40 | this.possible = possible; 41 | } 42 | } 43 | 44 | /** 45 | * Indicates if there _may_ be transitions to child matchers from the given IonType, 46 | * or if the given IonType is mistyped for the expected transitions. 
47 | */ 48 | Transitionable transitionsFrom(final IonType ionType) { 49 | if (IonType.isContainer(ionType)) { 50 | return Transitionable.POSSIBLE; 51 | } 52 | if (ionType == IonType.NULL) { // IonType.NULL is accepted but has nothing to step into 53 | return Transitionable.TERMINAL; 54 | } 55 | return Transitionable.MISTYPED; // every other non-container type 56 | } 57 | 58 | /** 59 | * Return the child matcher for the given reader context. 60 | * Return null if there is no match. 61 | *
62 | * @param position will be -1 for top-level-values, otherwise will be the position ordinal 63 | * of the value in the container, both for sequences and structs. 64 | * @param fieldName will be non-null only for struct values. 65 | */ 66 | abstract FsmMatcher<T> transition(String fieldName, int position, Supplier<String[]> annotations); 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/internal/Annotations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.internal; 15 | 16 | import java.util.Arrays; 17 | import java.util.stream.IntStream; 18 | 19 | /** 20 | *

<p> 21 | * Represents the optional annotation that path components or search paths should match on. 22 | * </p> 23 | * 24 | * <p> 25 | * Internal only. Not intended for application use. 26 | * </p>

27 | */ 28 | public final class Annotations { 29 | 30 | public static final Annotations EMPTY = new Annotations(new String[] {}); 31 | 32 | final String[] rawAnnotations; 33 | 34 | /** 35 | * Constructor. 36 | */ 37 | public Annotations(final String[] rawAnnotations) { 38 | this.rawAnnotations = rawAnnotations; 39 | } 40 | 41 | public String[] getAnnotations() { 42 | return rawAnnotations; 43 | } 44 | 45 | public boolean hasAnnotations() { 46 | return rawAnnotations.length > 0; 47 | } 48 | 49 | /** 50 | * returns true if it matches on the annotations provided. 51 | */ 52 | public boolean match(final String[] annotations, final boolean ignoreCase) { 53 | return rawAnnotations.length == 0 54 | || arrayEquals(rawAnnotations, annotations, ignoreCase); 55 | } 56 | 57 | private static boolean arrayEquals(final String[] left, final String[] right, final boolean ignoreCase) { 58 | if (left.length != right.length) { 59 | return false; 60 | } 61 | 62 | return IntStream.range(0, left.length) 63 | .allMatch(i -> ignoreCase ? 
left[i].equalsIgnoreCase(right[i]) : left[i].equals(right[i])); 64 | } 65 | 66 | @Override 67 | public boolean equals(final Object o) { 68 | if (this == o) { 69 | return true; 70 | } 71 | if (!(o instanceof Annotations)) { 72 | return false; 73 | } 74 | return Arrays.equals(rawAnnotations, ((Annotations) o).rawAnnotations); 75 | } 76 | 77 | @Override 78 | public int hashCode() { 79 | return Arrays.hashCode(rawAnnotations); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt: -------------------------------------------------------------------------------- 1 | package com.amazon.ionpathextraction 2 | 3 | import com.amazon.ion.IonReader 4 | import com.amazon.ionpathextraction.exceptions.PathExtractionException 5 | import org.junit.jupiter.api.Assertions.assertEquals 6 | import org.junit.jupiter.api.Test 7 | import org.junit.jupiter.api.assertThrows 8 | import org.junit.jupiter.params.ParameterizedTest 9 | import org.junit.jupiter.params.provider.MethodSource 10 | 11 | class FsmPathExtractorTest : PathExtractorTest() { 12 | override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildStrict() 13 | 14 | @ParameterizedTest 15 | @MethodSource("testCases") 16 | override fun testSearchPaths(testCase: Companion.TestCase) { 17 | if (testCase.legacyOnly) { // legacy-only cases are expected to throw under buildStrict() 18 | assertThrows<UnsupportedPathExpression> { 19 | super.testSearchPaths(testCase) 20 | } 21 | } else { 22 | super.testSearchPaths(testCase) 23 | } 24 | } 25 | 26 | @ParameterizedTest 27 | @MethodSource("testCases") 28 | override fun testSearchPathsMatchCurrentValue(testCase: Companion.TestCase) { 29 | if (testCase.legacyOnly) { // delegate to the parent test of the same name, not testSearchPaths 30 | assertThrows<UnsupportedPathExpression> { 31 | super.testSearchPathsMatchCurrentValue(testCase) 32 | } 33 | } else { 34 | super.testSearchPathsMatchCurrentValue(testCase) 35 | } 36 | } 37 | 38 | data class TypingTestCase(val searchPath: String, val validity: List<Boolean>, val matchCount: Int) 39 | 40 | @Test 41 | fun testStrictTyping() { 42 | val inputs = 
listOf("17", "[31]", "(53)", "null", "{ foo: 67 }") 43 | val testCases = listOf( // 17, [31], (53), null, { foo: 67 } 44 | TypingTestCase("()", listOf(true, true, true, true, true), 5), 45 | TypingTestCase("A::()", listOf(true, true, true, true, true), 0), 46 | TypingTestCase("(*)", listOf(false, true, true, true, true), 3), 47 | TypingTestCase("(A::*)", listOf(false, true, true, true, true), 0), 48 | TypingTestCase("(0)", listOf(false, true, true, true, true), 3), 49 | TypingTestCase("(foo)", listOf(false, false, false, true, true), 1)) 50 | 51 | testCases.forEach { testCase -> 52 | var count = 0 // fresh match counter for each search path 53 | val counter = { _: IonReader -> 54 | count += 1 55 | 0 56 | } 57 | val extractor = PathExtractorBuilder.standard() 58 | .withSearchPath(testCase.searchPath, counter) 59 | .buildStrict(true) 60 | 61 | for (j in inputs.indices) { 62 | val ionReader = ION.newReader(inputs[j]) 63 | if (testCase.validity[j]) { 64 | extractor.match(ionReader) 65 | } else { 66 | assertThrows<PathExtractionException> { 67 | extractor.match(ionReader) 68 | } 69 | } 70 | } 71 | assertEquals(testCase.matchCount, count) 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/SearchPath.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 
12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import com.amazon.ion.IonReader; 17 | import com.amazon.ionpathextraction.internal.Annotations; 18 | import com.amazon.ionpathextraction.internal.MatchContext; 19 | import com.amazon.ionpathextraction.pathcomponents.PathComponent; 20 | import com.amazon.ionpathextraction.pathcomponents.Wildcard; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | import java.util.function.BiFunction; 24 | 25 | /** 26 | * A path which is provided to the extractor for matching. 27 | * 28 | * @param type accepted by the callback function 29 | */ 30 | final class SearchPath { 31 | 32 | private final List pathComponents; 33 | private final BiFunction callback; 34 | private final Annotations annotations; 35 | 36 | SearchPath(final List pathComponents, 37 | final BiFunction callback, 38 | final Annotations annotations) { 39 | this.annotations = annotations; 40 | this.pathComponents = pathComponents; 41 | this.callback = callback; 42 | } 43 | 44 | /** 45 | * Number of path components in this search path. 46 | */ 47 | int size() { 48 | return pathComponents.size(); 49 | } 50 | 51 | /** 52 | * Produces a "normalized" path for the SearchPath. 53 | * Basically: the SearchPath has the annotations (or not) for matching top-level-values. 54 | * The "normalized" path treats this as an explicit Wildcard step and adds it to the head 55 | * of the PathComponents. 56 | */ 57 | List getNormalizedPath() { 58 | List normalizedPath = new ArrayList<>(pathComponents.size() + 1); 59 | normalizedPath.add(new Wildcard(annotations)); 60 | normalizedPath.addAll(pathComponents); 61 | return normalizedPath; 62 | } 63 | 64 | /** 65 | * Callback to be invoked when the Search Path is matched. 66 | */ 67 | BiFunction getCallback() { 68 | return callback; 69 | } 70 | 71 | /** 72 | * Checks that this search path matches the stream at a given path context index. 
73 | */ 74 | boolean partialMatchAt(final MatchContext context) { 75 | int pathComponentIndex = context.getPathComponentIndex(); 76 | 77 | if (pathComponentIndex == 0) { 78 | return annotations.match(context.getAnnotations(), context.getConfig().isMatchCaseInsensitive()); 79 | } else if (pathComponentIndex <= pathComponents.size()) { 80 | return pathComponents.get(pathComponentIndex - 1).matches(context); 81 | } 82 | 83 | return false; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.internal; 15 | 16 | import com.amazon.ionpathextraction.exceptions.PathExtractionException; 17 | 18 | /** 19 | *

20 | * Precondition check helper. 21 | *

22 | * 23 | *

24 | * Internal only. Not intended for application use. 25 | *

26 | */ 27 | public class Preconditions { 28 | 29 | /** 30 | * Validates argument, fails if condition is not met. 31 | * Prefer {@link #checkArgument(boolean isValid, String messageFormat, Object[] args) } over concatenating 32 | * Strings at call-site. 33 | * 34 | * @param isValid if condition is met. 35 | * @param message error message. 36 | * @throws PathExtractionException if not valid. 37 | */ 38 | public static void checkArgument(final boolean isValid, final String message) { 39 | if (!isValid) { 40 | throw new PathExtractionException(message); 41 | } 42 | } 43 | 44 | /** 45 | * Validates argument, fails if condition is not met. 46 | * This overload only builds the error message if isValid is false. 47 | * 48 | * @param isValid if condition is met. 49 | * @param messageFormat error message _format_. 50 | * @param args arguments to String.format() 51 | * @throws PathExtractionException if not valid. 52 | */ 53 | public static void checkArgument(final boolean isValid, final String messageFormat, final Object... args) { 54 | if (!isValid) { 55 | throw new PathExtractionException(String.format(messageFormat, args)); 56 | } 57 | } 58 | 59 | /** 60 | * Validates a state, fails if condition is not met. 61 | * Prefer {@link #checkState(boolean isValid, String messageFormat, Object[] args) } over concatenating 62 | * Strings at call-site. 63 | * 64 | * @param isValid if condition is met. 65 | * @param message error message. 66 | * @throws PathExtractionException if not valid. 67 | */ 68 | public static void checkState(final boolean isValid, final String message) { 69 | if (!isValid) { 70 | throw new PathExtractionException(message); 71 | } 72 | } 73 | 74 | /** 75 | * Validates a state, fails if condition is not met. 76 | * This overload only builds the error message if isValid is false. 77 | * 78 | * @param isValid if condition is met. 79 | * @param messageFormat error message _format_. 
80 | * @param args arguments to String.format() 81 | * @throws PathExtractionException if not valid. 82 | */ 83 | public static void checkState(final boolean isValid, final String messageFormat, final Object... args) { 84 | if (!isValid) { 85 | throw new PathExtractionException(String.format(messageFormat, args)); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/PathExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import com.amazon.ion.IonReader; 17 | 18 | /** 19 | *

20 | * Path extractor takes registered paths and when it finds one during stream processing invokes the respective callback. 21 | * This allows the Ion reader to plan the most efficient traversal over the data without requiring further manual 22 | * interaction from the user. 23 | *

24 | * 25 | *

26 | * For example, there is no reason to step in to containers which could not possibly match one of the search paths. When 27 | * encoded in binary Ion, the resulting skip is a seek forward in the input stream, which is inexpensive relative to the 28 | * cost of parsing (and in the case of a DOM, materializing) the skipped value. 29 | *

30 | * 31 | *

32 | * WARNING: Implementations of this interface are not required to be thread-safe. 33 | *

34 | */ 35 | public interface PathExtractor { 36 | 37 | /** 38 | * Iterates over the reader looking for registered search paths, when a match is found invokes the respective 39 | * callback. 40 | * 41 | * @param reader {@link IonReader} to process. 42 | */ 43 | void match(final IonReader reader); 44 | 45 | /** 46 | * Iterates over the reader looking for registered search paths, when a match is found invokes the respective 47 | * callback. 48 | * 49 | * @param reader {@link IonReader} to process. 50 | * @param context context passed in to callback functions. 51 | */ 52 | void match(final IonReader reader, final T context); 53 | 54 | /** 55 | * Behaves identically to {@link #match(IonReader)}, except that only the value at which the given reader is 56 | * currently positioned is evaluated against the registered search paths. Before this method is called, the caller 57 | * must position the reader on the value to be searched using {@link IonReader#next()}. After this method returns, 58 | * it is the caller's responsibility to call {@link IonReader#next()} to position the reader on the next value at 59 | * the same depth. 60 | * @param reader {@link IonReader}, already positioned on a value, to process. 61 | */ 62 | void matchCurrentValue(final IonReader reader); 63 | 64 | /** 65 | * Behaves identically to {@link #match(IonReader, Object)}, except that only the value at which the given reader is 66 | * currently positioned is evaluated against the registered search paths. Before this method is called, the caller 67 | * must position the given reader on the value to be searched using {@link IonReader#next()}. After this method 68 | * returns, it is the caller's responsibility to call {@link IonReader#next()} to position the reader on the next 69 | * value at the same depth. 70 | * @param reader {@link IonReader}, already positioned on a value, to process. 71 | * @param context context passed in to callback functions. 
72 | */ 73 | void matchCurrentValue(final IonReader reader, final T context); 74 | } 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/amzn/ion-java-path-extraction/issues), or [recently closed](https://github.com/amzn/ion-java-path-extraction/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 
29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/amzn/ion-java-path-extraction/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/amzn/ion-java-path-extraction/blob/master/LICENSE) file for our project's licensing. 
We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /src/test/kotlin/com/amazon/ionpathextraction/ExampleTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 
12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.amazon.ion.IonReader; 19 | import com.amazon.ion.system.IonReaderBuilder; 20 | import com.amazon.ionpathextraction.pathcomponents.Index; 21 | import com.amazon.ionpathextraction.pathcomponents.PathComponent; 22 | import com.amazon.ionpathextraction.pathcomponents.Text; 23 | import com.amazon.ionpathextraction.pathcomponents.Wildcard; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | import java.util.concurrent.atomic.AtomicLong; 27 | import java.util.function.BiFunction; 28 | import java.util.function.Function; 29 | import org.junit.Test; 30 | 31 | /** 32 | * Test the example code in README.md 33 | */ 34 | public class ExampleTest { 35 | 36 | @Test 37 | public void example() { 38 | final AtomicLong counter = new AtomicLong(0); 39 | 40 | final Function callback = (reader) -> { 41 | counter.addAndGet(reader.intValue()); 42 | 43 | return 0; 44 | }; 45 | 46 | final PathExtractor pathExtractor = PathExtractorBuilder.standard() 47 | .withSearchPath("(foo)", callback) 48 | .withSearchPath("(bar)", callback) 49 | .withSearchPath("(A::baz 1)", callback) 50 | .build(); 51 | 52 | final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" 53 | + "{bar: 2}" 54 | + "{baz: A::[10,20,30,40]}" 55 | + "{baz: [100,200,300,400]}" 56 | + "{other: 99}" 57 | ); 58 | 59 | pathExtractor.match(ionReader); 60 | 61 | assertEquals(23, counter.get()); 62 | } 63 | 64 | @Test 65 | public void topLevelExample() { 66 | final AtomicLong counterA = new AtomicLong(0); 67 | final AtomicLong counterB = new AtomicLong(0); 68 | 69 | final PathExtractor pathExtractor = PathExtractorBuilder.standard() 70 | .withSearchPath("()", (reader) -> { 71 | counterA.addAndGet(reader.intValue()); 72 | 73 | return 0; 74 | }) 75 | .withSearchPath("A::()", (reader) -> { 76 | counterB.addAndGet(reader.intValue()); 77 | 78 | return 0; 79 | }) 80 | .build(); 81 | 82 
| final IonReader ionReader = IonReaderBuilder.standard().build("1 1 1 A::10 1"); 83 | 84 | pathExtractor.match(ionReader); 85 | 86 | assertEquals(14, counterA.get()); 87 | assertEquals(10, counterB.get()); 88 | } 89 | 90 | @Test 91 | public void exampleWithContext() { 92 | 93 | final BiFunction, Integer> callback = (reader, list) -> { 94 | list.add(reader.intValue()); 95 | 96 | return 0; 97 | }; 98 | 99 | final PathExtractor> pathExtractor = PathExtractorBuilder.>standard() 100 | .withSearchPath("(foo)", callback) 101 | .withSearchPath("(bar)", callback) 102 | .withSearchPath("(A::baz 1)", callback) 103 | .build(); 104 | 105 | final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" 106 | + "{bar: 2}" 107 | + "{baz: A::[10,20,30,40]}" 108 | + "{baz: [100,200,300,400]}" 109 | + "{other: 99}" 110 | ); 111 | 112 | final List list = new ArrayList<>(); 113 | pathExtractor.match(ionReader, list); 114 | 115 | assertEquals("[1, 2, 20]", list.toString()); 116 | } 117 | 118 | @Test 119 | public void programmaticExample() { 120 | 121 | final BiFunction callback = (ionReader, stringBuilder) -> { 122 | stringBuilder.append(ionReader.stringValue()); 123 | return 0; 124 | }; 125 | 126 | final List steps = new ArrayList<>(); 127 | steps.add(new Wildcard()); 128 | steps.add(new Text("foo")); 129 | steps.add(new Index(0)); 130 | final PathExtractor extractor = PathExtractorBuilder.standard() 131 | .withSearchPath(steps, callback) 132 | .build(); 133 | 134 | final IonReader ionReader = IonReaderBuilder.standard().build("[{foo: [bar]}]"); 135 | final StringBuilder stringBuilder = new StringBuilder(); 136 | extractor.match(ionReader, stringBuilder); 137 | 138 | assertEquals("bar", stringBuilder.toString()); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/SearchPathParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | 18 | import com.amazon.ion.IonReader; 19 | import com.amazon.ion.IonType; 20 | import com.amazon.ion.IonWriter; 21 | import com.amazon.ion.system.IonReaderBuilder; 22 | import com.amazon.ion.system.IonTextWriterBuilder; 23 | import com.amazon.ionpathextraction.exceptions.PathExtractionException; 24 | import com.amazon.ionpathextraction.internal.Annotations; 25 | import com.amazon.ionpathextraction.pathcomponents.Index; 26 | import com.amazon.ionpathextraction.pathcomponents.PathComponent; 27 | import com.amazon.ionpathextraction.pathcomponents.Text; 28 | import com.amazon.ionpathextraction.pathcomponents.Wildcard; 29 | import java.io.IOException; 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | import java.util.function.BiFunction; 33 | 34 | /** 35 | * Parses a search path ion expression into a {@link SearchPath}s. 
36 | */ 37 | final class SearchPathParser { 38 | 39 | private static final IonReaderBuilder READER_BUILDER = IonReaderBuilder.standard(); 40 | private static final IonTextWriterBuilder WRITER_BUILDER = IonTextWriterBuilder.standard(); 41 | 42 | private static final String WILDCARD_ESCAPE_ANNOTATION = "$ion_extractor_field"; 43 | 44 | // only has static methods, should not be invoked 45 | private SearchPathParser() { 46 | } 47 | 48 | static SearchPath parse(final String ionPathExpression, final BiFunction callback) { 49 | final List pathComponents; 50 | 51 | try (final IonReader reader = newIonReader(ionPathExpression)) { 52 | checkArgument(reader.next() != null, "ionPathExpression cannot be empty"); 53 | checkArgument(reader.getType() == IonType.SEXP || reader.getType() == IonType.LIST, 54 | "ionPathExpression must be a s-expression or list"); 55 | 56 | final String[] typeAnnotations = reader.getTypeAnnotations(); 57 | 58 | reader.stepIn(); 59 | pathComponents = parsePathComponents(reader); 60 | reader.stepOut(); 61 | 62 | return new SearchPath<>(pathComponents, callback, new Annotations(typeAnnotations)); 63 | } catch (IOException e) { 64 | throw new PathExtractionException(e); 65 | } 66 | } 67 | 68 | private static List parsePathComponents(final IonReader reader) { 69 | final List pathComponents = new ArrayList<>(); 70 | 71 | while (reader.next() != null) { 72 | pathComponents.add(readComponent(reader)); 73 | } 74 | 75 | return pathComponents; 76 | } 77 | 78 | private static PathComponent readComponent(final IonReader reader) { 79 | final PathComponent pathComponent; 80 | final String[] annotations = extractAnnotations(reader); 81 | 82 | switch (reader.getType()) { 83 | case INT: 84 | pathComponent = new Index(reader.intValue(), annotations); 85 | break; 86 | 87 | case STRING: 88 | case SYMBOL: 89 | if (isWildcard(reader)) { 90 | pathComponent = new Wildcard(annotations); 91 | } else { 92 | pathComponent = new Text(reader.stringValue(), annotations); 93 | } 94 | 
break; 95 | 96 | default: 97 | throw new PathExtractionException("Invalid path component type: " + readIonText(reader)); 98 | } 99 | 100 | return pathComponent; 101 | } 102 | 103 | private static String[] extractAnnotations(final IonReader reader) { 104 | String[] typeAnnotations = reader.getTypeAnnotations(); 105 | 106 | final String[] annotations; 107 | final int offset; 108 | if (typeAnnotations.length > 0 && WILDCARD_ESCAPE_ANNOTATION.equals(typeAnnotations[0])) { 109 | annotations = new String[typeAnnotations.length - 1]; 110 | offset = 1; 111 | } else { 112 | annotations = new String[typeAnnotations.length]; 113 | offset = 0; 114 | } 115 | 116 | System.arraycopy(typeAnnotations, offset, annotations, 0, annotations.length); 117 | 118 | return annotations; 119 | } 120 | 121 | private static boolean isWildcard(final IonReader reader) { 122 | if (reader.stringValue().equals(Wildcard.TEXT)) { 123 | final String[] annotations = reader.getTypeAnnotations(); 124 | return annotations.length == 0 || !WILDCARD_ESCAPE_ANNOTATION.equals(annotations[0]); 125 | } 126 | return false; 127 | } 128 | 129 | private static String readIonText(final IonReader reader) { 130 | StringBuilder out = new StringBuilder(); 131 | try (IonWriter writer = newIonTextWriter(out)) { 132 | writer.writeValue(reader); 133 | } catch (IOException e) { 134 | throw new PathExtractionException(e); 135 | } 136 | return out.toString(); 137 | } 138 | 139 | private static IonReader newIonReader(final String ionText) { 140 | return READER_BUILDER.build(ionText); 141 | } 142 | 143 | private static IonWriter newIonTextWriter(final StringBuilder out) { 144 | return WRITER_BUILDER.build(out); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle 
start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS='"-Xmx64m"' 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 
82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 
136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 
4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | import static com.amazon.ionpathextraction.internal.Preconditions.checkState; 18 | 19 | import com.amazon.ion.IonReader; 20 | import com.amazon.ion.IonType; 21 | import com.amazon.ionpathextraction.exceptions.PathExtractionException; 22 | import com.amazon.ionpathextraction.internal.PathExtractorConfig; 23 | import java.util.List; 24 | import java.util.function.BiFunction; 25 | 26 | /** 27 | * A PathExtractor modeled as a Finite State Machine. 28 | *
29 | * Compared to the PathExtractorImpl, this supports a narrower set of 30 | * SearchPaths and their combinations, but is more performant, particularly 31 | * when a large number of field names are searched for. 32 | *
33 | * A more comprehensive explanation of the strictness from a user PoV can be 34 | * found on the PathExtractorBuilder.buildStrict() API method. Notes on the 35 | * 'why' can be found in the FsmMatcherBuilder. 36 | */ 37 | class FsmPathExtractor implements PathExtractor { 38 | private final FsmMatcher rootMatcher; 39 | private final boolean strictTyping; 40 | private final PathExtractorConfig config; 41 | 42 | private FsmPathExtractor( 43 | final FsmMatcher rootMatcher, 44 | final boolean strictTyping, 45 | final PathExtractorConfig config) { 46 | this.rootMatcher = rootMatcher; 47 | this.strictTyping = strictTyping; 48 | this.config = config; 49 | } 50 | 51 | static FsmPathExtractor create( 52 | final List> searchPaths, 53 | final boolean strictTyping, 54 | final PathExtractorConfig config) { 55 | FsmMatcherBuilder builder = new FsmMatcherBuilder<>( 56 | config.isMatchCaseInsensitive(), 57 | config.isMatchFieldsCaseInsensitive()); 58 | for (SearchPath path : searchPaths) { 59 | builder.accept(path); 60 | } 61 | 62 | return new FsmPathExtractor<>(builder.build(), strictTyping, config); 63 | } 64 | 65 | @Override 66 | public void match(final IonReader reader) { 67 | match(reader, null); 68 | } 69 | 70 | @Override 71 | public void match(final IonReader reader, final T context) { 72 | checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), 73 | "reader must be at depth zero, it was at: %s", reader.getDepth()); 74 | 75 | while (reader.next() != null) { 76 | matchCurrentValue(reader, context); 77 | } 78 | } 79 | 80 | @Override 81 | public void matchCurrentValue(final IonReader reader) { 82 | matchCurrentValue(reader, null); 83 | } 84 | 85 | @Override 86 | public void matchCurrentValue(final IonReader reader, final T context) { 87 | checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), 88 | "reader must be at depth zero, it was at: %s", reader.getDepth()); 89 | checkArgument(reader.getType() != null, 90 | "reader must be positioned at a 
value; call IonReader.next() first."); 91 | 92 | matchRecursive(reader, rootMatcher, context, -1, reader.getDepth()); 93 | } 94 | 95 | private int matchRecursive( 96 | final IonReader reader, 97 | final FsmMatcher matcher, 98 | final T context, 99 | final int position, 100 | final int initialDepth) { 101 | FsmMatcher child = matcher.transition(reader.getFieldName(), position, reader::getTypeAnnotations); 102 | if (child == null) { 103 | return 0; 104 | } 105 | 106 | if (child.callback != null) { 107 | int stepOut = invokeCallback(reader, child.callback, initialDepth, context); 108 | if (stepOut > 0) { 109 | return stepOut; 110 | } 111 | } 112 | 113 | FsmMatcher.Transitionable transitionable = child.transitionsFrom(reader.getType()); 114 | if (transitionable.possible) { 115 | reader.stepIn(); 116 | int childPos = 0; 117 | while (reader.next() != null) { 118 | int stepOut = matchRecursive(reader, child, context, childPos++, initialDepth); 119 | if (stepOut > 0) { 120 | reader.stepOut(); 121 | return stepOut - 1; 122 | } 123 | } 124 | reader.stepOut(); 125 | return 0; 126 | } 127 | 128 | if (transitionable.invalid && strictTyping) { 129 | throw new PathExtractionException( 130 | String.format("IonType %s is not valid for transitions from %s", reader.getType(), child)); 131 | } 132 | 133 | return 0; 134 | } 135 | 136 | private int invokeCallback( 137 | final IonReader reader, 138 | final BiFunction callback, 139 | final int initialReaderDepth, 140 | final T context) { 141 | int previousReaderDepth = reader.getDepth(); 142 | 143 | int stepOutTimes = callback.apply(reader, context); 144 | int newReaderDepth = reader.getDepth(); 145 | 146 | checkState(previousReaderDepth == newReaderDepth, 147 | "Reader must be at same depth when returning from callbacks. 
initial: %s, new: %s", 148 | previousReaderDepth, 149 | newReaderDepth); 150 | 151 | // we don't allow users to step out the initial reader depth 152 | int readerRelativeDepth = reader.getDepth() - initialReaderDepth; 153 | 154 | checkState(stepOutTimes <= readerRelativeDepth, 155 | STEP_OUT_TOO_FAR_MSG, 156 | stepOutTimes, 157 | readerRelativeDepth); 158 | 159 | return stepOutTimes; 160 | } 161 | 162 | private static final String STEP_OUT_TOO_FAR_MSG = 163 | "Callback return cannot be greater than the reader current relative depth. " 164 | + "return: %s, relative reader depth: %s"; 165 | } 166 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 
12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | import static com.amazon.ionpathextraction.internal.Preconditions.checkState; 18 | 19 | import com.amazon.ion.IonReader; 20 | import com.amazon.ion.IonType; 21 | import com.amazon.ionpathextraction.internal.MatchContext; 22 | import com.amazon.ionpathextraction.internal.PathExtractorConfig; 23 | import java.util.ArrayDeque; 24 | import java.util.ArrayList; 25 | import java.util.Deque; 26 | import java.util.List; 27 | 28 | /** 29 | *

30 | * Default implementation of {@link PathExtractor}. 31 | *

32 | *

33 | * This implementation is thread safe. 34 | *

35 | */
final class PathExtractorImpl<T> implements PathExtractor<T> {

    private final PathExtractorConfig config;
    private final List<SearchPath<T>> searchPaths;

    private final int maxSearchPathDepth;

    /**
     * Constructor, should only be invoked by {@link PathExtractorBuilder}.
     */
    PathExtractorImpl(final List<SearchPath<T>> searchPaths,
                      final PathExtractorConfig config) {

        this.searchPaths = searchPaths;
        this.config = config;

        // Size of the longest search path; used as the initial capacity of each Tracker's stack.
        maxSearchPathDepth = searchPaths.stream()
                .mapToInt(SearchPath::size)
                .max()
                .orElse(0);
    }

    @Override
    public void match(final IonReader reader) {
        match(reader, null);
    }

    /**
     * Matches every remaining value at the reader's current depth.
     */
    @Override
    public void match(final IonReader reader, final T context) {
        // Format arguments replace string concatenation, as FsmPathExtractor already does through the
        // same static imports. NOTE(review): assumes Preconditions only formats on failure - confirm.
        checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
                "reader must be at depth zero, it was at: %s", reader.getDepth());

        // short circuit when there are zero SearchPaths
        if (searchPaths.isEmpty()) {
            return;
        }

        final Tracker<T> tracker = new Tracker<>(maxSearchPathDepth, searchPaths, reader.getDepth());

        matchAllValuesRecursive(reader, tracker, context);
    }

    @Override
    public void matchCurrentValue(final IonReader reader) {
        matchCurrentValue(reader, null);
    }

    /**
     * Matches only the value the reader is currently positioned on.
     */
    @Override
    public void matchCurrentValue(final IonReader reader, final T context) {
        checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
                "reader must be at depth zero, it was at: %s", reader.getDepth());
        checkArgument(reader.getType() != null,
                "reader must be positioned at a value; call IonReader.next() first.");

        // short circuit when there are zero SearchPaths
        if (searchPaths.isEmpty()) {
            return;
        }

        final Tracker<T> tracker = new Tracker<>(maxSearchPathDepth, searchPaths, reader.getDepth());
        matchCurrentValueRecursive(reader, tracker, context, 0, tracker.getCurrentDepth());
    }

    /**
     * Tests the current value against every active search path, invoking callbacks for terminal
     * matches and stepping into the value when some path matched only partially.
     *
     * @param readerContainerIndex ordinal of the current value within the container being walked.
     * @param currentDepth         tracker depth at which the value is evaluated.
     * @return how many more containers the caller must step out of; 0 to keep iterating.
     */
    private int matchCurrentValueRecursive(
            final IonReader reader,
            final Tracker<T> tracker,
            final T context,
            final int readerContainerIndex,
            final int currentDepth
    ) {
        // will continue to next depth
        final List<SearchPath<T>> partialMatches = new ArrayList<>();

        final MatchContext matchContext = new MatchContext(reader, currentDepth, readerContainerIndex, config);
        for (SearchPath<T> sp : tracker.activePaths()) {
            // a terminal search path is at the last path component meaning that if this search path partially
            // matches it will be a full match and the callback must be invoked
            boolean searchPathIsTerminal = isTerminal(tracker.getCurrentDepth(), sp);
            boolean partialMatch = sp.partialMatchAt(matchContext);

            if (partialMatch) {
                if (searchPathIsTerminal) {
                    int stepOutTimes = invokeCallback(reader, sp, tracker.getInitialReaderDepth(), context);
                    if (stepOutTimes > 0) {
                        return stepOutTimes;
                    }
                } else {
                    partialMatches.add(sp);
                }
            }
        }

        if (IonType.isContainer(reader.getType()) && !partialMatches.isEmpty()) {
            tracker.push(partialMatches);
            reader.stepIn();
            int stepOutTimes = matchAllValuesRecursive(reader, tracker, context);
            reader.stepOut();
            tracker.pop();

            if (stepOutTimes > 0) {
                return stepOutTimes;
            }
        }
        return 0;
    }

    /**
     * Iterates the remaining values at the reader's current depth.
     *
     * @return step-out levels still owed to the caller: a non-zero request from a nested match minus
     *         the one level the caller consumes by stepping out of this container.
     */
    private int matchAllValuesRecursive(final IonReader reader, final Tracker<T> tracker, final T context) {
        final int currentDepth = tracker.getCurrentDepth();
        int readerContainerIndex = 0;

        while (reader.next() != null) {
            int stepOutTimes = matchCurrentValueRecursive(reader, tracker, context, readerContainerIndex, currentDepth);
            if (stepOutTimes > 0) {
                return stepOutTimes - 1;
            }
            readerContainerIndex += 1;
        }

        return 0;
    }

    /**
     * Invokes the search path's callback and validates its contract: the reader must come back at
     * the same depth, and the requested step-out count must not go above the depth matching started at.
     *
     * @return the step-out count returned by the callback.
     */
    private int invokeCallback(final IonReader reader,
                               final SearchPath<T> searchPath,
                               final int initialReaderDepth,
                               final T context) {
        int previousReaderDepth = reader.getDepth();

        int stepOutTimes = searchPath.getCallback().apply(reader, context);
        int newReaderDepth = reader.getDepth();

        checkState(previousReaderDepth == newReaderDepth,
                "Reader must be at same depth when returning from callbacks. initial: %s, new: %s",
                previousReaderDepth,
                newReaderDepth);

        // we don't allow users to step out the initial reader depth
        int readerRelativeDepth = reader.getDepth() - initialReaderDepth;

        checkState(stepOutTimes <= readerRelativeDepth,
                "Callback return cannot be greater than the reader current relative depth."
                        + " return: %s, relative reader depth: %s",
                stepOutTimes,
                readerRelativeDepth);

        return stepOutTimes;
    }

    private boolean isTerminal(final int pathComponentIndex, final SearchPath<T> searchPath) {
        return pathComponentIndex == searchPath.size();
    }

    /**
     * Stack of the search paths still active at each depth, created per match call.
     */
    private static class Tracker<T> {

        private final Deque<List<SearchPath<T>>> stack;
        private final int initialReaderDepth;

        Tracker(final int size, final List<SearchPath<T>> searchPaths, final int initialReaderDepth) {
            stack = new ArrayDeque<>(size);
            stack.push(searchPaths);
            this.initialReaderDepth = initialReaderDepth;
        }

        List<SearchPath<T>> activePaths() {
            return stack.peek();
        }

        // The bottom entry is the full search path list, so depth is zero before any push.
        int getCurrentDepth() {
            return stack.size() - 1;
        }

        void push(final List<SearchPath<T>> partialMatches) {
            stack.push(partialMatches);
        }

        void pop() {
            stack.pop();
        }

        int getInitialReaderDepth() {
            return initialReaderDepth;
        }
    }
}
-------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | ## Ion Java Path Extraction 2 | 3 | [![Build Status](https://travis-ci.org/amzn/ion-java-path-extraction.svg?branch=master)](https://travis-ci.org/amzn/ion-java-path-extraction) 4 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.amazon.ion/ion-java-path-extraction/badge.svg)](https://maven-badges.herokuapp.com/maven-central/com.amazon.ion/ion-java-path-extraction) 5 | [![Javadocs](https://www.javadoc.io/badge/com.amazon.ion/ion-java-path-extraction.svg)](https://www.javadoc.io/doc/com.amazon.ion/ion-java-path-extraction) 6 | 7 | Ion Path Extraction API aims to combine the convenience of a DOM API with the speed of a streaming API. 8 | 9 | The traditional streaming and DOM APIs force the user to choose between speed and convenience, respectively. 10 | Path extraction APIs aim to combine the two by allowing the user to register paths into the data using just a 11 | few lines of code and receive callbacks during stream processing when any of those paths is matched. This allows 12 | the Ion reader to plan the most efficient traversal over the data without requiring further manual interaction 13 | from the user. For example, there is no reason to step in to containers which could not possibly match one of 14 | the search paths. When encoded in binary Ion, the resulting skip is a seek forward in the input stream, which 15 | is inexpensive relative to the cost of parsing (and in the case of a DOM, materializing) the skipped value. 16 | 17 | ## Usage 18 | Path extractor works in two phases: 19 | 1. Configuration 20 | 2. Notification 21 | 22 | ### Search Paths 23 | A `SearchPath` is a path provided to the extractor for matching. It's composed of a list of [PathComponent](https://static.javadoc.io/com.amazon.ion/ion-java-path-extraction/1.0.1/com/amazon/ionpathextraction/pathcomponents/PathComponent.html)s 24 | which can be one of: 25 | * Wildcard: matches all values. 
26 | * Index: match the value at that index. 27 | * Text: match all values whose field names are equivalent to that text. 28 | * Annotations: matches values specified by a wrapped path component with the given annotations. 29 | Some examples: 30 | ``` 31 | data on reader: {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedValue"} 32 | 33 | (foo 0) - matches "foo1" 34 | (1) - matches "myBarValue" 35 | (*) - matches ["foo1", "foo2"], "myBarValue" and A::"annotatedValue" 36 | () - matches {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedValue"} 37 | (bar) - matches "myBarValue" and A::"annotatedValue" 38 | (A::*) - matches A::"annotatedValue" 39 | (A::bar) - matches A::"annotatedValue" (is not supported in "strict" mode, see #Optimization below) 40 | ``` 41 | 42 | The `()` matcher matches all values in the stream but you can also use annotations with it, example: 43 | ``` 44 | data on reader: 2 3 {} 4 A::2 B::C::[] 45 | 46 | () - matches 2, 3, {}, 4, A::2 and B::C::[] 47 | A::() - matches A::2 48 | B::C::() - matches B::C::[] 49 | B::() - doesn't match anything 50 | ``` 51 | 52 | ### Configuration 53 | The configuration phase involves building a `PathExtractor` instance through the `PathExtractorBuilder` by setting its 54 | configuration options and registering its search paths. The built `PathExtractor` can be reused over many `IonReader`s. 55 | 56 | example: 57 | 58 | ```java 59 | PathExtractorBuilder.standard() 60 | .withMatchCaseInsensitive(true) 61 | .withSearchPath("(foo)", (reader) -> { ... }) 62 | .build() 63 | ``` 64 | 65 | see PathExtractorBuilder [javadoc](https://static.javadoc.io/com.amazon.ion/ion-java-path-extraction/1.0.1/com/amazon/ionpathextraction/PathExtractorBuilder.html) for more information on configuration options and search path registration. 66 | 67 | ### Optimization 68 | 69 | There are two implementations: "strict" and "legacy". 
The strict implementation is more performant, particularly as the 70 | number of fields extracted grows. By default `PathExtractorBuilder.build()` will try to build you a strict extractor and 71 | will fall back to the legacy extractor. You may be explicit that you want a specific implementation by calling 72 | `PathExtractorBuilder.buildStrict()` or `PathExtractorBuilder.buildLegacy()`. 73 | 74 | The strict implementation supports basic paths, with field names, index ordinals, and annotations on top-level-values or 75 | wildcards. It does not support mixing field names and index ordinals, multiple callbacks on the same path or annotations 76 | on non-wildcard values. Case-insensitive annotations matching is not supported. 77 | 78 | ### Notification 79 | Each time the `PathExtractor` encounters a value that matches a registered search path it will invoke the respective 80 | callback passing the reader positioned at the current value. See `PathExtractorBuilder#withSearchPath` methods for more 81 | information on the callback contract. 
82 | 83 | ### Example: 84 | 85 | ```java 86 | // Adds matched values 87 | final AtomicLong counter = new AtomicLong(0); 88 | 89 | final Function<IonReader, Integer> callback = (reader) -> { 90 | counter.addAndGet(reader.intValue()); 91 | 92 | return 0; 93 | }; 94 | 95 | final PathExtractor<?> pathExtractor = PathExtractorBuilder.standard() 96 | .withSearchPath("(foo)", callback) 97 | .withSearchPath("(bar)", callback) 98 | .withSearchPath("(A::baz 1)", callback) 99 | .build(); 100 | 101 | final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" 102 | + "{bar: 2}" 103 | + "{baz: A::[10,20,30,40]}" 104 | + "{baz: [100,200,300,400]}" 105 | + "{other: 99}" 106 | ); 107 | 108 | pathExtractor.match(ionReader); 109 | 110 | assertEquals(23, counter.get()); 111 | ``` 112 | 113 | ```java 114 | // Top level matchers 115 | final AtomicLong counterA = new AtomicLong(0); 116 | final AtomicLong counterB = new AtomicLong(0); 117 | 118 | final PathExtractor<?> pathExtractor = PathExtractorBuilder.standard() 119 | .withSearchPath("()", (reader) -> { 120 | counterA.addAndGet(reader.intValue()); 121 | 122 | return 0; 123 | }) 124 | .withSearchPath("A::()", (reader) -> { 125 | counterB.addAndGet(reader.intValue()); 126 | 127 | return 0; 128 | }) 129 | .build(); 130 | 131 | final IonReader ionReader = IonReaderBuilder.standard().build("1 1 1 A::10 1"); 132 | 133 | pathExtractor.match(ionReader); 134 | 135 | assertEquals(14, counterA.get()); 136 | assertEquals(10, counterB.get()); 137 | ``` 138 | 139 | ```java 140 | // accumulates matched paths into a list 141 | final BiFunction<IonReader, List<Integer>, Integer> callback = (reader, list) -> { 142 | list.add(reader.intValue()); 143 | 144 | return 0; 145 | }; 146 | 147 | final PathExtractor<List<Integer>> pathExtractor = PathExtractorBuilder.<List<Integer>>standard() 148 | .withSearchPath("(foo)", callback) 149 | .withSearchPath("(bar)", callback) 150 | .withSearchPath("(A::baz 1)", callback) 151 | .build(); 152 | 153 | final IonReader ionReader = IonReaderBuilder.standard().build("{foo: 1}" 154 | + 
"{bar: 2}" 155 | + "{baz: A::[10,20,30,40]}" 156 | + "{baz: [100,200,300,400]}" 157 | + "{other: 99}" 158 | ); 159 | 160 | final List<Integer> list = new ArrayList<>(); 161 | pathExtractor.match(ionReader, list); 162 | 163 | assertEquals("[1, 2, 20]", list.toString()); 164 | ``` 165 | 166 | `PathExtractorBuilder#withSearchPath` [javadoc](https://static.javadoc.io/com.amazon.ion/ion-java-path-extraction/1.0.1/com/amazon/ionpathextraction/PathExtractorBuilder.html#withSearchPath-java.lang.String-java.util.function.Function-) 167 | 168 | ## Benchmark 169 | 170 | Some benchmarks comparing the path extractor with fully materializing a DOM are included in this package. All benchmarks 171 | use the JSON at https://data.nasa.gov/data.json, a publicly available data set from NASA, as their data source. 172 | 173 | The `dataset` struct from the original JSON is written as Ion binary and Ion text without any type coercion. The 174 | binary file is ~81M and the text file ~95M. There are four benchmark types: 175 | 1. `dom`: fully materializes a DOM for the file using an `IonLoader`. 176 | 1. `full`: fully materializes all struct fields as `IonValue`s using a path extractor. 177 | 1. `partial`: materializes a single struct field as an `IonValue` using a path extractor. 178 | 1. `partialNoDom`: accesses the Java representation of a single struct field directly, without materializing an `IonValue`. 179 | 180 | All the path extractor benchmarks are run in "strict" mode. 181 | There is a binary and a text version for all four benchmark types. See the [PathExtractorBenchmark](https://github.com/amzn/ion-java-path-extraction/blob/master/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java) class for 182 | more details. 183 | 184 | To execute the benchmarks run `gradle --no-daemon jmh`; this requires an internet connection as it downloads the data set. 185 | Results below, higher is better. 
186 | 187 | ``` 188 | Benchmark Mode Cnt Score Error Units 189 | PathExtractorBenchmark.domBinary thrpt 5 5.060 ± 0.075 ops/s 190 | PathExtractorBenchmark.domText thrpt 5 1.172 ± 0.040 ops/s 191 | PathExtractorBenchmark.fullBinary thrpt 5 6.011 ± 0.107 ops/s 192 | PathExtractorBenchmark.fullText thrpt 5 1.214 ± 0.025 ops/s 193 | PathExtractorBenchmark.partialBinary thrpt 5 57.329 ± 13.585 ops/s 194 | PathExtractorBenchmark.partialBinaryNoDom thrpt 5 56.598 ± 2.424 ops/s 195 | PathExtractorBenchmark.partialText thrpt 5 2.430 ± 0.073 ops/s 196 | PathExtractorBenchmark.partialTextNoDom thrpt 5 2.416 ± 0.175 ops/s 197 | ``` 198 | 199 | Using the path extractor has equivalent performance for both text and binary when fully materializing the document and 200 | can give significant performance improvements when partially materializing binary documents. This happens due to Ion's 201 | ability to skip scan values in the binary format as they are length prefixed. The gains will be proportional to how 202 | much of the document can be skipped over. 203 | 204 | ## Ion Developer information 205 | See the developer guide on: http://amzn.github.io/ion-docs/guides/path-extractor-guide.html 206 | 207 | ## License 208 | This library is licensed under the Apache 2.0 License. 209 | -------------------------------------------------------------------------------- /src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. 
This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction.benchmarks; 15 | 16 | import com.amazon.ion.IonReader; 17 | import com.amazon.ion.IonSystem; 18 | import com.amazon.ion.IonValue; 19 | import com.amazon.ion.IonWriter; 20 | import com.amazon.ion.system.IonBinaryWriterBuilder; 21 | import com.amazon.ion.system.IonReaderBuilder; 22 | import com.amazon.ion.system.IonSystemBuilder; 23 | import com.amazon.ion.system.IonTextWriterBuilder; 24 | import com.amazon.ionpathextraction.PathExtractor; 25 | import com.amazon.ionpathextraction.PathExtractorBuilder; 26 | import java.io.ByteArrayInputStream; 27 | import java.io.ByteArrayOutputStream; 28 | import java.io.IOException; 29 | import java.io.InputStream; 30 | import java.io.OutputStream; 31 | import java.net.URL; 32 | import java.util.Iterator; 33 | import java.util.function.Function; 34 | import java.util.stream.Stream; 35 | import org.openjdk.jmh.annotations.Benchmark; 36 | import org.openjdk.jmh.annotations.Level; 37 | import org.openjdk.jmh.annotations.Scope; 38 | import org.openjdk.jmh.annotations.Setup; 39 | import org.openjdk.jmh.annotations.State; 40 |
/**
 * Benchmarks comparing the PathExtractor with fully materializing the DOM.
 */
public class PathExtractorBenchmark {

    private static final IonSystem DOM_FACTORY = IonSystemBuilder.standard().build();
    private static final String DATA_URL = "https://data.nasa.gov/data.json";
    private static byte[] bytesBinary;
    private static byte[] bytesText;

    // sets up shared test data once.
    static {
        try {
            setupTestData();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private static IonReader newReader(final InputStream inputStream) {
        return IonReaderBuilder.standard().build(inputStream);
    }

    private static IonWriter newBinaryWriter(final OutputStream outputStream) {
        return IonBinaryWriterBuilder.standard().build(outputStream);
    }

    private static IonWriter newTextWriter(final OutputStream outputStream) {
        return IonTextWriterBuilder.standard().build(outputStream);
    }

    /**
     * Downloads the data set once and keeps a binary Ion and a text Ion copy of it in memory.
     *
     * @throws IOException if the data set cannot be downloaded or re-encoded.
     */
    private static void setupTestData() throws IOException {
        final URL url = new URL(DATA_URL);

        final ByteArrayOutputStream binaryOut = new ByteArrayOutputStream();
        try (
            final InputStream inputStream = url.openStream();
            final IonReader reader = newReader(inputStream);
            final IonWriter binaryWriter = newBinaryWriter(binaryOut)
        ) {
            binaryWriter.writeValues(reader);
        }

        bytesBinary = binaryOut.toByteArray();

        // text version. Writes from the binary memory buffer to avoid downloading the data twice
        final ByteArrayOutputStream textOut = new ByteArrayOutputStream();
        try (
            final InputStream inputStream = new ByteArrayInputStream(bytesBinary);
            final IonReader reader = newReader(inputStream);
            final IonWriter writer = newTextWriter(textOut)
        ) {
            writer.writeValues(reader);
        }

        bytesText = textOut.toByteArray();
    }

    /**
     * Fully materializes all struct fields as IonValues using a path extractor.
     */
    @Benchmark
    public Object fullBinary(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
        threadState.pathExtractor.match(reader);

        return reader;
    }

    /**
     * Text version of {@link #fullBinary(ThreadState)}.
     */
    @Benchmark
    public Object fullText(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesText));
        threadState.pathExtractor.match(reader);

        return reader;
    }

    /**
     * Materializes a single struct field as an IonValue using a path extractor.
     */
    @Benchmark
    public Object partialBinary(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
        threadState.pathExtractorPartial.match(reader);

        return reader;
    }

    /**
     * Text version of {@link #partialBinary(ThreadState)}.
     */
    @Benchmark
    public Object partialText(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesText));
        threadState.pathExtractorPartial.match(reader);

        return reader;
    }

    /**
     * Access the java representation directly of a single struct field without materializing an `IonValue`.
     */
    @Benchmark
    public Object partialBinaryNoDom(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
        threadState.pathExtractorPartialNoDom.match(reader);

        return reader;
    }

    /**
     * Text version of {@link #partialBinaryNoDom(ThreadState)}.
     */
    @Benchmark
    public Object partialTextNoDom(final ThreadState threadState) {
        // instantiate reader inside benchmark to be more comparable to dom loading
        IonReader reader = newReader(new ByteArrayInputStream(bytesText));
        threadState.pathExtractorPartialNoDom.match(reader);

        return reader;
    }

    /**
     * Fully materializes a DOM for the file using an IonLoader.
     */
    @Benchmark
    public Object domBinary() {
        IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
        // iterating over Top-Level-Values is more apples:apples to path extractor
        // vs loading all as a datagram
        Iterator<IonValue> iter = DOM_FACTORY.iterate(reader);
        while (iter.hasNext()) {
            iter.next();
        }
        return reader;
    }

    /**
     * Text version of {@link #domBinary()}.
     */
    @Benchmark
    public Object domText() {
        IonReader reader = newReader(new ByteArrayInputStream(bytesText));
        // iterating over Top-Level-Values is more apples:apples to path extractor
        // vs loading all as a datagram
        Iterator<IonValue> iter = DOM_FACTORY.iterate(reader);
        while (iter.hasNext()) {
            iter.next();
        }
        return reader;
    }

    /**
     * Each thread gets a single instance.
     */
    @State(Scope.Thread)
    public static class ThreadState {

        PathExtractor<?> pathExtractor;
        PathExtractor<?> pathExtractorPartial;
        PathExtractor<?> pathExtractorPartialNoDom;

        /**
         * Builds the three extractors used by the benchmarks once per trial.
         */
        @Setup(Level.Trial)
        public void setup() throws Exception {
            pathExtractor = makePathExtractor(reader -> {
                    // reads matches as DOM doing similar work as the DOM loader
                    DOM_FACTORY.newValue(reader);
                    return 0;
                },
                "(@context)",
                "(@type)",
                "(conformsTo)",
                "(describedBy)",
                "(dataset * @type)",
                "(dataset * accessLevel)",
                "(dataset * accrualPeriodicity)",
                "(dataset * bureauCode)",
                "(dataset * contactPoint)",
                "(dataset * description)",
                "(dataset * distribution)",
                "(dataset * identifier)",
                "(dataset * issued)",
                "(dataset * keyword)",
                "(dataset * landingPage)",
                "(dataset * modified)",
                "(dataset * programCode)",
                "(dataset * publisher)",
                "(dataset * title)",
                "(dataset * license)"
            );

            pathExtractorPartial = makePathExtractor(reader -> {
                    // reads matches as DOM doing similar work as the DOM loader but only for matched values
                    DOM_FACTORY.newValue(reader);
                    return 0;
                },
                "(@context)",
                "(@type)",
                "(conformsTo)",
                "(describedBy)",
                "(dataset * accessLevel)"
            );

            pathExtractorPartialNoDom = makePathExtractor(reader -> {
                    // reads the value without materializing a DOM object
                    reader.stringValue(); // all matched paths are strings
                    return 0;
                },
                "(@context)",
                "(@type)",
                "(conformsTo)",
                "(describedBy)",
                "(dataset * accessLevel)"
            );
        }

        /**
         * Registers the same callback for every given search path and builds the extractor.
         */
        private PathExtractor<?> makePathExtractor(final Function<IonReader, Integer> callback,
                                                   final String... searchPaths) {
            final PathExtractorBuilder<?> builder = PathExtractorBuilder.standard();
            Stream.of(searchPaths).forEach(sp -> builder.withSearchPath(sp, callback));
            return builder.build();
        }
    }
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /src/test/resources/test-cases.ion: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Test case spec: 4 | 5 | single search path: 6 | { 7 | searchPath: , 8 | data: , 9 | expected: 10 | } 11 | 12 | multiple search paths: 13 | { 14 | searchPaths: , 15 | data: , 16 | expected: 17 | } 18 | 19 | Only difference is that for multiple the searchPath key is pluralized to searchPaths and expects an Ion sequence of 20 | search paths 21 | */ 22 | 23 | // zero search paths --------------------------------------------------------------------- 24 | // no-op extractor, data doesn't matter 25 | { searchPaths: [], expected: [], data: {foo: 1} } 26 | { searchPaths: [], expected: [], data: (3 4) } 27 | { searchPaths: [], expected: [], data: 99 } 28 | { searchPaths: [], expected: [], data: [1, 2] } 29 | 30 | // Field only ---------------------------------------------------------------------------- 31 | 32 | // matches 33 | { searchPath: (foo), data: {foo: 1}, expected: [1] } 34 | { searchPath: [foo], data: {foo: 1}, 
expected: [1] } 35 | { searchPath: (foo bar), data: {foo: {bar : 2}}, expected: [2] } 36 | 37 | // escaped wildcard 38 | { searchPath: ('$ion_extractor_field'::*), data: {'*': 1, foo: 2}, expected: [1]} 39 | 40 | // matches one sibling 41 | { searchPath: (foo baz), data: {foo: {bar : 2, baz: 3}}, expected: [3] } 42 | 43 | // multiple matches 44 | { searchPath: (foo bar), data: {foo: {bar : 2, bar: 3}}, expected: [2, 3] } 45 | 46 | // no match 47 | { searchPath: (foo), data: {baz: 10}, expected: [] } 48 | { searchPath: (foo baz), data: {foo: {bar : 2}}, expected: [] } 49 | 50 | // stepOut 51 | { searchPath: (foo bar), data: {foo: {bar : 2, bar: 3}}, expected: [2], stepOutN: 1 } 52 | { 53 | searchPath: (foo bar baz), 54 | data: { foo: { bar: {baz: 1}, bar: {baz: 2} } }, 55 | expected: [1], 56 | stepOutN: 2 57 | } 58 | 59 | // empty containers 60 | { searchPath: (foo), data: {}, expected: [] } 61 | { searchPath: (foo), data: (), expected: [] } 62 | { searchPath: (foo), data: [], expected: [] } 63 | 64 | // not containers 65 | { searchPath: (foo), data: null, expected: [] } 66 | { searchPath: (foo), data: true, expected: [] } 67 | { searchPath: (foo), data: 1, expected: [] } 68 | { searchPath: (foo), data: 1e0, expected: [] } 69 | { searchPath: (foo), data: 1.0, expected: [] } 70 | { searchPath: (foo), data: 2018T, expected: [] } 71 | { searchPath: (foo), data: "", expected: [] } 72 | { searchPath: (foo), data: '', expected: [] } 73 | { searchPath: (foo), data: {{ }}, expected: [] } 74 | { searchPath: (foo), data: {{ "" }}, expected: [] } 75 | 76 | 77 | 78 | // Ordinal only -------------------------------------------------------------------------- 79 | 80 | // matches 81 | { searchPath: (0), data: [1], expected: [1] } 82 | { searchPath: [0], data: [1], expected: [1] } 83 | { searchPath: (0), data: (1), expected: [1] } 84 | { searchPath: (0), data: {f: 1}, expected: [1] } 85 | { searchPath: (1), data: [1, 2], expected: [2] } 86 | { searchPath: (1), data: (1 3), 
expected: [3] } 87 | { searchPath: (1), data: {f1: 1, f2: 2}, expected: [2] } 88 | { searchPath: (0), data: [1, 2], expected: [1] } 89 | { searchPath: (0), data: (1 3), expected: [1] } 90 | { searchPath: (0), data: {f1: 1, f2: 2}, expected: [1] } 91 | 92 | // out of bounds 93 | { searchPath: (1), data: [1], expected: [] } 94 | { searchPath: (1), data: (1), expected: [] } 95 | { searchPath: (1), data: {foo: 1}, expected: [] } 96 | 97 | // empty containers 98 | { searchPath: (0), data: [], expected: [] } 99 | { searchPath: (0), data: (), expected: [] } 100 | { searchPath: (0), data: {}, expected: [] } 101 | 102 | // not containers 103 | { searchPath: (0), data: null, expected: [] } 104 | { searchPath: (0), data: true, expected: [] } 105 | { searchPath: (0), data: 1, expected: [] } 106 | { searchPath: (0), data: 1e0, expected: [] } 107 | { searchPath: (0), data: 1.0, expected: [] } 108 | { searchPath: (0), data: 2018T, expected: [] } 109 | { searchPath: (0), data: "", expected: [] } 110 | { searchPath: (0), data: '', expected: [] } 111 | { searchPath: (0), data: {{ }}, expected: [] } 112 | { searchPath: (0), data: {{ "" }}, expected: [] } 113 | 114 | 115 | // Wildcard only ------------------------------------------------------------------------- 116 | 117 | // matches 118 | { searchPath: (*), data: [1], expected: [1] } 119 | { searchPath: ['*'], data: [1], expected: [1] } 120 | { searchPath: (*), data: (1), expected: [1] } 121 | { searchPath: (*), data: {f: 1}, expected: [1] } 122 | { searchPath: (*), data: [1, 2], expected: [1, 2] } 123 | { searchPath: (*), data: (1 3), expected: [1, 3] } 124 | { searchPath: (*), data: {f1: 1, f2: 2}, expected: [1, 2] } 125 | { searchPath: (* *), data: [1, [2]], expected: [2] } 126 | { searchPath: (* *), data: (1 (3)), expected: [3] } 127 | { searchPath: (* *), data: {f1: 1, f2: {f3: 2}}, expected: [2] } 128 | 129 | // escape annotation is only valid as the first annotation 130 | { 131 | searchPath: (foo::'$ion_extractor_field'::*), 
132 | data: [foo::'$ion_extractor_field'::1, foo::'$ion_extractor_field'::2], 133 | expected: [foo::'$ion_extractor_field'::1, foo::'$ion_extractor_field'::2] 134 | } 135 | 136 | // insufficient depth 137 | { searchPath: (* *), data: [1], expected: [] } 138 | { searchPath: (* *), data: (1), expected: [] } 139 | { searchPath: (* *), data: {f1: 1}, expected: [] } 140 | { searchPath: (* *), data: [1, 2], expected: [] } 141 | { searchPath: (* *), data: (1 2), expected: [] } 142 | { searchPath: (* *), data: {f1: 1, f2: 2}, expected: [] } 143 | 144 | // step out 145 | { searchPath: (* *), data: [[1], [2]], expected: [1], stepOutN: 2 } 146 | 147 | // empty containers 148 | { searchPath: (*), data: [], expected: [] } 149 | { searchPath: (*), data: (), expected: [] } 150 | { searchPath: (*), data: {}, expected: [] } 151 | 152 | // not containers 153 | { searchPath: (*), data: null, expected: [] } 154 | { searchPath: (*), data: true, expected: [] } 155 | { searchPath: (*), data: 1, expected: [] } 156 | { searchPath: (*), data: 1e0, expected: [] } 157 | { searchPath: (*), data: 1.0, expected: [] } 158 | { searchPath: (*), data: 2018T, expected: [] } 159 | { searchPath: (*), data: "", expected: [] } 160 | { searchPath: (*), data: '', expected: [] } 161 | { searchPath: (*), data: {{ }}, expected: [] } 162 | { searchPath: (*), data: {{ "" }}, expected: [] } 163 | 164 | 165 | // Empty search path --------------------------------------------------------------------- 166 | 167 | // containers 168 | { searchPath: (), data: [1], expected: [[1]] } 169 | { searchPath: [], data: [1], expected: [[1]] } 170 | { searchPath: (), data: (1), expected: [(1)] } 171 | { searchPath: (), data: {foo: 1}, expected: [{foo: 1}] } 172 | 173 | // empty containers 174 | { searchPath: (), data: [], expected: [[]] } 175 | { searchPath: (), data: (), expected: [()] } 176 | { searchPath: (), data: {}, expected: [{}] } 177 | 178 | // not containers 179 | { searchPath: (), data: null, expected: [null] } 180 | 
{ searchPath: (), data: true, expected: [true] } 181 | { searchPath: (), data: 1, expected: [1] } 182 | { searchPath: (), data: 1e0, expected: [1e0] } 183 | { searchPath: (), data: 1.0, expected: [1.0] } 184 | { searchPath: (), data: 2018T, expected: [2018T] } 185 | { searchPath: (), data: "", expected: [""] } 186 | { searchPath: (), data: '', expected: [''] } 187 | { searchPath: (), data: {{ }}, expected: [{{ }}] } 188 | { searchPath: (), data: {{ "" }}, expected: [{{ "" }}] } 189 | 190 | 191 | // Mixed path components ----------------------------------------------------------------- 192 | { 193 | searchPath: (foo 1), 194 | data: { foo: [0, 1], foo: (0 2), foo: {a: 1, b: 3}, foo: 1, bar: [0, 1] }, 195 | expected: [1, 2, 3] 196 | } 197 | { 198 | searchPath: [foo, '*'], 199 | data: { foo: [1], foo: (2), foo: {bar: 3}, foo: 1, bar: (9) }, 200 | expected: [1, 2, 3] 201 | } 202 | { 203 | searchPath: (foo * bar), 204 | data: { foo: [ {bar: 1} ], foo: { baz: {bar: 2} } }, 205 | expected: [1, 2] 206 | } 207 | { 208 | searchPath: (foo * 0), 209 | data: { foo: [1, [2]], foo: {bar: (3)} }, 210 | expected: [2, 3] 211 | } 212 | { 213 | searchPath: (foo bar 2), 214 | data: {abc: def, foo: {bar:[1, 2, 3]}}, 215 | expected: [3] 216 | } 217 | { 218 | searchPath: (foo bar *), 219 | data: {abc: def, foo: {bar:[1, 2, 3]}}, 220 | expected: [1, 2, 3] 221 | } 222 | { 223 | searchPath: (foo bar * baz), 224 | data: {abc: def, foo: {bar:[{baz:1}, {zar:2}, {baz:3}]}}, 225 | expected: [1, 3] 226 | } 227 | 228 | // stepOut 229 | { 230 | searchPath: (foo * 0), 231 | data: { 232 | foo: { first: [1], second: [2] }, 233 | foo: { first: [10], second: [20] } 234 | }, 235 | expected: [1,10], 236 | stepOutN: 2 237 | } 238 | 239 | // Multiple search paths ----------------------------------------------------------------- 240 | // all match 241 | legacy::{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] } 242 | 243 | // none match 244 | legacy::{ searchPaths: [(1), [foo]], data: [0], 
expected: [] } 245 | 246 | // multiple matchers match the same value 247 | legacy::{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] } 248 | 249 | { searchPaths: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] } 250 | 251 | // With annotations ---------------------------------------------------------------------- 252 | { searchPath: A::(), data: A::1, expected: [A::1] } 253 | { searchPath: A::(), data: 1, expected: [] } 254 | { 255 | searchPath: A::(foo), 256 | data: $datagram::[ 257 | A::{bar: 1}, 258 | A::{foo: 2}, 259 | {foo: 3} 260 | ], 261 | expected: [2] 262 | } 263 | 264 | { searchPath: (A::'*'), data: [A::1, 2], expected: [A::1] } 265 | { searchPath: ('$ion_extractor_field'::*), data: {'*': A::1, foo: 2}, expected: [A::1]} 266 | { searchPath: (A::B::C::*), data: [A::B::C::1, B::A::C::2], expected: [A::B::C::1] } 267 | legacy::{ 268 | // annotations on ordinals or fields _could_ be supported in the FSM impl 269 | // but usage appears non-existent at time of writing (Sep 2024) 270 | searchPath: (foo A::2 bar), 271 | data: { 272 | foo: [0, 1, A::{bar: 1}], 273 | foo: [0, 1, {bar: 2}] 274 | }, 275 | expected: [1] 276 | } 277 | legacy::{ 278 | searchPath: (f::foo), 279 | data: { foo: f::17, foo: F::31, Foo: f::51, Foo: F::67 }, 280 | expected: [f::17], 281 | // default is None but explicit here as it is essential to the test case 282 | caseInsensitive: None 283 | } 284 | 285 | // case insensitivity 286 | { 287 | searchPath: (foo), 288 | data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{fOo: 4},{bar: 5}], 289 | expected: [1, 2, 3, 4], 290 | caseInsensitive: Both 291 | } 292 | legacy::{ 293 | // these resolve to the same path for the Fsm Impl 294 | searchPaths: [(foo), (Foo)], 295 | data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}], 296 | expected: [1, 1, 2, 2, 3, 3, 4, 4], 297 | caseInsensitive: Both 298 | } 299 | { 300 | searchPath: (foo), 301 | data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}], 302 | expected: 
[1, 2, 3, 4], 303 | caseInsensitive: Fields 304 | } 305 | legacy::{ 306 | searchPath: F::(), 307 | data: $datagram::[F::17, g::31, f::51], 308 | expected: [F::17, f::51], 309 | caseInsensitive: Both 310 | } 311 | { 312 | searchPath: F::(), 313 | data: $datagram::[F::17, g::31, f::51], 314 | expected: [F::17], 315 | caseInsensitive: Fields 316 | } 317 | { 318 | searchPath: (foo), 319 | data: $datagram::[[17]], 320 | expected: [], 321 | caseInsensitive: Fields 322 | } 323 | { 324 | searchPath: (foo), 325 | data: $datagram::[null.struct], 326 | expected: [], 327 | caseInsensitive: Fields 328 | } -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 15 | 16 | 17 | 18 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 50 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 65 | 66 | 67 | 68 | 69 | 70 | 73 | 74 | 75 | 76 | 77 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 88 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 132 | 133 | 134 | 136 | 137 | 138 | 139 | 141 | 142 | 143 | 144 | 145 | 147 | 148 | 149 | 150 | 152 | 153 | 154 | 155 | 157 | 158 | 159 | 160 | 161 | 163 | 164 | 165 | 166 | 168 | 169 | 170 | 171 | 173 | 174 | 175 | 176 | 178 | 179 | 180 | 181 | 182 | 184 | 186 | 188 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 210 | 211 | 212 | 213 | 214 | 215 | 218 | 219 | 220 | 221 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 234 | 235 | 236 | 237 | 238 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 257 
| 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; 17 | import static com.amazon.ionpathextraction.pathcomponents.PathComponent.EMPTY_STRING_ARRAY; 18 | 19 | import com.amazon.ion.IonReader; 20 | import com.amazon.ionpathextraction.internal.Annotations; 21 | import com.amazon.ionpathextraction.internal.PathExtractorConfig; 22 | import com.amazon.ionpathextraction.pathcomponents.PathComponent; 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | import java.util.function.BiFunction; 26 | import java.util.function.Function; 27 | 28 | /** 29 | * {@link PathExtractor} builder. 
30 | */ 31 | public final class PathExtractorBuilder { 32 | 33 | private static final boolean DEFAULT_MATCH_RELATIVE_PATHS = false; 34 | private static final boolean DEFAULT_CASE_INSENSITIVE = false; 35 | 36 | private final List> searchPaths = new ArrayList<>(); 37 | private boolean matchRelativePaths; 38 | private boolean matchCaseInsensitive; 39 | private boolean matchFieldsCaseInsensitive; 40 | 41 | private PathExtractorBuilder() { 42 | } 43 | 44 | /** 45 | * Creates a new builder with standard configuration. 46 | * 47 | * @return new standard builder instance. 48 | */ 49 | public static PathExtractorBuilder standard() { 50 | PathExtractorBuilder builder = new PathExtractorBuilder<>(); 51 | builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE; 52 | builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS; 53 | builder.matchFieldsCaseInsensitive = DEFAULT_CASE_INSENSITIVE; 54 | 55 | return builder; 56 | } 57 | 58 | /** 59 | * Instantiates a thread safe {@link PathExtractor} configured by this builder. 60 | * Attempts to build a "strict" PathExtractor which is much more performant, particularly for extractions with many 61 | * field names. Falls back to the "legacy" implementation if the paths registered are incompatible with the "strict" 62 | * implementation. 63 | *
64 | * Use buildStrict to ensure the more optimal implementation is used. 65 | * @return new {@link PathExtractor} instance. 66 | */ 67 | public PathExtractor build() { 68 | try { 69 | return buildStrict(); 70 | } catch (UnsupportedPathExpression e) { 71 | return buildLegacy(); 72 | } 73 | } 74 | 75 | /** 76 | * Instantiate a "stricter" and more optimized PathExtractor. 77 | *
78 | * Supports search paths where there is only one "variant" of step type from each parent step, and only one callback 79 | * per state. 80 | * Annotations matching is only supported on the root or wildcards. 81 | * Case insensitivity is supported on field names, not annotations. 82 | *
83 | * Examples of supported paths (and any combination of the below): 84 | * `A::()` 85 | * `(foo bar)` 86 | * `(foo qux)` 87 | * `(spam 0)` 88 | * `(spam 1)` 89 | * `(quid * quo)` 90 | * `(lorem A::B::* ipsum)` 91 | *
92 | * Examples of unsupported paths: 93 | * `(a::foo)` annotations on field names not supported, yet. 94 | * `(a::1)` annotations on index ordinals not supported, yet. 95 | * `(foo bar) (foo 1) (foo *)` combination of field names, index ordinals or wildcards not supported. 96 | * `a::() ()` combination of annotated and non-annotated root (or other wildcard) matching. 97 | * 98 | * @return new {@link PathExtractor} instance. 99 | * @throws UnsupportedPathExpression if any search path or the paths combined, are not supported. 100 | */ 101 | public PathExtractor buildStrict() { 102 | return buildStrict(false); 103 | } 104 | 105 | /** 106 | * Instantiate a "strict" path extractor, which also enforces type expectations. 107 | *
108 | * Paths that attempt to find named children are only valid on Structs or untyped null. 109 | * Paths that attempt to find indexed (or wildcard) children are only valid on containers or untyped null. 110 | * For backwards compatibility that includes Structs, though they are defined as unordered per the Ion Datamodel. 111 | *
112 | * The type check is performed _after_ any callbacks registered for the current path and 113 | * _before_ any child matches are attempted. 114 | */ 115 | public PathExtractor buildStrict(final boolean strictTyping) { 116 | return FsmPathExtractor.create(searchPaths, 117 | strictTyping, 118 | new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive)); 119 | } 120 | 121 | /** 122 | * Instantiate a "legacy" PathExtractor implementation. 123 | * The returned PathExtractor is inefficient when a large number of field names is searched, 124 | * but a wider variety of search paths are supported. 125 | */ 126 | public PathExtractor buildLegacy() { 127 | return new PathExtractorImpl<>(searchPaths, 128 | new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive)); 129 | } 130 | 131 | /** 132 | * Sets matchRelativePaths config. When true the path extractor will accept readers at any depth, when false the 133 | * reader must be at depth zero. 134 | * 135 | *
136 | * defaults to false. 137 | * 138 | * @param matchRelativePaths new config value. 139 | * @return builder for chaining. 140 | */ 141 | public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativePaths) { 142 | this.matchRelativePaths = matchRelativePaths; 143 | 144 | return this; 145 | } 146 | 147 | /** 148 | * Sets matchCaseInsensitive config. When true the path extractor will match fields _and annotations_ ignoring case. 149 | * When false the path extractor will match respecting the path components case. 150 | * To set case insensitivity for _only field names_ use the `withMatchFieldNamesCaseInsensitive` builder. 151 | * 152 | *
153 | * defaults to false. 154 | * 155 | * @param matchCaseInsensitive new config value. 156 | * @return builder for chaining. 157 | */ 158 | public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) { 159 | this.matchCaseInsensitive = matchCaseInsensitive; 160 | this.matchFieldsCaseInsensitive = matchCaseInsensitive; 161 | 162 | return this; 163 | } 164 | 165 | /** 166 | * Sets matchFieldNamesCaseInsensitive config. When true the path extractor will match field names ignoring case. 167 | * For example: 'Foo' will match 'foo'. 168 | * 169 | *
170 | * defaults to false. 171 | * 172 | * @param matchCaseInsensitive new config value. 173 | * @return builder for chaining. 174 | */ 175 | public PathExtractorBuilder withMatchFieldNamesCaseInsensitive(final boolean matchCaseInsensitive) { 176 | this.matchFieldsCaseInsensitive = matchCaseInsensitive; 177 | 178 | return this; 179 | } 180 | 181 | /** 182 | * Register a callback for a search path. 183 | * 184 | * @param searchPathAsIon string representation of a search path. 185 | * @param callback callback to be registered. 186 | * @return builder for chaining. 187 | * @see PathExtractorBuilder#withSearchPath(List, BiFunction, String[]) 188 | */ 189 | public PathExtractorBuilder withSearchPath(final String searchPathAsIon, 190 | final Function callback) { 191 | checkArgument(callback != null, "callback cannot be null"); 192 | 193 | withSearchPath(searchPathAsIon, (reader, t) -> callback.apply(reader)); 194 | 195 | return this; 196 | } 197 | 198 | /** 199 | * Register a callback for a search path. 200 | * 201 | * @param searchPathAsIon string representation of a search path. 202 | * @param callback callback to be registered. 203 | * @return builder for chaining. 204 | * @see PathExtractorBuilder#withSearchPath(List, BiFunction, String[]) 205 | */ 206 | public PathExtractorBuilder withSearchPath(final String searchPathAsIon, 207 | final BiFunction callback) { 208 | checkArgument(searchPathAsIon != null, "searchPathAsIon cannot be null"); 209 | checkArgument(callback != null, "callback cannot be null"); 210 | 211 | SearchPath searchPath = SearchPathParser.parse(searchPathAsIon, callback); 212 | searchPaths.add(searchPath); 213 | 214 | return this; 215 | } 216 | 217 | /** 218 | * Register a callback for a search path. 219 | * 220 | * @param pathComponents search path as a list of path components. 221 | * @param callback callback to be registered. 222 | * @param annotations annotations used with this search path. 223 | * @return builder for chaining. 
224 | */ 225 | public PathExtractorBuilder withSearchPath(final List pathComponents, 226 | final Function callback, 227 | final String[] annotations) { 228 | checkArgument(callback != null, "callback cannot be null"); 229 | 230 | return withSearchPath(pathComponents, (reader, t) -> callback.apply(reader), annotations); 231 | } 232 | 233 | /** 234 | * Register a callback for a search path. 235 | *

236 | * The callback receives the matcher's {@link IonReader}, positioned on the matching value, so that it can use the 237 | * appropriate reader method to access the value. The callback return value is a ‘step-out-N’ instruction. The most 238 | * common value is zero, which tells the extractor to continue with the next value at the same depth. A return value 239 | * greater than zero may be useful to users who only care about the first match at a particular depth. 240 | *

241 | * 242 | *

243 | * Callback implementations MUST comply with the following: 244 | *

245 | * 246 | *
    247 | *
  • 248 | * The reader must not be advanced past the matching value. Violating this will cause the following value to be 249 | * skipped. If a value is skipped, neither the value itself nor any of its children will be checked for match 250 | * against any of the extractor's registered paths. 251 | *
  • 252 | *
  • 253 | * If the reader is positioned on a container value, its cursor must be at the same depth when the callback returns. 254 | * In other words, if the user steps in to the matched value, it must step out an equal number of times. Violating 255 | * this will raise an error. 256 | *
  • 257 | *
  • 258 | * Return value must be between zero and the current reader relative depth; for example, a callback for the search 259 | * path (foo bar) must return values between 0 and 2 inclusive. 260 | *
  • 261 | *
  • 262 | * When there are nested search paths, e.g. (foo) and (foo bar), the callback for (foo) should not read the reader 263 | * value if it's a container. Doing so will advance the reader to the end of the container, making it impossible to 264 | * match (foo bar). 265 | *
  • 266 | *
267 | * 268 | * @param pathComponents search path as a list of path components. 269 | * @param callback callback to be registered. 270 | * @param annotations annotations used with this search path. 271 | * @return builder for chaining. 272 | */ 273 | public PathExtractorBuilder withSearchPath(final List pathComponents, 274 | final BiFunction callback, 275 | final String[] annotations) { 276 | checkArgument(pathComponents != null, "pathComponents cannot be null"); 277 | checkArgument(callback != null, "callback cannot be null"); 278 | checkArgument(annotations != null, "annotations cannot be null"); 279 | 280 | searchPaths.add(new SearchPath<>(pathComponents, callback, new Annotations(annotations))); 281 | 282 | return this; 283 | } 284 | 285 | /** 286 | * Add a search path by its components, with no annotations matching on the top-level-values. 287 | *
288 | * @see PathExtractorBuilder#withSearchPath(List, BiFunction, String[]) 289 | */ 290 | public PathExtractorBuilder withSearchPath(final List pathComponents, 291 | final BiFunction callback) { 292 | return withSearchPath(pathComponents, callback, EMPTY_STRING_ARRAY); 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction; 15 | 16 | import com.amazon.ion.IonReader; 17 | import com.amazon.ion.IonType; 18 | import com.amazon.ionpathextraction.internal.Annotations; 19 | import com.amazon.ionpathextraction.pathcomponents.Index; 20 | import com.amazon.ionpathextraction.pathcomponents.PathComponent; 21 | import com.amazon.ionpathextraction.pathcomponents.Text; 22 | import com.amazon.ionpathextraction.pathcomponents.Wildcard; 23 | import java.util.ArrayList; 24 | import java.util.Arrays; 25 | import java.util.HashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.function.BiFunction; 29 | import java.util.function.Supplier; 30 | import java.util.stream.Collectors; 31 | 32 | /** 33 | * Builds a root FsmMatcher for a set of SearchPaths. 34 | *
35 | * One key principle in the implementation is to close over as much branching as possible at build time. 36 | * For example: for a case-insensitive field lookup, lower case the field names once, at build time. 37 | *
38 | * The second key principle is that there should be at most one Matcher state for a given reader context. 39 | * So any combination of different paths which could both be active for the same reader context is disallowed. 40 | * For example: a mix of field names and ordinal positions for a given sub-path is rejected. 41 | *
42 | * Beyond that, there are some usage patterns which could be included, such as annotations filtering on 43 | * field names or ordinals, but for which there was no observed usage. 44 | */ 45 | class FsmMatcherBuilder { 46 | private final PathTreeNode root = new PathTreeNode(); 47 | private final boolean caseInsensitiveAll; 48 | private final boolean caseInsensitiveFields; 49 | 50 | FsmMatcherBuilder(final boolean caseInsensitiveAll, final boolean caseInsensitiveFields) { 51 | this.caseInsensitiveAll = caseInsensitiveAll; 52 | this.caseInsensitiveFields = caseInsensitiveFields; 53 | } 54 | 55 | /** 56 | * Incorporate the searchPath into the matcher tree to be built. 57 | * 58 | * @throws UnsupportedPathExpression if the SearchPath is not supported. 59 | */ 60 | void accept(final SearchPath searchPath) { 61 | List steps = searchPath.getNormalizedPath(); 62 | PathTreeNode currentNode = root; 63 | for (PathComponent step : steps) { 64 | currentNode = currentNode.acceptStep(step); 65 | } 66 | currentNode.setCallback(searchPath.getCallback()); 67 | } 68 | 69 | /** 70 | * Build the FsmMatcher for the set of paths. 71 | * 72 | * @throws UnsupportedPathExpression if the combination of SearchPaths is not supported. 73 | */ 74 | FsmMatcher build() { 75 | return root.buildMatcher(); 76 | } 77 | 78 | /** 79 | * Mutable builder node to model the path tree before building into a FsmMatcher. 80 | */ 81 | private class PathTreeNode { 82 | BiFunction callback; 83 | PathTreeNode wildcard; 84 | Map annotatedSplats = new HashMap<>(); 85 | Map fields = new HashMap<>(); 86 | Map indexes = new HashMap<>(); 87 | 88 | /** 89 | * Find or create a new PathTreeNode for the child step. 90 | * 91 | * @return the new or existing node. 
92 | * @throws UnsupportedPathExpression if the step contains path components that are not supported 93 | */ 94 | private PathTreeNode acceptStep(final PathComponent step) { 95 | if (step.hasAnnotations() && caseInsensitiveAll) { 96 | throw new UnsupportedPathExpression( 97 | "Case Insensitive Matching of Annotations is not yet supported by this matcher.\n" 98 | + "Use the legacy matcher or the withMatchFieldNamesCaseInsensitive option instead."); 99 | } 100 | 101 | PathTreeNode child; 102 | if (step instanceof Wildcard) { 103 | if (step.hasAnnotations()) { 104 | child = annotatedSplats.computeIfAbsent(step.getAnnotations(), a -> new PathTreeNode()); 105 | } else { 106 | if (wildcard == null) { 107 | wildcard = new PathTreeNode(); 108 | } 109 | child = wildcard; 110 | } 111 | } else { 112 | if (step.hasAnnotations()) { 113 | // this is not too bad to do, but it takes care to do without impacting the non-annotated case 114 | // which is the majority of usage. one would also want to mind the principle to avoid multiple 115 | // distinct match paths for a given reader context and only allow either annotated or not 116 | // for a given field name or index ordinal. 117 | throw new UnsupportedPathExpression("Annotations are only supported on wildcards!"); 118 | } 119 | 120 | if (step instanceof Text) { 121 | String fieldName = caseInsensitiveFields 122 | ? 
((Text) step).getFieldName().toLowerCase() 123 | : ((Text) step).getFieldName(); 124 | child = fields.computeIfAbsent(fieldName, f -> new PathTreeNode()); 125 | } else if (step instanceof Index) { 126 | child = indexes.computeIfAbsent(((Index) step).getOrdinal(), i -> new PathTreeNode()); 127 | } else { 128 | throw new IllegalArgumentException("step of unknown runtime type: " + step.getClass()); 129 | } 130 | } 131 | return child; 132 | } 133 | 134 | private void setCallback(final BiFunction callback) { 135 | if (this.callback == null) { 136 | this.callback = callback; 137 | } else { 138 | // this would actually be pretty simple to do: just create a ComposedCallback of BiFunctions. 139 | throw new UnsupportedPathExpression("Cannot set multiple callbacks for same path!"); 140 | } 141 | } 142 | 143 | private FsmMatcher buildMatcher() { 144 | List> matchers = new ArrayList<>(); 145 | if (wildcard != null) { 146 | matchers.add(new SplatMatcher<>(wildcard.buildMatcher(), callback)); 147 | } 148 | if (!annotatedSplats.isEmpty()) { 149 | List> children = new ArrayList<>(annotatedSplats.size()); 150 | List annotations = new ArrayList<>(annotatedSplats.size()); 151 | for (Map.Entry entry : annotatedSplats.entrySet()) { 152 | children.add(entry.getValue().buildMatcher()); 153 | annotations.add(entry.getKey().getAnnotations()); 154 | } 155 | matchers.add(new AnnotationsMatcher<>(annotations, children)); 156 | } 157 | if (!fields.isEmpty()) { 158 | Map> children = fields.entrySet().stream() 159 | .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher())); 160 | FsmMatcher fieldMatcher = caseInsensitiveFields 161 | ? 
new CaseInsensitiveFieldMatcher<>(children, callback) 162 | : new FieldMatcher<>(children, callback); 163 | matchers.add(fieldMatcher); 164 | } 165 | if (!indexes.isEmpty()) { 166 | Map> children = indexes.entrySet().stream() 167 | .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher())); 168 | matchers.add(new IndexMatcher<>(children, callback)); 169 | } 170 | 171 | if (matchers.isEmpty()) { 172 | return new TerminalMatcher<>(callback); 173 | } else if (matchers.size() == 1) { 174 | return matchers.get(0); 175 | } else { 176 | // the main issue with allowing more than one is that it means that any given match context 177 | // may produce multiple matches, and search path writers may become reliant on the order 178 | // in which callbacks for such cases are called. And in the general case, that might mean 179 | // a crazy mix between the different types of matching, which devolves to the for-each loop 180 | // we see in the PathExtractorImpl. 181 | // That seems like a lot of complexity for a usage pattern of questionable value. 182 | // So if you're reading this, and you think "oh this is a silly restriction", then take 183 | // the time to understand why it's important to the path writer and reconsider accordingly. 
184 | throw new UnsupportedPathExpression( 185 | "Only one variant of wildcard, annotated wildcard, field names, or ordinals is supported!"); 186 | } 187 | } 188 | } 189 | 190 | private static class SplatMatcher extends FsmMatcher { 191 | FsmMatcher child; 192 | 193 | SplatMatcher( 194 | final FsmMatcher child, 195 | final BiFunction callback) { 196 | this.child = child; 197 | this.callback = callback; 198 | } 199 | 200 | @Override 201 | FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) { 202 | return child; 203 | } 204 | } 205 | 206 | private static class FieldMatcher extends FsmMatcher { 207 | Map> fields; 208 | 209 | FieldMatcher( 210 | final Map> fields, 211 | final BiFunction callback) { 212 | this.fields = fields; 213 | this.callback = callback; 214 | } 215 | 216 | @Override 217 | Transitionable transitionsFrom(final IonType ionType) { 218 | if (ionType == IonType.STRUCT) { 219 | return Transitionable.POSSIBLE; 220 | } 221 | if (ionType == IonType.NULL) { 222 | return Transitionable.TERMINAL; 223 | } 224 | return Transitionable.MISTYPED; 225 | } 226 | 227 | @Override 228 | FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) { 229 | return fields.get(fieldName); 230 | } 231 | } 232 | 233 | private static class CaseInsensitiveFieldMatcher extends FieldMatcher { 234 | CaseInsensitiveFieldMatcher( 235 | final Map> fields, 236 | final BiFunction callback) { 237 | super(fields, callback); 238 | } 239 | 240 | @Override 241 | FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) { 242 | return fields.get(fieldName.toLowerCase()); 243 | } 244 | } 245 | 246 | private static class IndexMatcher extends FsmMatcher { 247 | Map> indexes; 248 | 249 | IndexMatcher( 250 | final Map> indexes, 251 | final BiFunction callback) { 252 | this.indexes = indexes; 253 | this.callback = callback; 254 | } 255 | 256 | @Override 257 | FsmMatcher transition(final 
String fieldName, final int position, final Supplier annotations) { 258 | return indexes.get(position); 259 | } 260 | } 261 | 262 | private static class TerminalMatcher extends FsmMatcher { 263 | TerminalMatcher(final BiFunction callback) { 264 | this.callback = callback; 265 | } 266 | 267 | @Override 268 | Transitionable transitionsFrom(final IonType ionType) { 269 | return Transitionable.TERMINAL; 270 | } 271 | 272 | @Override 273 | FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) { 274 | return null; 275 | } 276 | } 277 | 278 | private static class AnnotationsMatcher extends FsmMatcher { 279 | List candidates; 280 | List> matchers; 281 | 282 | AnnotationsMatcher(final List candidates, final List> matchers) { 283 | this.candidates = candidates; 284 | this.matchers = matchers; 285 | } 286 | 287 | @Override 288 | FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) { 289 | for (int i = 0; i < candidates.size(); i++) { 290 | if (Arrays.equals(candidates.get(i), annotations.get())) { 291 | return matchers.get(i); 292 | } 293 | } 294 | return null; 295 | } 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Licensed under the Apache License, Version 2.0 (the "License"). 4 | * You may not use this file except in compliance with the License. 5 | * A copy of the License is located at: 6 | * 7 | * http://aws.amazon.com/apache2.0/ 8 | * 9 | * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific 11 | * language governing permissions and limitations under the License. 12 | */ 13 | 14 | package com.amazon.ionpathextraction 15 | 16 | import com.amazon.ion.* 17 | import com.amazon.ion.system.* 18 | import com.amazon.ionpathextraction.exceptions.PathExtractionException 19 | import com.amazon.ionpathextraction.pathcomponents.PathComponent 20 | import org.junit.jupiter.api.Assertions.assertEquals 21 | import org.junit.jupiter.api.Assertions.assertNull 22 | import org.junit.jupiter.api.Test 23 | import org.junit.jupiter.api.assertAll 24 | import org.junit.jupiter.api.assertThrows 25 | import org.junit.jupiter.params.ParameterizedTest 26 | import org.junit.jupiter.params.provider.EnumSource 27 | import org.junit.jupiter.params.provider.MethodSource 28 | import org.junit.jupiter.params.provider.ValueSource 29 | import java.io.ByteArrayOutputStream 30 | import java.io.File 31 | import java.util.stream.Stream 32 | import kotlin.test.assertTrue 33 | 34 | abstract class PathExtractorTest { 35 | companion object { 36 | val ION: IonSystem = IonSystemBuilder.standard().build() 37 | 38 | data class TestCase(val searchPaths: List, 39 | val data: String, 40 | val expected: IonList, 41 | val stepOutNumber: Int, 42 | val hasMultipleTopLevelValues: Boolean, 43 | val legacyOnly: Boolean = false, 44 | val caseInsensitive: String = "None") { 45 | override fun toString(): String = "SearchPaths=$searchPaths, " + 46 | "Data=$data, " + 47 | "Expected=$expected, " + 48 | "StepOutN=$stepOutNumber" + 49 | "Legacy=$legacyOnly" + 50 | "CaseInsensitive=$caseInsensitive" 51 | } 52 | 53 | private fun IonValue.toText(): String { 54 | val out = StringBuilder() 55 | 56 | ION.newTextWriter(out).use { writer -> 57 | if (hasTypeAnnotation("${'$'}datagram") && this is IonContainer) { 58 | forEach { it -> it.writeTo(writer) } 59 | } else { 60 | this.writeTo(writer) 61 | } 62 | } 63 | 64 | return out.toString() 65 | } 66 | 67 | @JvmStatic 68 | fun testCases(): Stream = 
69 | ION.loader.load(File("src/test/resources/test-cases.ion")) 70 | .map { it as IonStruct } 71 | .map { struct -> 72 | // single 73 | val searchPaths = if (struct.containsKey("searchPath")) { 74 | listOf(struct["searchPath"].toText()) 75 | } 76 | // multiple 77 | else { 78 | (struct["searchPaths"] as IonSequence).map { it.toText() } 79 | } 80 | 81 | TestCase( 82 | searchPaths, 83 | struct["data"].toText(), 84 | struct["expected"] as IonList, 85 | struct["stepOutN"]?.let { (it as IonInt).intValue() } ?: 0, 86 | struct["data"].hasTypeAnnotation("${'$'}datagram"), 87 | struct.hasTypeAnnotation("legacy"), 88 | struct.get("caseInsensitive")?.toText() ?: "None" 89 | ) 90 | }.stream() 91 | 92 | enum class API { 93 | MATCH { 94 | override fun match(extractor: PathExtractor, reader: IonReader, context: T?) { 95 | extractor.match(reader, context) 96 | } 97 | }, 98 | MATCH_CURRENT_VALUE { 99 | override fun match(extractor: PathExtractor, reader: IonReader, context: T?) { 100 | reader.next() 101 | extractor.matchCurrentValue(reader, context) 102 | } 103 | }; 104 | 105 | abstract fun match(extractor: PathExtractor, reader: IonReader, context: T? 
= null) 106 | } 107 | } 108 | 109 | abstract fun PathExtractorBuilder.buildExtractor(): PathExtractor 110 | 111 | private val emptyCallback: (IonReader) -> Int = { 0 } 112 | 113 | private fun collectToIonList(stepOutN: Int): (IonReader, IonList) -> Int = { reader, out -> 114 | ION.newWriter(out).use { it.writeValue(reader) } 115 | stepOutN 116 | } 117 | 118 | @ParameterizedTest 119 | @MethodSource("testCases") 120 | open fun testSearchPaths(testCase: TestCase) { 121 | 122 | val builder = PathExtractorBuilder.standard() 123 | 124 | testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) } 125 | when (testCase.caseInsensitive) { 126 | "Both" -> builder.withMatchCaseInsensitive(true) 127 | "Fields" -> builder.withMatchFieldNamesCaseInsensitive(true) 128 | "None" -> Unit 129 | else -> throw IllegalArgumentException("Unexpected value for caseInsensitive: ${testCase.caseInsensitive}") 130 | } 131 | val extractor = builder.buildExtractor() 132 | 133 | val out = ION.newEmptyList() 134 | extractor.match(ION.newReader(testCase.data), out) 135 | 136 | assertEquals(testCase.expected, out, testCase.toString()) 137 | } 138 | 139 | @ParameterizedTest 140 | @MethodSource("testCases") 141 | open fun testSearchPathsMatchCurrentValue(testCase: TestCase) { 142 | if (testCase.hasMultipleTopLevelValues) { 143 | // For simplicity, skip tests with multiple top-level values. This will be tested via other test methods. 
144 | return 145 | } 146 | val builder = PathExtractorBuilder.standard() 147 | 148 | testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) } 149 | val extractor = builder.buildExtractor() 150 | 151 | val out = ION.newEmptyList() 152 | val reader = ION.newReader(testCase.data) 153 | reader.next() 154 | val depth = reader.depth 155 | extractor.matchCurrentValue(reader, out) 156 | 157 | assertEquals(depth, reader.depth) 158 | assertEquals(testCase.expected, out, testCase.toString()) 159 | } 160 | 161 | @ParameterizedTest 162 | @EnumSource(API::class) 163 | fun testCorrectCallbackCalled(api: API) { 164 | var timesCallback1Called = 0 165 | var timesCallback2Called = 0 166 | 167 | val extractor: PathExtractor = PathExtractorBuilder.standard() 168 | .withSearchPath("(foo)") { _ -> 169 | timesCallback1Called++ 170 | 0 171 | } 172 | .withSearchPath("(bar)") { _ -> 173 | timesCallback2Called++ 174 | 0 175 | } 176 | .buildExtractor() 177 | 178 | api.match(extractor, ION.newReader("{ bar: 1, bar: 2, foo: 3 }")) 179 | 180 | assertAll( 181 | { assertEquals(1, timesCallback1Called) }, 182 | { assertEquals(2, timesCallback2Called) } 183 | ) 184 | } 185 | 186 | @Test 187 | fun matchCurrentValueOnlyMatchesCurrentValue() { 188 | val extractor1 = PathExtractorBuilder.standard() 189 | .withSearchPath("(foo)", collectToIonList(0)) 190 | .buildExtractor() 191 | val extractor2 = PathExtractorBuilder.standard() 192 | .withSearchPath("(*)", collectToIonList(1)) 193 | .withMatchRelativePaths(true) 194 | .buildExtractor() 195 | 196 | val reader = ION.newReader("{foo: 123, foo: [456]} {bar: [42, 43, 44]} end") 197 | val out = ION.newEmptyList() 198 | assertEquals(IonType.STRUCT, reader.next()) 199 | extractor1.matchCurrentValue(reader, out) 200 | assertEquals(ION.singleValue("[123, [456]]"), out) 201 | assertEquals(IonType.STRUCT, reader.next()) 202 | reader.stepIn() 203 | assertEquals(IonType.LIST, reader.next()) 204 | assertEquals("bar", 
reader.fieldName) 205 | extractor2.matchCurrentValue(reader, out) 206 | assertEquals(ION.singleValue("[123, [456], 42]"), out) 207 | assertEquals(1, reader.depth) 208 | reader.stepOut() 209 | assertEquals(IonType.SYMBOL, reader.next()) 210 | assertEquals("end", reader.stringValue()) 211 | assertNull(reader.next()) 212 | } 213 | 214 | @Test 215 | fun matchCurrentValueWhenNotPositionedOnValueFails() { 216 | val extractor = PathExtractorBuilder.standard() 217 | .withSearchPath("(foo)") { _ -> 0 } 218 | .buildExtractor() 219 | 220 | val reader = ION.newReader("[{foo: 1}]") 221 | val exception = assertThrows { extractor.matchCurrentValue(reader) } 222 | assertEquals("reader must be positioned at a value; call IonReader.next() first.", exception.message) 223 | } 224 | 225 | @ParameterizedTest 226 | @EnumSource(API::class) 227 | fun readerAtInvalidDepth(api: API) { 228 | val extractor = PathExtractorBuilder.standard() 229 | .withSearchPath("(foo)") { _ -> 0 } 230 | .buildExtractor() 231 | 232 | val reader = ION.newReader("[{foo: 1}]") 233 | assertTrue(reader.next() != null) 234 | reader.stepIn() 235 | 236 | val exception = assertThrows { api.match(extractor, reader) } 237 | assertEquals("reader must be at depth zero, it was at: 1", exception.message) 238 | } 239 | 240 | @ParameterizedTest 241 | @EnumSource(API::class) 242 | fun matchRelative(api: API) { 243 | val extractor = PathExtractorBuilder.standard() 244 | .withMatchRelativePaths(true) 245 | .withSearchPath("(foo)", collectToIonList(0)) 246 | .buildExtractor() 247 | 248 | val reader = ION.newReader("[{foo: 1}]") 249 | assertTrue(reader.next() != null) 250 | reader.stepIn() 251 | 252 | val out = ION.newEmptyList() 253 | api.match(extractor, reader, out) 254 | 255 | assertEquals(ION.singleValue("[1]"), out) 256 | } 257 | 258 | @ParameterizedTest 259 | @EnumSource(API::class) 260 | fun stepOutMoreThanPermitted(api: API) { 261 | val extractor = PathExtractorBuilder.standard() 262 | .withSearchPath("(foo)") { _ -> 200 } 
263 | .buildExtractor() 264 | 265 | val exception = assertThrows { 266 | api.match(extractor, ION.newReader("{foo: 1}")) 267 | } 268 | 269 | assertEquals("Callback return cannot be greater than the reader current relative depth. " + 270 | "return: 200, relative reader depth: 1", exception.message) 271 | } 272 | 273 | @ParameterizedTest 274 | @EnumSource(API::class) 275 | fun stepOutMoreThanPermittedWithRelative(api: API) { 276 | val extractor = PathExtractorBuilder.standard() 277 | .withMatchRelativePaths(true) 278 | // even though you could step out twice in reader you can't given the initial reader depth 279 | .withSearchPath("(bar)") { _ -> 2 } 280 | .buildExtractor() 281 | 282 | val newReader = ION.newReader("{foo: {bar: 1}}") 283 | newReader.next() 284 | newReader.stepIn() // positioned at the beginning of {bar: 1} 285 | 286 | val exception = assertThrows { 287 | api.match(extractor, newReader) 288 | } 289 | 290 | assertEquals("Callback return cannot be greater than the reader current relative depth. return: 2, " + 291 | "relative reader depth: 1", exception.message) 292 | } 293 | 294 | @ParameterizedTest 295 | @EnumSource(API::class) 296 | fun nestedSearchPaths(api: API) { 297 | // Test only that the correct callbacks were called as reading the value for (foo) 298 | // will advance the reader making (foo bar) not match 299 | 300 | val counter = mutableMapOf( 301 | "()" to 0, 302 | "(foo)" to 0, 303 | "(foo bar)" to 0 304 | ) 305 | 306 | val extractor = PathExtractorBuilder.standard().apply { 307 | counter.forEach { (sp, _) -> 308 | withSearchPath(sp) { _ -> 309 | counter[sp] = counter[sp]!! 
+ 1 310 | 0 311 | } 312 | } 313 | }.buildExtractor() 314 | 315 | api.match(extractor, ION.newReader("{foo: {bar: 1}}")) 316 | 317 | assertEquals(3, counter.size) 318 | assertEquals(1, counter["()"]) 319 | assertEquals(1, counter["(foo)"]) 320 | assertEquals(1, counter["(foo bar)"]) 321 | } 322 | 323 | // Invalid configuration ----------------------------------------------------------------------------- 324 | 325 | @Test 326 | fun nullStringPath() { 327 | val exception = assertThrows { 328 | PathExtractorBuilder.standard().withSearchPath(null as String?, emptyCallback) 329 | } 330 | 331 | assertEquals("searchPathAsIon cannot be null", exception.message) 332 | } 333 | 334 | @Test 335 | fun nullListPath() { 336 | val exception = assertThrows { 337 | PathExtractorBuilder.standard().withSearchPath(null as List?, emptyCallback, emptyArray()) 338 | } 339 | 340 | assertEquals("pathComponents cannot be null", exception.message) 341 | } 342 | 343 | @Test 344 | fun nullCallback() { 345 | val exception = assertThrows { 346 | val callback: java.util.function.Function? 
= null 347 | 348 | PathExtractorBuilder.standard().withSearchPath("(foo)", callback) 349 | } 350 | 351 | assertEquals("callback cannot be null", exception.message) 352 | } 353 | 354 | @Test 355 | fun emptySearchPath() { 356 | val exception = assertThrows { 357 | PathExtractorBuilder.standard().withSearchPath("", emptyCallback) 358 | } 359 | 360 | assertEquals("ionPathExpression cannot be empty", exception.message) 361 | } 362 | 363 | @Test 364 | fun searchPathNotSequence() { 365 | val exception = assertThrows { 366 | PathExtractorBuilder.standard().withSearchPath("1", emptyCallback) 367 | } 368 | 369 | assertEquals("ionPathExpression must be a s-expression or list", exception.message) 370 | } 371 | 372 | private fun newReader(value: IonValue, isBinary: Boolean): IonReader { 373 | val baos = ByteArrayOutputStream() 374 | val ionWriter = if (isBinary) IonBinaryWriterBuilder.standard().build(baos) else IonTextWriterBuilder.standard().build(baos) 375 | value.writeTo(ionWriter) 376 | ionWriter.close() 377 | return IonReaderBuilder.standard().build(baos.toByteArray()) 378 | } 379 | 380 | @ParameterizedTest 381 | @ValueSource(strings = ["binary", "text"]) 382 | fun evaluateSecondPathOnTheSameValueAfterTheFirstPathMatches(encoding: String) { 383 | val value = ION.singleValue("{col1:\"foo\", col2:[{col21:\"bar\",col22:12}]}") as IonStruct 384 | val ionReader = newReader(value, encoding == "binary") 385 | val extractor = PathExtractorBuilder.standard().withSearchPath("(col2)") { ionReader1: IonReader? 
-> 386 | val actualData = ION.newValue(ionReader1) 387 | assertEquals(value["col2"], actualData) 388 | 0 389 | }.withSearchPath("(col1)") { _ -> 0 }.buildExtractor() 390 | extractor.match(ionReader) 391 | } 392 | } -------------------------------------------------------------------------------- /config/intellij/codestyle.xml: -------------------------------------------------------------------------------- 1 | 13 | 14 | 15 | 22 | 23 | 31 | 38 | 45 | 46 | 49 | 50 | 51 | 55 | 56 | 57 | 75 | 76 | 77 | 81 | 82 | 83 | 108 | 109 | 110 | 113 | 114 | 115 | 137 | 138 | 154 | 155 | 162 | 163 | 173 | 174 | 175 | 178 | 179 | 180 | 181 | 184 | 185 | 186 | 187 | 190 | 191 | 192 | 193 | 197 | 198 | 199 |
200 | 201 | 202 | 203 | xmlns:android 204 | 205 | ^$ 206 | 207 | 208 | 209 |
210 |
211 | 212 | 213 | 214 | xmlns:.* 215 | 216 | ^$ 217 | 218 | 219 | BY_NAME 220 | 221 |
222 |
223 | 224 | 225 | 226 | .*:id 227 | 228 | http://schemas.android.com/apk/res/android 229 | 230 | 231 | 232 |
233 |
234 | 235 | 236 | 237 | style 238 | 239 | ^$ 240 | 241 | 242 | 243 |
244 |
245 | 246 | 247 | 248 | .* 249 | 250 | ^$ 251 | 252 | 253 | BY_NAME 254 | 255 |
256 |
257 | 258 | 259 | 260 | .*:.*Style 261 | 262 | http://schemas.android.com/apk/res/android 263 | 264 | 265 | BY_NAME 266 | 267 |
268 |
269 | 270 | 271 | 272 | .*:layout_width 273 | 274 | http://schemas.android.com/apk/res/android 275 | 276 | 277 | 278 |
279 |
280 | 281 | 282 | 283 | .*:layout_height 284 | 285 | http://schemas.android.com/apk/res/android 286 | 287 | 288 | 289 |
290 |
291 | 292 | 293 | 294 | .*:layout_weight 295 | 296 | http://schemas.android.com/apk/res/android 297 | 298 | 299 | 300 |
301 |
302 | 303 | 304 | 305 | .*:layout_margin 306 | 307 | http://schemas.android.com/apk/res/android 308 | 309 | 310 | 311 |
312 |
313 | 314 | 315 | 316 | .*:layout_marginTop 317 | 318 | http://schemas.android.com/apk/res/android 319 | 320 | 321 | 322 |
323 |
324 | 325 | 326 | 327 | .*:layout_marginBottom 328 | 329 | http://schemas.android.com/apk/res/android 330 | 331 | 332 | 333 |
334 |
335 | 336 | 337 | 338 | .*:layout_marginStart 339 | 340 | http://schemas.android.com/apk/res/android 341 | 342 | 343 | 344 |
345 |
346 | 347 | 348 | 349 | .*:layout_marginEnd 350 | 351 | http://schemas.android.com/apk/res/android 352 | 353 | 354 | 355 |
356 |
357 | 358 | 359 | 360 | .*:layout_marginLeft 361 | 362 | http://schemas.android.com/apk/res/android 363 | 364 | 365 | 366 |
367 |
368 | 369 | 370 | 371 | .*:layout_marginRight 372 | 373 | http://schemas.android.com/apk/res/android 374 | 375 | 376 | 377 |
378 |
379 | 380 | 381 | 382 | .*:layout_.* 383 | 384 | http://schemas.android.com/apk/res/android 385 | 386 | 387 | BY_NAME 388 | 389 |
390 |
391 | 392 | 393 | 394 | .*:padding 395 | 396 | http://schemas.android.com/apk/res/android 397 | 398 | 399 | 400 |
401 |
402 | 403 | 404 | 405 | .*:paddingTop 406 | 407 | http://schemas.android.com/apk/res/android 408 | 409 | 410 | 411 |
412 |
413 | 414 | 415 | 416 | .*:paddingBottom 417 | 418 | http://schemas.android.com/apk/res/android 419 | 420 | 421 | 422 |
423 |
424 | 425 | 426 | 427 | .*:paddingStart 428 | 429 | http://schemas.android.com/apk/res/android 430 | 431 | 432 | 433 |
434 |
435 | 436 | 437 | 438 | .*:paddingEnd 439 | 440 | http://schemas.android.com/apk/res/android 441 | 442 | 443 | 444 |
445 |
446 | 447 | 448 | 449 | .*:paddingLeft 450 | 451 | http://schemas.android.com/apk/res/android 452 | 453 | 454 | 455 |
456 |
457 | 458 | 459 | 460 | .*:paddingRight 461 | 462 | http://schemas.android.com/apk/res/android 463 | 464 | 465 | 466 |
467 |
468 | 469 | 470 | 471 | .* 472 | http://schemas.android.com/apk/res/android 473 | 474 | 475 | BY_NAME 476 | 477 |
478 |
479 | 480 | 481 | 482 | .* 483 | http://schemas.android.com/apk/res-auto 484 | 485 | 486 | BY_NAME 487 | 488 |
489 |
490 | 491 | 492 | 493 | .* 494 | http://schemas.android.com/tools 495 | 496 | 497 | BY_NAME 498 | 499 |
500 |
501 | 502 | 503 | 504 | .* 505 | .* 506 | 507 | 508 | BY_NAME 509 | 510 |
511 |
512 |
513 |
514 | 515 | 522 |
--------------------------------------------------------------------------------