├── .gitignore ├── .mvn ├── maven.config ├── wrapper │ └── maven-wrapper.properties ├── extensions.xml └── jvm.config ├── catalog └── git.properties ├── Dockerfile ├── jreleaser.yml ├── .github ├── problem-matcher.json ├── dependabot.yml └── workflows │ ├── maven.yaml │ └── release.yaml ├── .project ├── docker-build.sh ├── src ├── main │ ├── resources │ │ └── sql │ │ │ ├── commit_stats.sql │ │ │ └── idents.sql │ └── java │ │ └── pl │ │ └── net │ │ └── was │ │ └── trino │ │ └── git │ │ ├── GitTransactionHandle.java │ │ ├── RecordCursorProvider.java │ │ ├── GitPlugin.java │ │ ├── GitConfig.java │ │ ├── GitRecordSetProvider.java │ │ ├── GitConnectorFactory.java │ │ ├── GitTable.java │ │ ├── GitColumn.java │ │ ├── GitSplit.java │ │ ├── GitConnector.java │ │ ├── GitColumnHandle.java │ │ ├── GitModule.java │ │ ├── GitTableHandle.java │ │ ├── BranchesRecordCursor.java │ │ ├── GitSplitManager.java │ │ ├── ObjectsRecordCursor.java │ │ ├── TagsRecordCursor.java │ │ ├── GitRecordSet.java │ │ ├── CommitsRecordCursor.java │ │ ├── TreesRecordCursor.java │ │ ├── GitClient.java │ │ ├── DiffStatsRecordCursor.java │ │ └── GitMetadata.java └── test │ └── java │ └── pl │ └── net │ └── was │ └── trino │ └── git │ ├── TestGitSplit.java │ ├── TestGitTableHandle.java │ ├── TestGitRecordSetProvider.java │ ├── GitQueryRunner.java │ ├── TestGitClient.java │ ├── TestGitMetadata.java │ └── TestGitRecordSet.java ├── README.md ├── pom.xml ├── LICENSE ├── mvnw └── examples └── achievements.sql /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | target 4 | .idea 5 | *.iml 6 | out 7 | -------------------------------------------------------------------------------- /.mvn/maven.config: -------------------------------------------------------------------------------- 1 | --strict-checksums 2 | -b 3 | smart 4 | -T2C 5 | -------------------------------------------------------------------------------- /catalog/git.properties: 
-------------------------------------------------------------------------------- 1 | connector.name=git 2 | metadata-uri=${ENV:REPO_URL} 3 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | wrapperVersion=3.3.4 2 | distributionType=only-script 3 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.8/apache-maven-3.9.8-bin.zip 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TRINO_VERSION 2 | FROM trinodb/trino-core:$TRINO_VERSION 3 | 4 | ARG VERSION 5 | 6 | ADD target/trino-git-$VERSION/ /usr/lib/trino/plugin/git/ 7 | ADD catalog/git.properties /etc/trino/catalog/git.properties 8 | -------------------------------------------------------------------------------- /.mvn/extensions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | io.takari.maven 5 | takari-smart-builder 6 | 1.1.0 7 | 8 | 9 | -------------------------------------------------------------------------------- /jreleaser.yml: -------------------------------------------------------------------------------- 1 | project: 2 | name: trino-git 3 | description: This is a Trino connector to access git repos 4 | license: Apache-2 5 | java: 6 | groupId: pl.net.was 7 | version: 11 8 | authors: 9 | - Jan Waś 10 | extraProperties: 11 | inceptionYear: 2021 12 | 13 | files: 14 | artifacts: 15 | - path: "target/{{projectName}}-{{projectVersion}}.zip" 16 | 17 | release: 18 | github: 19 | owner: nineinchnick 20 | -------------------------------------------------------------------------------- /.github/problem-matcher.json: -------------------------------------------------------------------------------- 1 | { 2 | "problemMatcher": [ 3 | { 4 | 
"owner": "maven", 5 | "pattern": [ 6 | { 7 | "regexp": "^.*\\[(ERROR|WARN(?:ING)?)\\]\\s+(.*):\\[(\\d+),(\\d+)\\] (?:error: )?[\\[\\(](.*)[\\]\\)] (.*)$", 8 | "severity": 1, 9 | "file": 2, 10 | "line": 3, 11 | "column": 4, 12 | "message": 6, 13 | "code": 5 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "maven" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | groups: 8 | dependency-updates: 9 | applies-to: version-updates 10 | update-types: 11 | - major 12 | - minor 13 | - patch 14 | security-updates: 15 | applies-to: security-updates 16 | dependency-type: production 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | schedule: 20 | interval: "weekly" 21 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | trino-git 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /docker-build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | set -x 5 | 6 | if [ -f release.properties ]; then 7 | VERSION=$(grep 'project.rel.pl.net.was\\:trino-git=' release.properties | cut -d'=' -f2) 8 | else 9 | VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) 10 | fi 11 | TRINO_VERSION=$(mvn help:evaluate -Dexpression=dep.trino.version -q -DforceStdout) 12 | TAG=nineinchnick/trino-git:$VERSION 13 | 14 | docker buildx build \ 15 | 
--platform linux/amd64,linux/arm64 \ 16 | -t "$TAG" \ 17 | --build-arg TRINO_VERSION="$TRINO_VERSION" \ 18 | --build-arg VERSION="$VERSION" \ 19 | --push \ 20 | . 21 | -------------------------------------------------------------------------------- /.mvn/jvm.config: -------------------------------------------------------------------------------- 1 | -Xmx8192m 2 | --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED 3 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED 4 | --add-exports jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED 5 | --add-exports jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED 6 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED 7 | --add-exports jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED 8 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED 9 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 10 | --add-opens jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED 11 | --add-opens jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED 12 | --enable-native-access=ALL-UNNAMED 13 | -------------------------------------------------------------------------------- /src/main/resources/sql/commit_stats.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c.object_id, 3 | c.author_name, 4 | c.author_email, 5 | c.committer_name, 6 | c.committer_email, 7 | c.message, 8 | c.parents, 9 | c.tree_id, 10 | c.commit_time, 11 | sum(s.added_lines) AS added_lines, 12 | sum(s.deleted_lines) AS deleted_lines, 13 | count(s.commit_id) AS changed_files, 14 | avg(s.similarity_score) AS similarity_score, 15 | array_agg(s.change_type) AS change_types 16 | FROM 17 | commits c 18 | JOIN diff_stats s ON 19 | s.commit_id = c.object_id 20 | GROUP BY 21 | c.object_id, 22 | c.author_email, 23 | c.author_name, 24 | c.committer_email, 25 | c.committer_name, 26 | c.message, 27 | c.parents, 28 | c.tree_id, 29 | c.commit_time 
-------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitTransactionHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.trino.spi.connector.ConnectorTransactionHandle; 17 | 18 | public enum GitTransactionHandle 19 | implements ConnectorTransactionHandle 20 | { 21 | INSTANCE 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/RecordCursorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.trino.spi.connector.RecordCursor; 17 | import org.eclipse.jgit.api.Git; 18 | 19 | import java.util.List; 20 | import java.util.Optional; 21 | 22 | public interface RecordCursorProvider 23 | { 24 | RecordCursor create(List columnHandles, Git repo, Optional> commitIds); 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.trino.spi.Plugin; 17 | import io.trino.spi.connector.ConnectorFactory; 18 | 19 | import java.util.List; 20 | 21 | public class GitPlugin 22 | implements Plugin 23 | { 24 | @Override 25 | public Iterable getConnectorFactories() 26 | { 27 | return List.of(new GitConnectorFactory()); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.configuration.Config; 17 | import jakarta.validation.constraints.NotNull; 18 | 19 | import java.net.URI; 20 | 21 | public class GitConfig 22 | { 23 | private URI uri; 24 | 25 | @NotNull 26 | public URI getUri() 27 | { 28 | return uri; 29 | } 30 | 31 | @Config("metadata-uri") 32 | public GitConfig setUri(URI uri) 33 | { 34 | this.uri = uri; 35 | return this; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /.github/workflows/maven.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: Java CI with Maven 5 | 6 | on: 7 | pull_request: 8 | branches: [main] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v6 15 | - name: Set up the JDK 16 | uses: actions/setup-java@v5 17 | with: 18 | java-version: '25' 19 | distribution: 'temurin' 20 | server-id: github 21 | cache: 'maven' 22 | - name: Configure Problem Matchers 23 | run: | 24 | echo "::add-matcher::.github/problem-matcher.json" 25 | echo "::remove-matcher owner=java::" 26 | 27 | - name: Build with Maven 28 | run: ./mvnw -B package 29 | 30 | - name: Annotate run 31 | uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70 32 | if: always() 33 | with: 
34 | fail_if_no_tests: false 35 | skip_publishing: true 36 | -------------------------------------------------------------------------------- /src/main/resources/sql/idents.sql: -------------------------------------------------------------------------------- 1 | -- This can produce too many stages, see the queries in the examples dir 2 | -- on how to break it down using temporary tables 3 | WITH RECURSIVE 4 | nodes (email, name) AS ( -- every distinct (email, name) pair used as author or committer 5 | SELECT DISTINCT author_email, author_name 6 | FROM commits 7 | UNION 8 | SELECT DISTINCT committer_email, committer_name 9 | FROM commits 10 | ), 11 | edges (name1, name2) AS ( -- pairs of names that share an email, including each name paired with itself 12 | SELECT n1.name, n2.name 13 | FROM nodes n1 14 | INNER JOIN nodes n2 USING (email) 15 | ), 16 | walk (name1, name2, visited) AS ( -- names reachable from name1 by following edges; visited stops a name from being walked twice 17 | SELECT name1, name2, ARRAY[name1] 18 | FROM edges 19 | WHERE name1 = name2 20 | UNION ALL 21 | SELECT w.name1, e.name2, w.visited || e.name2 22 | FROM walk w 23 | INNER JOIN edges e ON e.name1 = w.name2 24 | WHERE NOT contains(w.visited, e.name2) 25 | ), 26 | result (name1, name2s) AS ( -- sorted, de-duplicated array of the names reachable from each name 27 | SELECT name1, array_agg(DISTINCT name2 ORDER BY name2) 28 | FROM walk 29 | GROUP BY name1 30 | ), 31 | grouped (names, emails) AS ( -- one row per distinct reachable-name set, with all of its names and emails 32 | SELECT 33 | array_agg(DISTINCT n.name ORDER BY n.name) AS names, 34 | array_agg(DISTINCT n.email ORDER BY n.email) AS emails 35 | FROM result r 36 | INNER JOIN nodes n ON n.name = r.name1 37 | GROUP BY r.name2s 38 | ) 39 | SELECT 40 | emails[1] AS email, 41 | names[1] AS name, 42 | slice(emails, 2, cardinality(emails)) AS extra_emails, 43 | slice(names, 2, cardinality(names)) AS extra_names -- length is taken from names itself so no extra name is cut off when there are fewer emails 44 | FROM grouped 45 | ORDER BY name, names 46 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitRecordSetProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the
License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import io.trino.spi.connector.ColumnHandle; 18 | import io.trino.spi.connector.ConnectorRecordSetProvider; 19 | import io.trino.spi.connector.ConnectorSession; 20 | import io.trino.spi.connector.ConnectorSplit; 21 | import io.trino.spi.connector.ConnectorTableHandle; 22 | import io.trino.spi.connector.ConnectorTransactionHandle; 23 | import io.trino.spi.connector.RecordSet; 24 | 25 | import java.util.List; 26 | 27 | public class GitRecordSetProvider 28 | implements ConnectorRecordSetProvider 29 | { 30 | @Override 31 | public RecordSet getRecordSet( 32 | ConnectorTransactionHandle transaction, 33 | ConnectorSession session, 34 | ConnectorSplit split, 35 | ConnectorTableHandle table, 36 | List columns) 37 | { 38 | GitSplit gitSplit = (GitSplit) split; 39 | 40 | ImmutableList.Builder handles = ImmutableList.builder(); 41 | for (ColumnHandle handle : columns) { 42 | handles.add((GitColumnHandle) handle); 43 | } 44 | 45 | return new GitRecordSet(gitSplit, (GitTableHandle) table, handles.build()); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.json.JsonCodec; 17 | import org.junit.jupiter.api.Test; 18 | 19 | import java.net.URI; 20 | import java.net.URISyntaxException; 21 | import java.util.Optional; 22 | 23 | import static io.airlift.json.JsonCodec.jsonCodec; 24 | import static org.assertj.core.api.Assertions.assertThat; 25 | 26 | public class TestGitSplit 27 | { 28 | private final GitSplit split = new GitSplit("tableName", new URI("url.invalid"), Optional.empty()); 29 | 30 | public TestGitSplit() 31 | throws URISyntaxException 32 | {} 33 | 34 | @Test 35 | public void testAddresses() 36 | throws URISyntaxException 37 | { 38 | URI testURI = new URI("url.invalid"); 39 | GitSplit httpSplit = new GitSplit("tableName", testURI, Optional.empty()); 40 | assertThat(httpSplit.isRemotelyAccessible()).isTrue(); 41 | } 42 | 43 | @Test 44 | public void testJsonRoundTrip() 45 | { 46 | JsonCodec codec = jsonCodec(GitSplit.class); 47 | String json = codec.toJson(split); 48 | GitSplit copy = codec.fromJson(json); 49 | assertThat(copy.getTableName()).isEqualTo(split.getTableName()); 50 | 51 | assertThat(copy.isRemotelyAccessible()).isTrue(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitConnectorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with 
the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.inject.Injector; 17 | import io.airlift.bootstrap.Bootstrap; 18 | import io.airlift.json.JsonModule; 19 | import io.trino.spi.connector.Connector; 20 | import io.trino.spi.connector.ConnectorContext; 21 | import io.trino.spi.connector.ConnectorFactory; 22 | 23 | import java.util.Map; 24 | 25 | import static com.google.common.base.Throwables.throwIfUnchecked; 26 | import static java.util.Objects.requireNonNull; 27 | 28 | public class GitConnectorFactory 29 | implements ConnectorFactory 30 | { 31 | @Override 32 | public String getName() 33 | { 34 | return "git"; 35 | } 36 | 37 | @Override 38 | public Connector create(String catalogName, Map requiredConfig, ConnectorContext context) 39 | { 40 | requireNonNull(requiredConfig, "requiredConfig is null"); 41 | try { 42 | // A plugin is not required to use Guice; it is just very convenient 43 | Bootstrap app = new Bootstrap( 44 | new JsonModule(), 45 | new GitModule(catalogName, context.getTypeManager())); 46 | 47 | Injector injector = app 48 | .doNotInitializeLogging() 49 | .setRequiredConfigurationProperties(requiredConfig) 50 | .initialize(); 51 | return injector.getInstance(GitConnector.class); 52 | } 53 | catch (Exception e) { 54 | throwIfUnchecked(e); 55 | throw new RuntimeException(e); 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: 
-------------------------------------------------------------------------------- 1 | name: Release with Maven 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | jobs: 8 | release: 9 | runs-on: ubuntu-latest 10 | if: "!contains(github.event.head_commit.message, '[ci skip]')" 11 | permissions: 12 | contents: write 13 | packages: write 14 | steps: 15 | - uses: actions/checkout@v6 16 | with: 17 | fetch-depth: 0 18 | - uses: actions/setup-java@v5 19 | with: 20 | java-version: '25' 21 | distribution: 'temurin' 22 | cache: 'maven' 23 | - name: Configure Problem Matchers 24 | run: | 25 | echo "::add-matcher::.github/problem-matcher.json" 26 | echo "::remove-matcher owner=java::" 27 | - name: Configure Git user 28 | run: | 29 | git config user.name "${{ github.event.head_commit.committer.name }}" 30 | git config user.email "${{ github.event.head_commit.committer.email }}" 31 | 32 | - name: Set up Docker Buildx 33 | uses: docker/setup-buildx-action@v3 34 | - name: Login to Docker Hub 35 | uses: docker/login-action@v3 36 | with: 37 | username: ${{ secrets.DOCKERHUB_USERNAME }} 38 | password: ${{ secrets.DOCKERHUB_TOKEN }} 39 | 40 | - name: Prepare release 41 | run: ./mvnw -B release:prepare 42 | 43 | - name: Build and push Docker image 44 | run: | 45 | ./docker-build.sh 46 | 47 | - name: Save version number in env 48 | run: | 49 | echo "VERSION=$(grep 'project.rel.pl.net.was\\:trino-git=' release.properties | cut -d'=' -f2)" >> $GITHUB_ENV 50 | 51 | - name: Publish JAR 52 | run: ./mvnw -B release:perform -Darguments="-Dgpg.skip -Dmaven.deploy.skip=true" 53 | env: 54 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 55 | 56 | - name: Run JReleaser 57 | uses: jreleaser/release-action@v2 58 | env: 59 | JRELEASER_PROJECT_VERSION: ${{ env.VERSION }} 60 | JRELEASER_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 61 | with: 62 | setup-java: false 63 | 64 | - name: Annotate run 65 | uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70 66 | if: always() 67 
| with: 68 | fail_if_no_tests: false 69 | skip_publishing: true 70 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import com.google.common.collect.ImmutableList; 19 | import io.trino.spi.connector.ColumnMetadata; 20 | 21 | import java.util.List; 22 | 23 | import static com.google.common.base.Preconditions.checkArgument; 24 | import static com.google.common.base.Strings.isNullOrEmpty; 25 | import static java.util.Objects.requireNonNull; 26 | 27 | public class GitTable 28 | { 29 | private final String name; 30 | private final List columns; 31 | private final List columnsMetadata; 32 | 33 | @JsonCreator 34 | public GitTable( 35 | @JsonProperty("name") String name, 36 | @JsonProperty("columns") List columns) 37 | { 38 | checkArgument(!isNullOrEmpty(name), "name is null or is empty"); 39 | this.name = requireNonNull(name, "name is null"); 40 | this.columns = List.copyOf(requireNonNull(columns, "columns is null")); 41 | 42 | ImmutableList.Builder columnsMetadata = ImmutableList.builder(); 43 | for (GitColumn column : this.columns) { 44 | 
columnsMetadata.add(new ColumnMetadata(column.getName(), column.getType())); 45 | } 46 | this.columnsMetadata = columnsMetadata.build(); 47 | } 48 | 49 | @JsonProperty 50 | public String getName() 51 | { 52 | return name; 53 | } 54 | 55 | @JsonProperty 56 | public List getColumns() 57 | { 58 | return columns; 59 | } 60 | 61 | public List getColumnsMetadata() 62 | { 63 | return columnsMetadata; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitColumn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import io.trino.spi.type.Type; 19 | 20 | import java.util.Objects; 21 | 22 | import static com.google.common.base.Preconditions.checkArgument; 23 | import static com.google.common.base.Strings.isNullOrEmpty; 24 | import static java.util.Objects.requireNonNull; 25 | 26 | public final class GitColumn 27 | { 28 | private final String name; 29 | private final Type type; 30 | 31 | @JsonCreator 32 | public GitColumn( 33 | @JsonProperty("name") String name, 34 | @JsonProperty("type") Type type) 35 | { 36 | checkArgument(!isNullOrEmpty(name), "name is null or is empty"); 37 | this.name = name; 38 | this.type = requireNonNull(type, "type is null"); 39 | } 40 | 41 | @JsonProperty 42 | public String getName() 43 | { 44 | return name; 45 | } 46 | 47 | @JsonProperty 48 | public Type getType() 49 | { 50 | return type; 51 | } 52 | 53 | @Override 54 | public int hashCode() 55 | { 56 | return Objects.hash(name, type); 57 | } 58 | 59 | @Override 60 | public boolean equals(Object obj) 61 | { 62 | if (this == obj) { 63 | return true; 64 | } 65 | if (obj == null || getClass() != obj.getClass()) { 66 | return false; 67 | } 68 | 69 | GitColumn other = (GitColumn) obj; 70 | return Objects.equals(this.name, other.name) && 71 | Objects.equals(this.type, other.type); 72 | } 73 | 74 | @Override 75 | public String toString() 76 | { 77 | return name + ":" + type; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitTableHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.json.JsonCodec; 17 | import io.airlift.testing.EquivalenceTester; 18 | import org.junit.jupiter.api.Test; 19 | 20 | import java.util.Optional; 21 | import java.util.OptionalLong; 22 | 23 | import static io.airlift.json.JsonCodec.jsonCodec; 24 | import static org.assertj.core.api.Assertions.assertThat; 25 | 26 | public class TestGitTableHandle 27 | { 28 | private final GitTableHandle tableHandle = new GitTableHandle("schemaName", "tableName", Optional.empty(), OptionalLong.empty()); 29 | 30 | @Test 31 | public void testJsonRoundTrip() 32 | { 33 | JsonCodec codec = jsonCodec(GitTableHandle.class); 34 | String json = codec.toJson(tableHandle); 35 | GitTableHandle copy = codec.fromJson(json); 36 | assertThat(copy).isEqualTo(tableHandle); 37 | } 38 | 39 | @Test 40 | public void testEquivalence() 41 | { 42 | EquivalenceTester.equivalenceTester() 43 | .addEquivalentGroup( 44 | new GitTableHandle("schema", "table", Optional.empty(), OptionalLong.empty()), 45 | new GitTableHandle("schema", "table", Optional.empty(), OptionalLong.empty())) 46 | .addEquivalentGroup( 47 | new GitTableHandle("schemaX", "table", Optional.empty(), OptionalLong.empty()), 48 | new GitTableHandle("schemaX", "table", Optional.empty(), OptionalLong.empty())) 49 | .addEquivalentGroup( 50 | new GitTableHandle("schema", "tableX", Optional.empty(), OptionalLong.empty()), 51 | new GitTableHandle("schema", "tableX", Optional.empty(), OptionalLong.empty())) 52 | .check(); 53 | } 54 | } 55 | 
-------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import io.trino.spi.HostAddress; 19 | import io.trino.spi.connector.ConnectorSplit; 20 | 21 | import java.net.URI; 22 | import java.util.List; 23 | import java.util.Optional; 24 | 25 | import static java.util.Objects.requireNonNull; 26 | 27 | public class GitSplit 28 | implements ConnectorSplit 29 | { 30 | // the split carries the URI from the config so that it can be used in RecordSet 31 | private final URI uri; 32 | // the split records the table it was created for so that it can be used in RecordSetProvider 33 | private final String tableName; 34 | private final Optional> commitIds; 35 | 36 | @JsonCreator 37 | public GitSplit( 38 | @JsonProperty("tableName") String tableName, 39 | @JsonProperty("uri") URI uri, 40 | @JsonProperty("commitIds") Optional> commitIds) 41 | { 42 | this.tableName = requireNonNull(tableName, "table name is null"); 43 | this.uri = requireNonNull(uri, "uri is null"); 44 | this.commitIds = requireNonNull(commitIds, "commitIds is null"); 45 | } 46 | 47 | @JsonProperty 48 | public String getTableName() 
49 | { 50 | return tableName; 51 | } 52 | 53 | @JsonProperty 54 | public URI getUri() 55 | { 56 | return uri; 57 | } 58 | 59 | @JsonProperty 60 | public Optional> getCommitIds() 61 | { 62 | return commitIds; 63 | } 64 | 65 | @Override 66 | public boolean isRemotelyAccessible() 67 | { 68 | // always reports the split as remotely accessible; the URI scheme is not inspected here 69 | return true; 70 | } 71 | 72 | @Override 73 | public List getAddresses() 74 | { 75 | return List.of(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.inject.Inject; 17 | import io.airlift.bootstrap.LifeCycleManager; 18 | import io.trino.spi.connector.Connector; 19 | import io.trino.spi.connector.ConnectorMetadata; 20 | import io.trino.spi.connector.ConnectorRecordSetProvider; 21 | import io.trino.spi.connector.ConnectorSession; 22 | import io.trino.spi.connector.ConnectorSplitManager; 23 | import io.trino.spi.connector.ConnectorTransactionHandle; 24 | import io.trino.spi.transaction.IsolationLevel; 25 | 26 | import static java.util.Objects.requireNonNull; 27 | import static pl.net.was.trino.git.GitTransactionHandle.INSTANCE; 28 | 29 | public class GitConnector 30 | implements Connector 31 | { 32 | private final LifeCycleManager lifeCycleManager; 33 | private final GitMetadata metadata; 34 | private final GitSplitManager splitManager; 35 | private final GitRecordSetProvider recordSetProvider; 36 | 37 | @Inject 38 | public GitConnector( 39 | LifeCycleManager lifeCycleManager, 40 | GitMetadata metadata, 41 | GitSplitManager splitManager, 42 | GitRecordSetProvider recordSetProvider) 43 | { 44 | this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); 45 | this.metadata = requireNonNull(metadata, "metadata is null"); 46 | this.splitManager = requireNonNull(splitManager, "splitManager is null"); 47 | this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); 48 | } 49 | 50 | @Override 51 | public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) 52 | { 53 | return INSTANCE; 54 | } 55 | 56 | @Override 57 | public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transactionHandle) 58 | { 59 | return metadata; 60 | } 61 | 62 | @Override 63 | public ConnectorSplitManager getSplitManager() 64 | { 65 | return splitManager; 66 | } 67 | 68 | @Override 69 | public ConnectorRecordSetProvider 
getRecordSetProvider() 70 | { 71 | return recordSetProvider; 72 | } 73 | 74 | @Override 75 | public final void shutdown() 76 | { 77 | lifeCycleManager.stop(); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitRecordSetProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.common.collect.ImmutableMap; 17 | import io.trino.spi.connector.RecordCursor; 18 | import io.trino.spi.connector.RecordSet; 19 | import org.eclipse.jgit.api.errors.GitAPIException; 20 | import org.junit.jupiter.api.BeforeAll; 21 | import org.junit.jupiter.api.Test; 22 | 23 | import java.io.IOException; 24 | import java.net.URI; 25 | import java.util.LinkedHashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.Optional; 29 | import java.util.OptionalLong; 30 | 31 | import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; 32 | import static io.trino.testing.TestingConnectorSession.SESSION; 33 | import static org.assertj.core.api.Assertions.assertThat; 34 | 35 | public class TestGitRecordSetProvider 36 | { 37 | private static final URI uri = URI.create("fake.invalid"); 38 | 39 | @BeforeAll 40 | public static void setUp() 41 | throws IOException, GitAPIException 42 | { 43 
| TestGitClient.setupRepo(uri); 44 | } 45 | 46 | @Test 47 | public void testGetRecordSet() 48 | { 49 | GitRecordSetProvider recordSetProvider = new GitRecordSetProvider(); 50 | RecordSet recordSet = recordSetProvider.getRecordSet( 51 | GitTransactionHandle.INSTANCE, 52 | SESSION, 53 | new GitSplit("commits", uri, Optional.empty()), 54 | new GitTableHandle("default", "commits", Optional.empty(), OptionalLong.empty()), 55 | List.of( 56 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 57 | new GitColumnHandle("author_name", createUnboundedVarcharType(), 1))); 58 | assertThat(recordSet).isNotNull(); 59 | 60 | RecordCursor cursor = recordSet.cursor(); 61 | assertThat(cursor).isNotNull(); 62 | 63 | Map data = new LinkedHashMap<>(); 64 | while (cursor.advanceNextPosition()) { 65 | data.put(cursor.getSlice(0).toStringUtf8(), cursor.getSlice(1).toStringUtf8()); 66 | } 67 | assertThat(data).isEqualTo(ImmutableMap.builder() 68 | .put("080dfdf0aac7d302dc31d57f62942bb6533944f7", "test") 69 | .put("c3b14e59f88d0d6597b98ee93cf61e7556d540a4", "test") 70 | .build()); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitColumnHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import io.trino.spi.connector.ColumnHandle; 19 | import io.trino.spi.connector.ColumnMetadata; 20 | import io.trino.spi.type.Type; 21 | 22 | import static com.google.common.base.MoreObjects.toStringHelper; 23 | import static java.util.Objects.requireNonNull; 24 | 25 | public final class GitColumnHandle 26 | implements ColumnHandle 27 | { 28 | private final String columnName; 29 | private final Type columnType; 30 | private final int ordinalPosition; 31 | 32 | @JsonCreator 33 | public GitColumnHandle( 34 | @JsonProperty("columnName") String columnName, 35 | @JsonProperty("columnType") Type columnType, 36 | @JsonProperty("ordinalPosition") int ordinalPosition) 37 | { 38 | this.columnName = requireNonNull(columnName, "columnName is null"); 39 | this.columnType = requireNonNull(columnType, "columnType is null"); 40 | this.ordinalPosition = ordinalPosition; 41 | } 42 | 43 | @JsonProperty 44 | public String getColumnName() 45 | { 46 | return columnName; 47 | } 48 | 49 | @JsonProperty 50 | public Type getColumnType() 51 | { 52 | return columnType; 53 | } 54 | 55 | @JsonProperty 56 | public int getOrdinalPosition() 57 | { 58 | return ordinalPosition; 59 | } 60 | 61 | public ColumnMetadata getColumnMetadata() 62 | { 63 | return new ColumnMetadata(columnName, columnType); 64 | } 65 | 66 | @Override 67 | public int hashCode() 68 | { 69 | return columnName.hashCode(); 70 | } 71 | 72 | @Override 73 | public boolean equals(Object obj) 74 | { 75 | if (this == obj) { 76 | return true; 77 | } 78 | if ((obj == null) || (getClass() != obj.getClass())) { 79 | return false; 80 | } 81 | 82 | GitColumnHandle other = (GitColumnHandle) obj; 83 | return columnName.equals(other.columnName); 84 | } 85 | 86 | @Override 87 | public String toString() 88 | { 89 | return toStringHelper(this) 90 | .add("columnName", columnName) 91 
| .add("columnType", columnType) 92 | .add("ordinalPosition", ordinalPosition) 93 | .toString(); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitModule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.databind.DeserializationContext; 17 | import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer; 18 | import com.google.inject.Binder; 19 | import com.google.inject.Inject; 20 | import com.google.inject.Module; 21 | import com.google.inject.Scopes; 22 | import io.trino.spi.type.Type; 23 | import io.trino.spi.type.TypeId; 24 | import io.trino.spi.type.TypeManager; 25 | 26 | import static io.airlift.configuration.ConfigBinder.configBinder; 27 | import static io.airlift.json.JsonBinder.jsonBinder; 28 | import static io.airlift.json.JsonCodec.listJsonCodec; 29 | import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; 30 | import static java.util.Objects.requireNonNull; 31 | 32 | public class GitModule 33 | implements Module 34 | { 35 | private final String catalogName; 36 | private final TypeManager typeManager; 37 | 38 | @Inject 39 | public GitModule(String catalogName, TypeManager typeManager) 40 | { 41 | this.catalogName = requireNonNull(catalogName, 
"catalogName is null"); 42 | this.typeManager = requireNonNull(typeManager, "typeManager is null"); 43 | } 44 | 45 | @Override 46 | public void configure(Binder binder) 47 | { 48 | binder.bindConstant().annotatedWith(GitMetadata.CatalogName.class).to(catalogName); 49 | binder.bind(TypeManager.class).toInstance(typeManager); 50 | 51 | binder.bind(GitConnector.class).in(Scopes.SINGLETON); 52 | binder.bind(GitMetadata.class).in(Scopes.SINGLETON); 53 | binder.bind(GitClient.class).in(Scopes.SINGLETON); 54 | binder.bind(GitSplitManager.class).in(Scopes.SINGLETON); 55 | binder.bind(GitRecordSetProvider.class).in(Scopes.SINGLETON); 56 | configBinder(binder).bindConfig(GitConfig.class); 57 | 58 | jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class); 59 | jsonCodecBinder(binder).bindMapJsonCodec(String.class, listJsonCodec(GitTable.class)); 60 | } 61 | 62 | public static final class TypeDeserializer 63 | extends FromStringDeserializer 64 | { 65 | private final TypeManager typeManager; 66 | 67 | @Inject 68 | public TypeDeserializer(TypeManager typeManager) 69 | { 70 | super(Type.class); 71 | this.typeManager = requireNonNull(typeManager, "typeManager is null"); 72 | } 73 | 74 | @Override 75 | protected Type _deserialize(String value, DeserializationContext context) 76 | { 77 | return typeManager.getType(TypeId.of(value)); 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitTableHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import io.trino.spi.connector.ConnectorTableHandle; 19 | import io.trino.spi.connector.SchemaTableName; 20 | 21 | import java.util.List; 22 | import java.util.Objects; 23 | import java.util.Optional; 24 | import java.util.OptionalLong; 25 | 26 | import static java.util.Objects.requireNonNull; 27 | 28 | public final class GitTableHandle 29 | implements ConnectorTableHandle 30 | { 31 | private final String schemaName; 32 | private final String tableName; 33 | private final Optional> commitIds; 34 | private final OptionalLong limit; 35 | 36 | @JsonCreator 37 | public GitTableHandle( 38 | @JsonProperty("schemaName") String schemaName, 39 | @JsonProperty("tableName") String tableName, 40 | @JsonProperty("commitIds") Optional> commitIds, 41 | @JsonProperty("limit") OptionalLong limit) 42 | { 43 | this.schemaName = requireNonNull(schemaName, "schemaName is null"); 44 | this.tableName = requireNonNull(tableName, "tableName is null"); 45 | this.commitIds = requireNonNull(commitIds, "commitIds is null"); 46 | this.limit = requireNonNull(limit, "limit is null"); 47 | } 48 | 49 | @JsonProperty 50 | public String getSchemaName() 51 | { 52 | return schemaName; 53 | } 54 | 55 | @JsonProperty 56 | public String getTableName() 57 | { 58 | return tableName; 59 | } 60 | 61 | public SchemaTableName toSchemaTableName() 62 | { 63 | return new SchemaTableName(schemaName, tableName); 
64 | } 65 | 66 | @JsonProperty 67 | public Optional> getCommitIds() 68 | { 69 | return commitIds; 70 | } 71 | 72 | @JsonProperty 73 | public OptionalLong getLimit() 74 | { 75 | return limit; 76 | } 77 | 78 | @Override 79 | public int hashCode() 80 | { 81 | return Objects.hash(schemaName, tableName); 82 | } 83 | 84 | @Override 85 | public boolean equals(Object obj) 86 | { 87 | if (this == obj) { 88 | return true; 89 | } 90 | if ((obj == null) || (getClass() != obj.getClass())) { 91 | return false; 92 | } 93 | 94 | GitTableHandle other = (GitTableHandle) obj; 95 | return Objects.equals(this.schemaName, other.schemaName) && 96 | Objects.equals(this.tableName, other.tableName) && 97 | Objects.equals(this.commitIds, other.commitIds) && 98 | Objects.equals(this.limit, other.limit); 99 | } 100 | 101 | @Override 102 | public String toString() 103 | { 104 | return schemaName + ":" + tableName; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/GitQueryRunner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.log.Level; 17 | import io.airlift.log.Logger; 18 | import io.airlift.log.Logging; 19 | import io.trino.Session; 20 | import io.trino.plugin.memory.MemoryPlugin; 21 | import io.trino.plugin.tpch.TpchPlugin; 22 | import io.trino.testing.DistributedQueryRunner; 23 | 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | import static io.airlift.testing.Closeables.closeAllSuppress; 28 | import static io.trino.testing.TestingSession.testSessionBuilder; 29 | import static java.util.Objects.requireNonNullElse; 30 | 31 | public final class GitQueryRunner 32 | { 33 | private GitQueryRunner() {} 34 | 35 | private static final String TPCH_SCHEMA = "tpch"; 36 | 37 | public static DistributedQueryRunner createGitQueryRunner( 38 | Map extraProperties, 39 | Map connectorProperties) 40 | throws Exception 41 | { 42 | DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(createSession()) 43 | .setExtraProperties(extraProperties) 44 | .build(); 45 | try { 46 | queryRunner.installPlugin(new MemoryPlugin()); 47 | queryRunner.createCatalog("memory", "memory"); 48 | 49 | queryRunner.installPlugin(new TpchPlugin()); 50 | queryRunner.createCatalog("tpch", "tpch"); 51 | 52 | connectorProperties = new HashMap<>(Map.copyOf(connectorProperties)); 53 | 54 | queryRunner.installPlugin(new GitPlugin()); 55 | queryRunner.createCatalog("git", "git", connectorProperties); 56 | 57 | return queryRunner; 58 | } 59 | catch (Throwable e) { 60 | closeAllSuppress(e, queryRunner); 61 | throw e; 62 | } 63 | } 64 | 65 | private static Session createSession() 66 | { 67 | return testSessionBuilder() 68 | .setCatalog("git") 69 | .setSchema(TPCH_SCHEMA) 70 | .build(); 71 | } 72 | 73 | public static void main(String[] args) 74 | throws Exception 75 | { 76 | Logging logging = Logging.initialize(); 77 | logging.setLevel("io.trino.plugin", Level.DEBUG); 78 | logging.setLevel("io.trino.spi", Level.DEBUG); 79 | 
logging.setLevel("pl.net.was", Level.DEBUG); 80 | 81 | String url = "https://github.com/nineinchnick/trino-git.git"; 82 | if (args.length > 0) { 83 | url = args[0]; 84 | } 85 | 86 | DistributedQueryRunner queryRunner = createGitQueryRunner( 87 | Map.of("http-server.http.port", requireNonNullElse(System.getenv("TRINO_PORT"), "8081")), 88 | Map.of("metadata-uri", url)); 89 | 90 | Logger log = Logger.get(GitQueryRunner.class); 91 | log.info("======== SERVER STARTED ========"); 92 | log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import org.eclipse.jgit.api.Git; 17 | import org.eclipse.jgit.api.errors.GitAPIException; 18 | import org.eclipse.jgit.errors.ConfigInvalidException; 19 | import org.eclipse.jgit.lib.PersonIdent; 20 | import org.eclipse.jgit.lib.Repository; 21 | import org.eclipse.jgit.storage.file.FileRepositoryBuilder; 22 | import org.eclipse.jgit.util.SystemReader; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.io.PrintWriter; 28 | import java.net.URI; 29 | import java.time.Instant; 30 | import java.util.Set; 31 | 32 | import static com.google.common.io.MoreFiles.deleteRecursively; 33 | import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; 34 | import static java.time.ZoneOffset.UTC; 35 | import static org.assertj.core.api.Assertions.assertThat; 36 | 37 | public class TestGitClient 38 | { 39 | @Test 40 | public void testMetadata() 41 | { 42 | GitClient client = new GitClient(new GitConfig()); 43 | assertThat(client.getSchemaNames()).isEqualTo(Set.of("default")); 44 | } 45 | 46 | public static void setupRepo(URI uri) 47 | throws IOException, GitAPIException 48 | { 49 | // make sure the global Git config is not being used 50 | try { 51 | SystemReader.getInstance().getUserConfig().clear(); 52 | } 53 | catch (ConfigInvalidException e) { 54 | // ignore 55 | } 56 | 57 | // ensure the repo dir exists, remove and recreate if necessary 58 | File localPath; 59 | try { 60 | localPath = GitRecordSet.ensureDir(uri.toString()); 61 | } 62 | catch (IOException ignored) { 63 | return; 64 | } 65 | if (localPath.exists()) { 66 | deleteRecursively(localPath.toPath(), ALLOW_INSECURE); 67 | } 68 | 69 | Repository repository = FileRepositoryBuilder.create(new File(localPath, ".git")); 70 | repository.create(); 71 | 72 | // create a new file 73 | File myFile = new File(repository.getDirectory().getParent(), "testfile"); 74 | if (!myFile.createNewFile()) { 75 
| throw new IOException("Could not create file " + myFile); 76 | } 77 | 78 | PersonIdent author = new PersonIdent("test", "test@invalid.com", Instant.ofEpochSecond(1580897313L), UTC); 79 | // commit the new file 80 | Git git = new Git(repository); 81 | git.add().addFilepattern(".").call(); 82 | git.commit() 83 | .setMessage("Commit all changes including additions") 84 | .setAuthor(author) 85 | .setCommitter(author) 86 | .call(); 87 | 88 | try (PrintWriter writer = new PrintWriter(myFile)) { 89 | writer.append("Hello, world!"); 90 | } 91 | if (!myFile.setLastModified(1580897600000L)) { 92 | throw new IOException("Could not set last modified on file " + myFile); 93 | } 94 | 95 | // Stage all changed files, omitting new files, and commit with one command 96 | git.commit() 97 | .setAll(true) 98 | .setMessage("Commit changes to all files") 99 | .setAuthor(author) 100 | .setCommitter(author) 101 | .call(); 102 | 103 | git.tag() 104 | .setName("tag_for_testing") 105 | .setTagger(author) 106 | .call(); 107 | 108 | git.tag() 109 | .setName("unannotated_tag_for_testing") 110 | .setAnnotated(false) 111 | .call(); 112 | 113 | // ensure all loose objects are packed 114 | git.gc().call(); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Trino git Connector 2 | =================== 3 | 4 | [![Build Status](https://github.com/nineinchnick/trino-git/actions/workflows/release.yaml/badge.svg)](https://github.com/nineinchnick/trino-git/actions/workflows/release.yaml) 5 | 6 | This is a [Trino](http://trino.io/) connector to access git repos. Please keep in mind that this is not production ready and it was created for tests. 

# Quick Start

To run a Docker container with the connector, run the following:
```bash
docker run \
  -d \
  --name trino-git \
  -e REPO_URL=https://github.com/nineinchnick/trino-rest.git \
  -p 8080:8080 \
  nineinchnick/trino-git:0.87
```

Then use your favourite SQL client to connect to Trino running at http://localhost:8080

# Usage

Download one of the ZIP packages, unzip it and copy the `trino-git-0.87` directory to the plugin directory on every node in your Trino cluster.
Create a `git.properties` file in your Trino catalog directory and point to a remote repo.
You can also use a path to a local repo if it's available on every worker node.

```
connector.name=git
metadata-uri=https://github.com/nineinchnick/trino-git.git
```

After reloading Trino, you should be able to connect to the `git` catalog and see the following tables in the `default` schema:
* `branches`
* `commits` - all commits from every branch, with author, committer, message and commit time
* `diff_stats` - any files modified, deleted or renamed in every commit, with number of added and/or deleted lines
* `objects` - every file contents
* `tags`
* `trees` - all files in every commit, with file mode and attributes

To see who has commits with only deleted lines:

```sql
SELECT
    i.name,
    i.email,
    min(c.commit_time) FILTER (WHERE c.added_lines = 0 AND c.deleted_lines != 0) AS first_delete_only_commit_at,
    count(*) FILTER (WHERE c.added_lines = 0 AND c.deleted_lines != 0) AS delete_only_commit_count,
    CAST(count(*) FILTER (WHERE c.added_lines = 0 AND c.deleted_lines != 0) AS double) / CAST(COUNT(*) AS double) AS delete_only_commit_ratio
FROM
    commit_stats c
    JOIN idents i ON
        c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
GROUP BY
    i.name,
    i.email
HAVING
    count(*) FILTER (WHERE c.added_lines = 0 AND c.deleted_lines != 0) != 0
ORDER BY
    i.name,
    i.email;
```

Should return:
```
name   |email         |first_delete_only_commit_at|delete_only_commit_count|delete_only_commit_ratio|
-------|--------------|---------------------------|------------------------|------------------------|
Jan Was|jan@was.net.pl|        2021-01-09 23:22:28|                       2|     0.08695652173913043|
```

# Build

Run all the unit test classes.
```
mvn test
```

Create a deployable jar file.
```
mvn clean compile package
```

Copy jar files in target directory to use git connector in your Trino cluster.
```
cp -p target/*.jar ${PLUGIN_DIRECTORY}/git/
```

# Deploy

An example command to run the Trino server with the git plugin and catalog enabled:

```bash
src=$(git rev-parse --show-toplevel)
docker run \
  -v $src/target/trino-git-0.70-SNAPSHOT:/usr/lib/trino/plugin/git \
  -v $src/catalog:/usr/lib/trino/default/etc/catalog \
  -p 8080:8080 \
  --name trino \
  -d \
  trinodb/trino:462
```

Connect to that server using:
```bash
docker run -it --rm --link trino trinodb/trino:462 trino --server trino:8080 --catalog git --schema default
```

# References

If you're looking to analyze the structure or contents of a Git repo, [gitbase](https://github.com/src-d/gitbase) could be more suitable for such a task.
It could even work with Trino, since Trino has a [MySQL connector](https://trino.io/docs/current/connector/mysql.html).

If you also want to analyze GitHub issues, pull requests (with review comments) or workflow runs and jobs,
check out the GitHub connector in [trino-rest](https://github.com/nineinchnick/trino-rest).
115 | 116 | This effort is inspired by [Acha](https://github.com/someteam/acha), to be able to calculate achievements based on contents of a Git repository using SQL. 117 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/BranchesRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.type.Type; 20 | import org.eclipse.jgit.api.Git; 21 | import org.eclipse.jgit.api.ListBranchCommand; 22 | import org.eclipse.jgit.api.errors.GitAPIException; 23 | import org.eclipse.jgit.lib.Ref; 24 | import org.eclipse.jgit.revwalk.RevCommit; 25 | import org.eclipse.jgit.revwalk.RevWalk; 26 | 27 | import java.io.IOException; 28 | import java.util.Iterator; 29 | import java.util.List; 30 | 31 | import static com.google.common.base.Preconditions.checkArgument; 32 | import static com.google.common.base.Preconditions.checkState; 33 | 34 | public class BranchesRecordCursor 35 | implements RecordCursor 36 | { 37 | private final List columnHandles; 38 | private final int[] fieldToColumnIndex; 39 | 40 | private final RevWalk walk; 41 | private RevCommit main; 42 | 43 | private Iterator branches; 44 | private 
Ref branch; 45 | 46 | private List fields; 47 | 48 | public BranchesRecordCursor(List columnHandles, Git repo) 49 | { 50 | this.columnHandles = columnHandles; 51 | 52 | fieldToColumnIndex = new int[columnHandles.size()]; 53 | for (int i = 0; i < columnHandles.size(); i++) { 54 | fieldToColumnIndex[i] = columnHandles.get(i).getOrdinalPosition(); 55 | } 56 | 57 | try { 58 | branches = repo.branchList().setListMode(ListBranchCommand.ListMode.ALL).call().iterator(); 59 | } 60 | catch (GitAPIException ignored) { 61 | //pass 62 | } 63 | 64 | walk = new RevWalk(repo.getRepository()); 65 | try { 66 | Ref head = repo.getRepository().findRef("HEAD"); 67 | main = walk.parseCommit(head.getObjectId()); 68 | } 69 | catch (IOException ignored) { 70 | // pass 71 | } 72 | } 73 | 74 | @Override 75 | public long getCompletedBytes() 76 | { 77 | return 0; 78 | } 79 | 80 | @Override 81 | public long getReadTimeNanos() 82 | { 83 | return 0; 84 | } 85 | 86 | @Override 87 | public Type getType(int field) 88 | { 89 | checkArgument(field < columnHandles.size(), "Invalid field index"); 90 | return columnHandles.get(field).getColumnType(); 91 | } 92 | 93 | @Override 94 | public boolean advanceNextPosition() 95 | { 96 | if (!branches.hasNext()) { 97 | return false; 98 | } 99 | branch = branches.next(); 100 | 101 | fields = List.of( 102 | branch.getObjectId().getName(), 103 | branch.getName()); 104 | 105 | return true; 106 | } 107 | 108 | private String getFieldValue(int field) 109 | { 110 | checkState(fields != null, "Cursor has not been advanced yet"); 111 | 112 | int columnIndex = fieldToColumnIndex[field]; 113 | return fields.get(columnIndex); 114 | } 115 | 116 | @Override 117 | public boolean getBoolean(int field) 118 | { 119 | try { 120 | RevCommit current = walk.parseCommit(branch.getObjectId()); 121 | return walk.isMergedInto(current, main); 122 | } 123 | catch (IOException e) { 124 | return false; 125 | } 126 | } 127 | 128 | @Override 129 | public long getLong(int field) 130 | { 131 | 
throw new UnsupportedOperationException(); 132 | } 133 | 134 | @Override 135 | public double getDouble(int field) 136 | { 137 | throw new UnsupportedOperationException(); 138 | } 139 | 140 | @Override 141 | public Slice getSlice(int field) 142 | { 143 | return Slices.utf8Slice(getFieldValue(field)); 144 | } 145 | 146 | @Override 147 | public Object getObject(int field) 148 | { 149 | throw new UnsupportedOperationException(); 150 | } 151 | 152 | @Override 153 | public boolean isNull(int field) 154 | { 155 | return false; 156 | } 157 | 158 | @Override 159 | public void close() 160 | { 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitSplitManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.inject.Inject; 17 | import io.trino.spi.connector.ColumnHandle; 18 | import io.trino.spi.connector.ConnectorSession; 19 | import io.trino.spi.connector.ConnectorSplitManager; 20 | import io.trino.spi.connector.ConnectorSplitSource; 21 | import io.trino.spi.connector.ConnectorTableHandle; 22 | import io.trino.spi.connector.ConnectorTransactionHandle; 23 | import io.trino.spi.connector.Constraint; 24 | import io.trino.spi.connector.DynamicFilter; 25 | import io.trino.spi.connector.FixedSplitSource; 26 | import io.trino.spi.predicate.TupleDomain; 27 | 28 | import java.util.List; 29 | import java.util.concurrent.CompletableFuture; 30 | import java.util.concurrent.ExecutionException; 31 | 32 | import static io.trino.spi.connector.DynamicFilter.NOT_BLOCKED; 33 | import static java.util.Objects.requireNonNull; 34 | import static java.util.concurrent.TimeUnit.MILLISECONDS; 35 | import static pl.net.was.trino.git.GitMetadata.getCommitIds; 36 | 37 | public class GitSplitManager 38 | implements ConnectorSplitManager 39 | { 40 | private final GitConfig config; 41 | 42 | @Inject 43 | public GitSplitManager(GitConfig config) 44 | { 45 | this.config = requireNonNull(config, "config is null"); 46 | } 47 | 48 | @Override 49 | public ConnectorSplitSource getSplits( 50 | ConnectorTransactionHandle transaction, 51 | ConnectorSession session, 52 | ConnectorTableHandle connectorTableHandle, 53 | DynamicFilter dynamicFilter, 54 | Constraint constraint) 55 | { 56 | long timeoutMillis = 20000; 57 | if (!dynamicFilter.isAwaitable()) { 58 | return getSplitSource(connectorTableHandle, dynamicFilter); 59 | } 60 | CompletableFuture dynamicFilterFuture = whenCompleted(dynamicFilter) 61 | .completeOnTimeout(null, timeoutMillis, MILLISECONDS); 62 | CompletableFuture splitSourceFuture = dynamicFilterFuture.thenApply( 63 | ignored -> getSplitSource(connectorTableHandle, dynamicFilter)); 64 | return new 
GitDynamicFilteringSplitSource(dynamicFilterFuture, splitSourceFuture); 65 | } 66 | 67 | private ConnectorSplitSource getSplitSource( 68 | ConnectorTableHandle table, 69 | DynamicFilter dynamicFilter) 70 | { 71 | GitTableHandle handle = (GitTableHandle) table; 72 | 73 | TupleDomain constraint = dynamicFilter.getCurrentPredicate().simplify(100); 74 | 75 | List splits = List.of(new GitSplit(handle.getTableName(), config.getUri(), getCommitIds(constraint))); 76 | 77 | return new FixedSplitSource(splits); 78 | } 79 | 80 | private static CompletableFuture whenCompleted(DynamicFilter dynamicFilter) 81 | { 82 | if (dynamicFilter.isAwaitable()) { 83 | return dynamicFilter.isBlocked().thenCompose(ignored -> whenCompleted(dynamicFilter)); 84 | } 85 | return NOT_BLOCKED; 86 | } 87 | 88 | private static class GitDynamicFilteringSplitSource 89 | implements ConnectorSplitSource 90 | { 91 | private final CompletableFuture dynamicFilterFuture; 92 | private final CompletableFuture splitSourceFuture; 93 | 94 | private GitDynamicFilteringSplitSource( 95 | CompletableFuture dynamicFilterFuture, 96 | CompletableFuture splitSourceFuture) 97 | { 98 | this.dynamicFilterFuture = requireNonNull(dynamicFilterFuture, "dynamicFilterFuture is null"); 99 | this.splitSourceFuture = requireNonNull(splitSourceFuture, "splitSourceFuture is null"); 100 | } 101 | 102 | @Override 103 | public CompletableFuture getNextBatch(int maxSize) 104 | { 105 | return splitSourceFuture.thenCompose(splitSource -> splitSource.getNextBatch(maxSize)); 106 | } 107 | 108 | @Override 109 | public void close() 110 | { 111 | if (!dynamicFilterFuture.cancel(true)) { 112 | splitSourceFuture.thenAccept(ConnectorSplitSource::close); 113 | } 114 | } 115 | 116 | @Override 117 | public boolean isFinished() 118 | { 119 | if (!splitSourceFuture.isDone()) { 120 | return false; 121 | } 122 | if (splitSourceFuture.isCompletedExceptionally()) { 123 | return false; 124 | } 125 | try { 126 | return splitSourceFuture.get().isFinished(); 
127 | } 128 | catch (InterruptedException | ExecutionException e) { 129 | throw new RuntimeException(e); 130 | } 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/ObjectsRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.type.Type; 20 | import org.eclipse.jgit.api.Git; 21 | import org.eclipse.jgit.internal.storage.file.FileRepository; 22 | import org.eclipse.jgit.internal.storage.file.Pack; 23 | import org.eclipse.jgit.internal.storage.file.PackIndex; 24 | import org.eclipse.jgit.lib.ObjectId; 25 | import org.eclipse.jgit.lib.ObjectLoader; 26 | 27 | import java.io.IOException; 28 | import java.util.Collection; 29 | import java.util.HashMap; 30 | import java.util.Iterator; 31 | import java.util.List; 32 | import java.util.Map; 33 | import java.util.function.Function; 34 | 35 | import static com.google.common.base.Preconditions.checkArgument; 36 | import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; 37 | 38 | public class ObjectsRecordCursor 39 | implements RecordCursor 40 | { 41 | private final List columnHandles; 42 | 43 | private final 
FileRepository fileRepo; 44 | private final Iterator packs; 45 | private Iterator entries; 46 | private ObjectId objectId; 47 | private ObjectLoader loader; 48 | 49 | private final Map> strFieldGetters = new HashMap<>(); 50 | 51 | public ObjectsRecordCursor(List columnHandles, Git repo) 52 | { 53 | this.columnHandles = columnHandles; 54 | 55 | Map nameToIndex = new HashMap<>(); 56 | for (int i = 0; i < columnHandles.size(); i++) { 57 | nameToIndex.put(columnHandles.get(i).getColumnName(), i); 58 | } 59 | 60 | Map> getters = Map.of( 61 | "object_id", ObjectsRecordCursor::getObjectId, 62 | "contents", ObjectsRecordCursor::getContents); 63 | 64 | for (Map.Entry> entry : getters.entrySet()) { 65 | String k = entry.getKey(); 66 | if (nameToIndex.containsKey(k)) { 67 | strFieldGetters.put(nameToIndex.get(k), entry.getValue()); 68 | } 69 | } 70 | 71 | fileRepo = (FileRepository) repo.getRepository(); 72 | Collection packs = fileRepo.getObjectDatabase().getPacks(); 73 | this.packs = packs.iterator(); 74 | } 75 | 76 | @Override 77 | public long getCompletedBytes() 78 | { 79 | return 0; 80 | } 81 | 82 | @Override 83 | public long getReadTimeNanos() 84 | { 85 | return 0; 86 | } 87 | 88 | @Override 89 | public Type getType(int field) 90 | { 91 | checkArgument(field < columnHandles.size(), "Invalid field index"); 92 | return columnHandles.get(field).getColumnType(); 93 | } 94 | 95 | @Override 96 | public boolean advanceNextPosition() 97 | { 98 | if (packs == null) { 99 | return false; 100 | } 101 | if (entries == null || !entries.hasNext()) { 102 | if (!packs.hasNext()) { 103 | return false; 104 | } 105 | entries = packs.next().iterator(); 106 | } 107 | 108 | objectId = entries.next().toObjectId(); 109 | try { 110 | loader = fileRepo.open(objectId); 111 | if (loader.getType() != OBJ_BLOB) { 112 | return advanceNextPosition(); 113 | } 114 | } 115 | catch (IOException e) { 116 | return advanceNextPosition(); 117 | } 118 | 119 | return true; 120 | } 121 | 122 | @Override 123 | 
public boolean getBoolean(int field) 124 | { 125 | throw new UnsupportedOperationException(); 126 | } 127 | 128 | @Override 129 | public long getLong(int field) 130 | { 131 | throw new UnsupportedOperationException(); 132 | } 133 | 134 | @Override 135 | public double getDouble(int field) 136 | { 137 | throw new UnsupportedOperationException(); 138 | } 139 | 140 | @Override 141 | public Slice getSlice(int field) 142 | { 143 | checkArgument(strFieldGetters.containsKey(field), "Invalid field index"); 144 | return strFieldGetters.get(field).apply(this); 145 | } 146 | 147 | @Override 148 | public Object getObject(int field) 149 | { 150 | throw new UnsupportedOperationException(); 151 | } 152 | 153 | @Override 154 | public boolean isNull(int field) 155 | { 156 | return false; 157 | } 158 | 159 | @Override 160 | public void close() 161 | { 162 | } 163 | 164 | private Slice getObjectId() 165 | { 166 | return Slices.utf8Slice(objectId.getName()); 167 | } 168 | 169 | private Slice getContents() 170 | { 171 | byte[] bytes = loader.getBytes(); 172 | return Slices.wrappedBuffer(bytes, 0, bytes.length); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/TagsRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.type.Type; 20 | import org.eclipse.jgit.api.Git; 21 | import org.eclipse.jgit.api.errors.GitAPIException; 22 | import org.eclipse.jgit.errors.IncorrectObjectTypeException; 23 | import org.eclipse.jgit.lib.Ref; 24 | import org.eclipse.jgit.revwalk.RevTag; 25 | import org.eclipse.jgit.revwalk.RevWalk; 26 | 27 | import java.io.IOException; 28 | import java.io.UncheckedIOException; 29 | import java.util.Iterator; 30 | import java.util.List; 31 | 32 | import static com.google.common.base.Preconditions.checkArgument; 33 | import static com.google.common.base.Preconditions.checkState; 34 | import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; 35 | import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey; 36 | import static java.util.Arrays.asList; 37 | 38 | public class TagsRecordCursor 39 | implements RecordCursor 40 | { 41 | private final List columnHandles; 42 | private final int[] fieldToColumnIndex; 43 | 44 | private final boolean parseTag; 45 | private final RevWalk walk; 46 | private Iterator tags; 47 | 48 | private List fields; 49 | 50 | public TagsRecordCursor(List columnHandles, Git repo) 51 | { 52 | this.columnHandles = columnHandles; 53 | 54 | fieldToColumnIndex = new int[columnHandles.size()]; 55 | for (int i = 0; i < columnHandles.size(); i++) { 56 | fieldToColumnIndex[i] = columnHandles.get(i).getOrdinalPosition(); 57 | } 58 | 59 | parseTag = columnHandles.stream() 60 | .anyMatch(column -> column.getColumnName().equals("tag_time")); 61 | walk = parseTag ? 
new RevWalk(repo.getRepository()) : null; 62 | try { 63 | tags = repo.tagList().call().iterator(); 64 | } 65 | catch (GitAPIException ignored) { 66 | //pass 67 | } 68 | } 69 | 70 | @Override 71 | public long getCompletedBytes() 72 | { 73 | return 0; 74 | } 75 | 76 | @Override 77 | public long getReadTimeNanos() 78 | { 79 | return 0; 80 | } 81 | 82 | @Override 83 | public Type getType(int field) 84 | { 85 | checkArgument(field < columnHandles.size(), "Invalid field index"); 86 | return columnHandles.get(field).getColumnType(); 87 | } 88 | 89 | @Override 90 | public boolean advanceNextPosition() 91 | { 92 | if (walk != null) { 93 | walk.reset(); 94 | } 95 | if (!tags.hasNext()) { 96 | return false; 97 | } 98 | Ref tag = tags.next(); 99 | 100 | boolean annotated; 101 | Long tagTime = null; 102 | if (parseTag) { 103 | RevTag revTag = null; 104 | try { 105 | revTag = walk.parseTag(tag.getObjectId()); 106 | annotated = true; 107 | } 108 | catch (IncorrectObjectTypeException e) { 109 | annotated = false; 110 | } 111 | catch (IOException e) { 112 | throw new UncheckedIOException(e); 113 | } 114 | if (annotated) { 115 | tagTime = packDateTimeWithZone(revTag.getTaggerIdent().getWhenAsInstant().toEpochMilli(), getTimeZoneKey(revTag.getTaggerIdent().getZoneId().getId())); 116 | } 117 | } 118 | 119 | fields = asList( 120 | tag.getObjectId().getName(), 121 | tag.getName(), 122 | tagTime); 123 | 124 | return true; 125 | } 126 | 127 | private Object getFieldValue(int field) 128 | { 129 | checkState(fields != null, "Cursor has not been advanced yet"); 130 | 131 | int columnIndex = fieldToColumnIndex[field]; 132 | return fields.get(columnIndex); 133 | } 134 | 135 | @Override 136 | public boolean getBoolean(int field) 137 | { 138 | throw new UnsupportedOperationException(); 139 | } 140 | 141 | @Override 142 | public long getLong(int field) 143 | { 144 | return (Long) getFieldValue(field); 145 | } 146 | 147 | @Override 148 | public double getDouble(int field) 149 | { 150 | throw new 
UnsupportedOperationException(); 151 | } 152 | 153 | @Override 154 | public Slice getSlice(int field) 155 | { 156 | return Slices.utf8Slice((String) getFieldValue(field)); 157 | } 158 | 159 | @Override 160 | public Object getObject(int field) 161 | { 162 | throw new UnsupportedOperationException(); 163 | } 164 | 165 | @Override 166 | public boolean isNull(int field) 167 | { 168 | return getFieldValue(field) == null; 169 | } 170 | 171 | @Override 172 | public void close() 173 | { 174 | if (walk != null) { 175 | walk.close(); 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitRecordSet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import com.google.common.hash.Hashing; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.connector.RecordSet; 20 | import io.trino.spi.type.Type; 21 | import org.eclipse.jgit.api.CloneCommand; 22 | import org.eclipse.jgit.api.Git; 23 | import org.eclipse.jgit.api.errors.GitAPIException; 24 | import org.eclipse.jgit.lib.Repository; 25 | import org.eclipse.jgit.storage.file.FileRepositoryBuilder; 26 | import org.eclipse.jgit.transport.RemoteConfig; 27 | import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider; 28 | 29 | import java.io.File; 30 | import java.io.IOException; 31 | import java.io.UncheckedIOException; 32 | import java.net.URI; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Optional; 36 | 37 | import static java.nio.charset.StandardCharsets.UTF_8; 38 | import static java.util.Objects.requireNonNull; 39 | 40 | public class GitRecordSet 41 | implements RecordSet 42 | { 43 | private final List columnHandles; 44 | private final List columnTypes; 45 | private final String tableName; 46 | private final Git repo; 47 | private final Optional> commitIds; 48 | 49 | public GitRecordSet(GitSplit split, GitTableHandle table, List columnHandles) 50 | { 51 | requireNonNull(split, "split is null"); 52 | 53 | this.columnHandles = requireNonNull(columnHandles, "column handles is null"); 54 | ImmutableList.Builder types = ImmutableList.builder(); 55 | for (GitColumnHandle column : columnHandles) { 56 | types.add(column.getColumnType()); 57 | } 58 | this.columnTypes = types.build(); 59 | this.tableName = split.getTableName(); 60 | Optional> splitCommits = split.getCommitIds(); 61 | if (splitCommits.isEmpty()) { 62 | splitCommits = table.getCommitIds(); 63 | } 64 | else if (table.getCommitIds().isPresent()) { 65 | splitCommits.get().addAll(table.getCommitIds().get()); 66 | } 67 | this.commitIds = 
splitCommits; 68 | 69 | this.repo = getRepo(split.getUri()); 70 | } 71 | 72 | @Override 73 | public List getColumnTypes() 74 | { 75 | return columnTypes; 76 | } 77 | 78 | @Override 79 | public RecordCursor cursor() 80 | { 81 | Map map = Map.of( 82 | "branches", (columnHandles, repo, commitIds) -> new BranchesRecordCursor(columnHandles, repo), 83 | "commits", CommitsRecordCursor::new, 84 | "diff_stats", DiffStatsRecordCursor::new, 85 | "objects", (columnHandles, repo, commitIds) -> new ObjectsRecordCursor(columnHandles, repo), 86 | "tags", (columnHandles, repo, commitIds) -> new TagsRecordCursor(columnHandles, repo), 87 | "trees", TreesRecordCursor::new); 88 | RecordCursorProvider recordCursorProvider = map.get(tableName); 89 | if (recordCursorProvider == null) { 90 | return null; 91 | } 92 | return recordCursorProvider.create(columnHandles, repo, commitIds); 93 | } 94 | 95 | private Git getRepo(URI uri) 96 | { 97 | String url = uri.toString(); 98 | File localPath; 99 | try { 100 | localPath = ensureDir(url); 101 | } 102 | catch (IOException e) { 103 | throw new UncheckedIOException(e); 104 | } 105 | if (!localPath.exists()) { 106 | CloneCommand repo = Git.cloneRepository() 107 | .setURI(url) 108 | .setDirectory(localPath); 109 | if (uri.getUserInfo() != null && !uri.getUserInfo().isEmpty()) { 110 | String[] parts = uri.getUserInfo().split(":", 2); 111 | UsernamePasswordCredentialsProvider credentials = new UsernamePasswordCredentialsProvider(parts[0], parts.length > 1 ? 
parts[1] : ""); 112 | repo.setCredentialsProvider(credentials); 113 | } 114 | try { 115 | return repo.call(); 116 | } 117 | catch (GitAPIException e) { 118 | throw new RuntimeException(e); 119 | } 120 | } 121 | Repository fileRepo; 122 | try { 123 | fileRepo = new FileRepositoryBuilder() 124 | .setGitDir(new File(localPath, ".git")) 125 | .build(); 126 | } 127 | catch (IOException e) { 128 | throw new RuntimeException(e); 129 | } 130 | Git repo = new Git(fileRepo); 131 | try { 132 | List remotes = repo.remoteList().call(); 133 | if (!remotes.isEmpty()) { 134 | repo.fetch().setCheckFetchedObjects(true).call(); 135 | } 136 | } 137 | catch (GitAPIException e) { 138 | throw new RuntimeException(e); 139 | } 140 | return repo; 141 | } 142 | 143 | public static File ensureDir(String prefix) 144 | throws IOException 145 | { 146 | String tmpDirStr = System.getProperty("java.io.tmpdir"); 147 | if (tmpDirStr == null) { 148 | throw new IOException( 149 | "System property 'java.io.tmpdir' does not specify a tmp dir"); 150 | } 151 | 152 | File tmpDir = new File(tmpDirStr); 153 | if (!tmpDir.exists()) { 154 | boolean created = tmpDir.mkdirs(); 155 | if (!created) { 156 | throw new IOException("Unable to create tmp dir " + tmpDir); 157 | } 158 | } 159 | 160 | return new File(tmpDir, Hashing.sha256().hashString(prefix, UTF_8).toString()); 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/CommitsRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.block.Block; 19 | import io.trino.spi.block.BlockBuilder; 20 | import io.trino.spi.connector.RecordCursor; 21 | import io.trino.spi.type.DateTimeEncoding; 22 | import io.trino.spi.type.Type; 23 | import io.trino.spi.type.TypeUtils; 24 | import io.trino.spi.type.VarcharType; 25 | import org.eclipse.jgit.api.Git; 26 | import org.eclipse.jgit.lib.ObjectId; 27 | import org.eclipse.jgit.lib.Ref; 28 | import org.eclipse.jgit.lib.RefDatabase; 29 | import org.eclipse.jgit.revwalk.RevCommit; 30 | import org.eclipse.jgit.revwalk.RevWalk; 31 | 32 | import java.io.IOException; 33 | import java.util.Collection; 34 | import java.util.HashMap; 35 | import java.util.Iterator; 36 | import java.util.List; 37 | import java.util.Map; 38 | import java.util.Objects; 39 | import java.util.Optional; 40 | import java.util.function.Function; 41 | 42 | import static com.google.common.base.Preconditions.checkArgument; 43 | 44 | public class CommitsRecordCursor 45 | implements RecordCursor 46 | { 47 | private final List columnHandles; 48 | private final Map> longFieldGetters = new HashMap<>(); 49 | private final Map> strFieldGetters = new HashMap<>(); 50 | private final Map> objFieldGetters = new HashMap<>(); 51 | 52 | private final Iterator commits; 53 | 54 | private RevCommit commit; 55 | 56 | public CommitsRecordCursor(List columnHandles, Git repo, Optional> commitIds) 57 | { 58 | this.columnHandles = 
columnHandles; 59 | 60 | Map nameToIndex = new HashMap<>(); 61 | for (int i = 0; i < columnHandles.size(); i++) { 62 | nameToIndex.put(columnHandles.get(i).getColumnName(), i); 63 | } 64 | 65 | if (nameToIndex.containsKey("commit_time")) { 66 | longFieldGetters.put( 67 | nameToIndex.get("commit_time"), 68 | c -> DateTimeEncoding.packDateTimeWithZone(c.getCommitTime() * 1000L, c.getCommitterIdent().getTimeZoneOffset())); 69 | } 70 | 71 | Map> getters = Map.of( 72 | "object_id", RevCommit::getName, 73 | "author_name", c -> c.getAuthorIdent().getName(), 74 | "author_email", c -> c.getAuthorIdent().getEmailAddress(), 75 | "committer_name", c -> c.getCommitterIdent().getName(), 76 | "committer_email", c -> c.getCommitterIdent().getEmailAddress(), 77 | "message", RevCommit::getFullMessage, 78 | "tree_id", c -> c.getTree().getName()); 79 | 80 | for (Map.Entry> entry : getters.entrySet()) { 81 | String k = entry.getKey(); 82 | if (nameToIndex.containsKey(k)) { 83 | strFieldGetters.put(nameToIndex.get(k), entry.getValue()); 84 | } 85 | } 86 | 87 | if (nameToIndex.containsKey("parents")) { 88 | objFieldGetters.put(nameToIndex.get("parents"), CommitsRecordCursor::getParents); 89 | } 90 | 91 | RefDatabase refDb = repo.getRepository().getRefDatabase(); 92 | RevWalk revWalk = new RevWalk(repo.getRepository()); 93 | 94 | if (commitIds.isEmpty()) { 95 | try { 96 | Collection allRefs = refDb.getRefs(); 97 | for (Ref ref : allRefs) { 98 | revWalk.markStart(revWalk.parseCommit(ref.getObjectId())); 99 | } 100 | } 101 | catch (IOException ignored) { 102 | // pass 103 | } 104 | commits = revWalk.iterator(); 105 | } 106 | else { 107 | commits = commitIds.get().stream().map(id -> { 108 | try { 109 | return revWalk.parseCommit(ObjectId.fromString(id)); 110 | } 111 | catch (IOException ignored) { 112 | // ignore invalid commits 113 | return null; 114 | } 115 | }).filter(Objects::nonNull).iterator(); 116 | } 117 | } 118 | 119 | public static Block getParents(RevCommit c) 120 | { 121 | Type 
elementType = VarcharType.VARCHAR; 122 | RevCommit[] parents = c.getParents(); 123 | BlockBuilder builder = elementType.createBlockBuilder(null, parents.length); 124 | for (RevCommit p : parents) { 125 | TypeUtils.writeNativeValue(elementType, builder, p.getName()); 126 | } 127 | return builder.build(); 128 | } 129 | 130 | @Override 131 | public long getCompletedBytes() 132 | { 133 | return 0; 134 | } 135 | 136 | @Override 137 | public long getReadTimeNanos() 138 | { 139 | return 0; 140 | } 141 | 142 | @Override 143 | public Type getType(int field) 144 | { 145 | checkArgument(field < columnHandles.size(), "Invalid field index"); 146 | return columnHandles.get(field).getColumnType(); 147 | } 148 | 149 | @Override 150 | public boolean advanceNextPosition() 151 | { 152 | if (commits == null || !commits.hasNext()) { 153 | return false; 154 | } 155 | 156 | commit = commits.next(); 157 | 158 | return true; 159 | } 160 | 161 | @Override 162 | public boolean getBoolean(int field) 163 | { 164 | throw new UnsupportedOperationException(); 165 | } 166 | 167 | @Override 168 | public long getLong(int field) 169 | { 170 | checkArgument(longFieldGetters.containsKey(field), "Invalid field index"); 171 | return longFieldGetters.get(field).apply(commit); 172 | } 173 | 174 | @Override 175 | public double getDouble(int field) 176 | { 177 | throw new UnsupportedOperationException(); 178 | } 179 | 180 | @Override 181 | public Slice getSlice(int field) 182 | { 183 | checkArgument(strFieldGetters.containsKey(field), "Invalid field index"); 184 | return Slices.utf8Slice(strFieldGetters.get(field).apply(commit)); 185 | } 186 | 187 | @Override 188 | public Object getObject(int field) 189 | { 190 | checkArgument(objFieldGetters.containsKey(field), "Invalid field index"); 191 | return objFieldGetters.get(field).apply(commit); 192 | } 193 | 194 | @Override 195 | public boolean isNull(int field) 196 | { 197 | return false; 198 | } 199 | 200 | @Override 201 | public void close() 202 | { 203 | } 
204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/TreesRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.type.Type; 20 | import org.eclipse.jgit.api.Git; 21 | import org.eclipse.jgit.lib.FileMode; 22 | import org.eclipse.jgit.lib.ObjectId; 23 | import org.eclipse.jgit.lib.Ref; 24 | import org.eclipse.jgit.lib.RefDatabase; 25 | import org.eclipse.jgit.revwalk.RevCommit; 26 | import org.eclipse.jgit.revwalk.RevWalk; 27 | import org.eclipse.jgit.treewalk.TreeWalk; 28 | 29 | import java.io.IOException; 30 | import java.util.Collection; 31 | import java.util.HashMap; 32 | import java.util.Iterator; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Objects; 36 | import java.util.Optional; 37 | import java.util.function.Function; 38 | 39 | import static com.google.common.base.Preconditions.checkArgument; 40 | 41 | public class TreesRecordCursor 42 | implements RecordCursor 43 | { 44 | private final List columnHandles; 45 | private final Map> intFieldGetters = new HashMap<>(); 46 | private final Map> strFieldGetters = new HashMap<>(); 47 | 48 | 
private final Git repo; 49 | private final Iterator commits; 50 | private TreeWalk treeWalk; 51 | private RevCommit commit; 52 | 53 | private final Map fileModeNames = Map.of( 54 | FileMode.EXECUTABLE_FILE, "Executable File", 55 | FileMode.REGULAR_FILE, "Normal File", 56 | FileMode.TREE, "Directory", 57 | FileMode.SYMLINK, "Symlink", 58 | FileMode.GITLINK, "Gitlink"); 59 | 60 | public TreesRecordCursor(List columnHandles, Git repo, Optional> commitIds) 61 | { 62 | this.repo = repo; 63 | this.columnHandles = columnHandles; 64 | 65 | Map nameToIndex = new HashMap<>(); 66 | for (int i = 0; i < columnHandles.size(); i++) { 67 | nameToIndex.put(columnHandles.get(i).getColumnName(), i); 68 | } 69 | 70 | if (nameToIndex.containsKey("depth")) { 71 | intFieldGetters.put(nameToIndex.get("depth"), TreeWalk::getDepth); 72 | } 73 | 74 | Map> getters = Map.of( 75 | "commit_id", TreesRecordCursor::getCommitId, 76 | "object_type", TreesRecordCursor::getObjectType, 77 | "object_id", TreesRecordCursor::getObjectId, 78 | "file_name", TreesRecordCursor::getFileName, 79 | "path_name", TreesRecordCursor::getPathName, 80 | "attributes", TreesRecordCursor::getAttributes); 81 | 82 | for (Map.Entry> entry : getters.entrySet()) { 83 | String k = entry.getKey(); 84 | if (nameToIndex.containsKey(k)) { 85 | strFieldGetters.put(nameToIndex.get(k), entry.getValue()); 86 | } 87 | } 88 | 89 | RefDatabase refDb = repo.getRepository().getRefDatabase(); 90 | RevWalk revWalk = new RevWalk(repo.getRepository()); 91 | if (commitIds.isEmpty()) { 92 | try { 93 | Collection allRefs = refDb.getRefs(); 94 | for (Ref ref : allRefs) { 95 | revWalk.markStart(revWalk.parseCommit(ref.getObjectId())); 96 | } 97 | } 98 | catch (IOException ignored) { 99 | // pass 100 | } 101 | commits = revWalk.iterator(); 102 | } 103 | else { 104 | commits = commitIds.get().stream().map(id -> { 105 | try { 106 | return revWalk.parseCommit(ObjectId.fromString(id)); 107 | } 108 | catch (IOException ignored) { 109 | // ignore invalid 
commits 110 | return null; 111 | } 112 | }).filter(Objects::nonNull).iterator(); 113 | } 114 | } 115 | 116 | private String getFileMode(FileMode fileMode) 117 | { 118 | if (!fileModeNames.containsKey(fileMode)) { 119 | // there are a few others, see FileMode javadoc for details 120 | throw new IllegalArgumentException("Unknown type of file encountered: " + fileMode); 121 | } 122 | return fileModeNames.get(fileMode); 123 | } 124 | 125 | @Override 126 | public long getCompletedBytes() 127 | { 128 | return 0; 129 | } 130 | 131 | @Override 132 | public long getReadTimeNanos() 133 | { 134 | return 0; 135 | } 136 | 137 | @Override 138 | public Type getType(int field) 139 | { 140 | checkArgument(field < columnHandles.size(), "Invalid field index"); 141 | return columnHandles.get(field).getColumnType(); 142 | } 143 | 144 | @Override 145 | public boolean advanceNextPosition() 146 | { 147 | if (commits == null) { 148 | return false; 149 | } 150 | 151 | try { 152 | if (treeWalk == null || !treeWalk.next()) { 153 | if (!commits.hasNext()) { 154 | return false; 155 | } 156 | commit = commits.next(); 157 | treeWalk = new TreeWalk(repo.getRepository()); 158 | treeWalk.addTree(commit.getTree()); 159 | treeWalk.setRecursive(true); 160 | if (!treeWalk.next()) { 161 | return false; 162 | } 163 | } 164 | } 165 | catch (IOException ignores) { 166 | // pass 167 | } 168 | 169 | return true; 170 | } 171 | 172 | @Override 173 | public boolean getBoolean(int field) 174 | { 175 | throw new UnsupportedOperationException(); 176 | } 177 | 178 | @Override 179 | public long getLong(int field) 180 | { 181 | checkArgument(intFieldGetters.containsKey(field), "Invalid field index"); 182 | return intFieldGetters.get(field).apply(treeWalk); 183 | } 184 | 185 | @Override 186 | public double getDouble(int field) 187 | { 188 | throw new UnsupportedOperationException(); 189 | } 190 | 191 | @Override 192 | public Slice getSlice(int field) 193 | { 194 | checkArgument(strFieldGetters.containsKey(field), 
"Invalid field index"); 195 | return Slices.utf8Slice(strFieldGetters.get(field).apply(this)); 196 | } 197 | 198 | @Override 199 | public Object getObject(int field) 200 | { 201 | throw new UnsupportedOperationException(); 202 | } 203 | 204 | @Override 205 | public boolean isNull(int field) 206 | { 207 | return false; 208 | } 209 | 210 | @Override 211 | public void close() 212 | { 213 | } 214 | 215 | private String getCommitId() 216 | { 217 | return commit.getName(); 218 | } 219 | 220 | private String getObjectType() 221 | { 222 | return getFileMode(treeWalk.getFileMode()); 223 | } 224 | 225 | private String getObjectId() 226 | { 227 | return treeWalk.getObjectId(0).getName(); 228 | } 229 | 230 | private String getFileName() 231 | { 232 | return treeWalk.getNameString(); 233 | } 234 | 235 | private String getPathName() 236 | { 237 | return treeWalk.getPathString(); 238 | } 239 | 240 | private String getAttributes() 241 | { 242 | return treeWalk.getAttributes().toString(); 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.trino.spi.TrinoException; 17 | import io.trino.spi.connector.ColumnMetadata; 18 | import io.trino.spi.connector.ConnectorTableMetadata; 19 | import io.trino.spi.connector.SchemaNotFoundException; 20 | import io.trino.spi.connector.SchemaTableName; 21 | import io.trino.spi.connector.TableNotFoundException; 22 | import io.trino.spi.type.ArrayType; 23 | import org.eclipse.jgit.api.errors.GitAPIException; 24 | import org.junit.jupiter.api.BeforeEach; 25 | import org.junit.jupiter.api.Test; 26 | import org.junit.jupiter.api.parallel.Execution; 27 | import org.junit.jupiter.api.parallel.ExecutionMode; 28 | 29 | import java.io.IOException; 30 | import java.net.URI; 31 | import java.net.URISyntaxException; 32 | import java.util.List; 33 | import java.util.Map; 34 | import java.util.Optional; 35 | import java.util.OptionalLong; 36 | import java.util.Set; 37 | 38 | import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType; 39 | import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; 40 | import static io.trino.testing.TestingConnectorSession.SESSION; 41 | import static org.assertj.core.api.Assertions.assertThat; 42 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 43 | import static org.assertj.core.api.Assertions.fail; 44 | 45 | @Execution(ExecutionMode.SAME_THREAD) 46 | public class TestGitMetadata 47 | { 48 | private GitTableHandle commitsTableHandle; 49 | private GitMetadata metadata; 50 | 51 | @BeforeEach 52 | public void setUp() 53 | throws IOException, GitAPIException, URISyntaxException 54 | { 55 | commitsTableHandle = new GitTableHandle("default", "commits", Optional.empty(), OptionalLong.empty()); 56 | 57 | String url = "fake.example"; 58 | TestGitClient.setupRepo(URI.create(url)); 59 | 60 | GitConfig config = new GitConfig(); 61 | config.setUri(new URI(url)); 62 | GitClient client = new GitClient(config); 63 | metadata = new 
GitMetadata("test", client); 64 | } 65 | 66 | @Test 67 | public void testListSchemaNames() 68 | { 69 | assertThat(metadata.listSchemaNames(SESSION)).containsOnly("default"); 70 | } 71 | 72 | @Test 73 | public void testGetTableHandle() 74 | { 75 | assertThat(metadata.getTableHandle(SESSION, new SchemaTableName("example", "unknown"), Optional.empty(), Optional.empty())).isNull(); 76 | assertThat(metadata.getTableHandle(SESSION, new SchemaTableName("unknown", "numbers"), Optional.empty(), Optional.empty())).isNull(); 77 | assertThat(metadata.getTableHandle(SESSION, new SchemaTableName("unknown", "unknown"), Optional.empty(), Optional.empty())).isNull(); 78 | } 79 | 80 | @Test 81 | public void testGetColumnHandles() 82 | { 83 | // known table 84 | assertThat(metadata.getColumnHandles(SESSION, commitsTableHandle)).isEqualTo(Map.of( 85 | "object_id", new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 86 | "author_name", new GitColumnHandle("author_name", createUnboundedVarcharType(), 1), 87 | "author_email", new GitColumnHandle("author_email", createUnboundedVarcharType(), 2), 88 | "committer_name", new GitColumnHandle("committer_name", createUnboundedVarcharType(), 3), 89 | "committer_email", new GitColumnHandle("committer_email", createUnboundedVarcharType(), 4), 90 | "message", new GitColumnHandle("message", createUnboundedVarcharType(), 5), 91 | "parents", new GitColumnHandle("parents", new ArrayType(createUnboundedVarcharType()), 6), 92 | "tree_id", new GitColumnHandle("tree_id", createUnboundedVarcharType(), 7), 93 | "commit_time", new GitColumnHandle("commit_time", createTimestampWithTimeZoneType(0), 8))); 94 | 95 | // unknown table 96 | try { 97 | metadata.getColumnHandles(SESSION, new GitTableHandle("unknown", "unknown", Optional.empty(), OptionalLong.empty())); 98 | fail("Expected getColumnHandle of unknown table to throw a TableNotFoundException"); 99 | } 100 | catch (TableNotFoundException expected) { 101 | } 102 | } 103 | 104 | @Test 105 | 
public void getTableMetadata() 106 | { 107 | // known table 108 | ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, commitsTableHandle); 109 | assertThat(tableMetadata.getTable().getSchemaName()).isEqualTo("default"); 110 | assertThat(tableMetadata.getColumns()).isEqualTo(List.of( 111 | new ColumnMetadata("object_id", createUnboundedVarcharType()), 112 | new ColumnMetadata("author_name", createUnboundedVarcharType()), 113 | new ColumnMetadata("author_email", createUnboundedVarcharType()), 114 | new ColumnMetadata("committer_name", createUnboundedVarcharType()), 115 | new ColumnMetadata("committer_email", createUnboundedVarcharType()), 116 | new ColumnMetadata("message", createUnboundedVarcharType()), 117 | new ColumnMetadata("parents", new ArrayType(createUnboundedVarcharType())), 118 | new ColumnMetadata("tree_id", createUnboundedVarcharType()), 119 | new ColumnMetadata("commit_time", createTimestampWithTimeZoneType(0)))); 120 | 121 | // unknown tables should produce null 122 | assertThat(metadata.getTableMetadata(SESSION, new GitTableHandle("unknown", "unknown", Optional.empty(), OptionalLong.empty()))).isNull(); 123 | assertThat(metadata.getTableMetadata(SESSION, new GitTableHandle("example", "unknown", Optional.empty(), OptionalLong.empty()))).isNull(); 124 | assertThat(metadata.getTableMetadata(SESSION, new GitTableHandle("unknown", "numbers", Optional.empty(), OptionalLong.empty()))).isNull(); 125 | } 126 | 127 | @Test 128 | public void testListTables() 129 | { 130 | // all schemas 131 | assertThat(Set.copyOf(metadata.listTables(SESSION, Optional.empty()))).isEqualTo(Set.of( 132 | new SchemaTableName("default", "commits"), 133 | new SchemaTableName("default", "branches"), 134 | new SchemaTableName("default", "diff_stats"), 135 | new SchemaTableName("default", "objects"), 136 | new SchemaTableName("default", "tags"), 137 | new SchemaTableName("default", "trees"))); 138 | 139 | // unknown schema 140 | try { 141 | 
metadata.listTables(SESSION, Optional.of("unknown")); 142 | fail("Expected listTables of unknown schema to throw a SchemaNotFoundException"); 143 | } 144 | catch (SchemaNotFoundException expected) { 145 | } 146 | } 147 | 148 | @Test 149 | public void getColumnMetadata() 150 | { 151 | ColumnMetadata actualColumn = metadata.getColumnMetadata(SESSION, commitsTableHandle, 152 | new GitColumnHandle("text", createUnboundedVarcharType(), 0)); 153 | assertThat(actualColumn).isEqualTo(new ColumnMetadata("text", createUnboundedVarcharType())); 154 | 155 | // example connector assumes that the table handle and column handle are 156 | // properly formed, so it will return a metadata object for any 157 | // GitTableHandle and GitColumnHandle passed in. This is on because 158 | // it is not possible for the Trino Metadata system to create the handles 159 | // directly. 160 | } 161 | 162 | @Test 163 | public void testDropTableTable() 164 | { 165 | assertThatThrownBy(() -> metadata.dropTable(SESSION, commitsTableHandle)).isInstanceOf(TrinoException.class); 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.common.io.Resources; 17 | import com.google.inject.Inject; 18 | import io.trino.spi.connector.ConnectorViewDefinition; 19 | import io.trino.spi.type.ArrayType; 20 | import io.trino.spi.type.BigintType; 21 | import io.trino.spi.type.BooleanType; 22 | import io.trino.spi.type.DoubleType; 23 | import io.trino.spi.type.IntegerType; 24 | import io.trino.spi.type.TimestampWithTimeZoneType; 25 | import io.trino.spi.type.VarbinaryType; 26 | import io.trino.spi.type.VarcharType; 27 | 28 | import java.io.IOException; 29 | import java.util.List; 30 | import java.util.Map; 31 | import java.util.Optional; 32 | import java.util.Set; 33 | import java.util.stream.Collectors; 34 | import java.util.stream.Stream; 35 | 36 | import static java.lang.String.format; 37 | import static java.nio.charset.StandardCharsets.UTF_8; 38 | import static java.util.Objects.requireNonNull; 39 | 40 | public class GitClient 41 | { 42 | public static enum Table { 43 | branches, 44 | commits, 45 | diff_stats, 46 | tags, 47 | trees, 48 | objects, 49 | } 50 | 51 | public static enum TreesColumns { 52 | commit_id, 53 | } 54 | 55 | public static enum View { 56 | idents, 57 | commit_stats, 58 | } 59 | 60 | private final Map> columns = Map.of( 61 | Table.branches.name(), List.of( 62 | new GitColumn("object_id", VarcharType.VARCHAR), 63 | new GitColumn("name", VarcharType.VARCHAR), 64 | new GitColumn("is_merged", BooleanType.BOOLEAN)), 65 | Table.commits.name(), List.of( 66 | new GitColumn("object_id", VarcharType.VARCHAR), 67 | new GitColumn("author_name", VarcharType.VARCHAR), 68 | new GitColumn("author_email", VarcharType.VARCHAR), 69 | new GitColumn("committer_name", VarcharType.VARCHAR), 70 | new GitColumn("committer_email", VarcharType.VARCHAR), 71 | new GitColumn("message", VarcharType.VARCHAR), 72 | new GitColumn("parents", new ArrayType(VarcharType.VARCHAR)), 73 | new GitColumn("tree_id", VarcharType.VARCHAR), 74 | new 
GitColumn("commit_time", TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS)), 75 | Table.diff_stats.name(), List.of( 76 | new GitColumn("commit_id", VarcharType.VARCHAR), 77 | new GitColumn("old_commit_id", VarcharType.VARCHAR), 78 | new GitColumn("object_id", VarcharType.VARCHAR), 79 | new GitColumn("path_name", VarcharType.VARCHAR), 80 | new GitColumn("old_path_name", VarcharType.VARCHAR), 81 | new GitColumn("change_type", VarcharType.VARCHAR), 82 | new GitColumn("similarity_score", IntegerType.INTEGER), 83 | new GitColumn("added_lines", IntegerType.INTEGER), 84 | new GitColumn("deleted_lines", IntegerType.INTEGER)), 85 | Table.tags.name(), List.of( 86 | new GitColumn("object_id", VarcharType.VARCHAR), 87 | new GitColumn("name", VarcharType.VARCHAR), 88 | new GitColumn("tag_time", TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS)), 89 | Table.trees.name(), List.of( 90 | new GitColumn(TreesColumns.commit_id.name(), VarcharType.VARCHAR), 91 | new GitColumn("object_type", VarcharType.VARCHAR), 92 | new GitColumn("object_id", VarcharType.VARCHAR), 93 | new GitColumn("file_name", VarcharType.VARCHAR), 94 | new GitColumn("path_name", VarcharType.VARCHAR), 95 | new GitColumn("attributes", VarcharType.VARCHAR), 96 | new GitColumn("depth", IntegerType.INTEGER)), 97 | Table.objects.name(), List.of( 98 | new GitColumn("object_id", VarcharType.VARCHAR), 99 | new GitColumn("contents", VarbinaryType.VARBINARY))); 100 | 101 | Map> viewColumns = Map.of( 102 | View.idents.name(), 103 | List.of( 104 | new ConnectorViewDefinition.ViewColumn("email", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 105 | new ConnectorViewDefinition.ViewColumn("name", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 106 | new ConnectorViewDefinition.ViewColumn("extra_emails", new ArrayType(VarcharType.VARCHAR).getTypeId(), Optional.empty()), 107 | new ConnectorViewDefinition.ViewColumn("extra_names", new ArrayType(VarcharType.VARCHAR).getTypeId(), Optional.empty())), 108 | View.commit_stats.name(), 
109 | List.of( 110 | new ConnectorViewDefinition.ViewColumn("object_id", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 111 | new ConnectorViewDefinition.ViewColumn("author_name", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 112 | new ConnectorViewDefinition.ViewColumn("author_email", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 113 | new ConnectorViewDefinition.ViewColumn("committer_name", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 114 | new ConnectorViewDefinition.ViewColumn("committer_email", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 115 | new ConnectorViewDefinition.ViewColumn("message", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 116 | new ConnectorViewDefinition.ViewColumn("parents", new ArrayType(VarcharType.VARCHAR).getTypeId(), Optional.empty()), 117 | new ConnectorViewDefinition.ViewColumn("tree_id", VarcharType.VARCHAR.getTypeId(), Optional.empty()), 118 | new ConnectorViewDefinition.ViewColumn("commit_time", TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS.getTypeId(), Optional.empty()), 119 | 120 | new ConnectorViewDefinition.ViewColumn("added_lines", BigintType.BIGINT.getTypeId(), Optional.empty()), 121 | new ConnectorViewDefinition.ViewColumn("deleted_lines", BigintType.BIGINT.getTypeId(), Optional.empty()), 122 | new ConnectorViewDefinition.ViewColumn("changed_files", BigintType.BIGINT.getTypeId(), Optional.empty()), 123 | new ConnectorViewDefinition.ViewColumn("similarity_score", DoubleType.DOUBLE.getTypeId(), Optional.empty()), 124 | new ConnectorViewDefinition.ViewColumn("change_types", new ArrayType(VarcharType.VARCHAR).getTypeId(), Optional.empty()))); 125 | 126 | @Inject 127 | public GitClient(GitConfig config) 128 | { 129 | requireNonNull(config, "config is null"); 130 | } 131 | 132 | public Set getSchemaNames() 133 | { 134 | return Stream.of("default").collect(Collectors.toSet()); 135 | } 136 | 137 | public Set getTableNames(String schema) 138 | { 139 | requireNonNull(schema, "schema is null"); 140 
| 141 | return columns.keySet(); 142 | } 143 | 144 | public GitTable getTable(String schema, String tableName) 145 | { 146 | requireNonNull(schema, "schema is null"); 147 | requireNonNull(tableName, "tableName is null"); 148 | 149 | List selected = columns.get(tableName); 150 | if (selected == null) { 151 | return null; 152 | } 153 | return new GitTable(tableName, selected); 154 | } 155 | 156 | public ConnectorViewDefinition getView(String catalog, String schema, String viewName) 157 | { 158 | if (!viewColumns.containsKey(viewName)) { 159 | return null; 160 | } 161 | String query; 162 | try { 163 | query = Resources.toString(Resources.getResource(getClass(), format("/sql/%s.sql", viewName)), UTF_8); 164 | } 165 | catch (IOException e) { 166 | return null; 167 | } 168 | return new ConnectorViewDefinition( 169 | query, 170 | Optional.of(catalog), 171 | Optional.of(schema), 172 | viewColumns.get(viewName), 173 | Optional.empty(), 174 | Optional.empty(), 175 | true, 176 | List.of()); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/DiffStatsRecordCursor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.connector.RecordCursor; 19 | import io.trino.spi.type.Type; 20 | import org.eclipse.jgit.api.Git; 21 | import org.eclipse.jgit.diff.DiffEntry; 22 | import org.eclipse.jgit.diff.DiffFormatter; 23 | import org.eclipse.jgit.diff.Edit; 24 | import org.eclipse.jgit.diff.EditList; 25 | import org.eclipse.jgit.lib.ObjectId; 26 | import org.eclipse.jgit.lib.ObjectReader; 27 | import org.eclipse.jgit.lib.Ref; 28 | import org.eclipse.jgit.lib.RefDatabase; 29 | import org.eclipse.jgit.patch.FileHeader; 30 | import org.eclipse.jgit.revwalk.RevCommit; 31 | import org.eclipse.jgit.revwalk.RevWalk; 32 | import org.eclipse.jgit.treewalk.CanonicalTreeParser; 33 | import org.eclipse.jgit.util.io.NullOutputStream; 34 | 35 | import java.io.IOException; 36 | import java.util.Collection; 37 | import java.util.HashMap; 38 | import java.util.Iterator; 39 | import java.util.List; 40 | import java.util.Map; 41 | import java.util.Objects; 42 | import java.util.Optional; 43 | import java.util.function.Function; 44 | 45 | import static com.google.common.base.Preconditions.checkArgument; 46 | 47 | public class DiffStatsRecordCursor 48 | implements RecordCursor 49 | { 50 | private final List columnHandles; 51 | private final Map> intFieldGetters = new HashMap<>(); 52 | private final Map> strFieldGetters = new HashMap<>(); 53 | 54 | private final ObjectReader reader; 55 | private final DiffFormatter formatter; 56 | private final Iterator commits; 57 | private Iterator entries; 58 | 59 | private RevCommit nextCommit; 60 | private RevCommit currentCommit; 61 | private DiffEntry entry; 62 | private EditList edits; 63 | 64 | private final Map changeTypeNames = Map.of( 65 | DiffEntry.ChangeType.ADD, "Add", 66 | DiffEntry.ChangeType.MODIFY, "Modify", 67 | DiffEntry.ChangeType.DELETE, "Delete", 68 | DiffEntry.ChangeType.RENAME, "Rename", 69 | 
DiffEntry.ChangeType.COPY, "Copy"); 70 | 71 | public DiffStatsRecordCursor(List columnHandles, Git repo, Optional> commitIds) 72 | { 73 | this.columnHandles = columnHandles; 74 | 75 | Map nameToIndex = new HashMap<>(); 76 | for (int i = 0; i < columnHandles.size(); i++) { 77 | nameToIndex.put(columnHandles.get(i).getColumnName(), i); 78 | } 79 | 80 | Map> intGetters = Map.of( 81 | "similarity_score", c -> c.entry.getScore(), 82 | "added_lines", DiffStatsRecordCursor::getAddedLines, 83 | "deleted_lines", DiffStatsRecordCursor::getDeletedLines); 84 | 85 | for (Map.Entry> entry : intGetters.entrySet()) { 86 | String k = entry.getKey(); 87 | if (nameToIndex.containsKey(k)) { 88 | intFieldGetters.put(nameToIndex.get(k), entry.getValue()); 89 | } 90 | } 91 | 92 | Map> strGetters = Map.of( 93 | "commit_id", DiffStatsRecordCursor::getCommitId, 94 | "old_commit_id", DiffStatsRecordCursor::getOldCommitId, 95 | "object_id", DiffStatsRecordCursor::getObjectId, 96 | "path_name", DiffStatsRecordCursor::getPathName, 97 | "old_path_name", DiffStatsRecordCursor::getOldPathName, 98 | "change_type", c -> getChangeType(c.entry.getChangeType())); 99 | 100 | for (Map.Entry> entry : strGetters.entrySet()) { 101 | String k = entry.getKey(); 102 | if (nameToIndex.containsKey(k)) { 103 | strFieldGetters.put(nameToIndex.get(k), entry.getValue()); 104 | } 105 | } 106 | 107 | RefDatabase refDb = repo.getRepository().getRefDatabase(); 108 | RevWalk revWalk = new RevWalk(repo.getRepository()); 109 | 110 | if (commitIds.isEmpty()) { 111 | try { 112 | Collection allRefs = refDb.getRefs(); 113 | for (Ref ref : allRefs) { 114 | revWalk.markStart(revWalk.parseCommit(ref.getObjectId())); 115 | } 116 | } 117 | catch (IOException ignored) { 118 | // pass 119 | } 120 | commits = revWalk.iterator(); 121 | } 122 | else { 123 | commits = commitIds.get().stream().map(id -> { 124 | try { 125 | return revWalk.parseCommit(ObjectId.fromString(id)); 126 | } 127 | catch (IOException ignored) { 128 | // ignore 
invalid commits 129 | return null; 130 | } 131 | }).filter(Objects::nonNull).iterator(); 132 | } 133 | 134 | reader = repo.getRepository().newObjectReader(); 135 | formatter = new DiffFormatter(NullOutputStream.INSTANCE); 136 | formatter.setRepository(repo.getRepository()); 137 | formatter.setDetectRenames(true); 138 | } 139 | 140 | @Override 141 | public long getCompletedBytes() 142 | { 143 | return 0; 144 | } 145 | 146 | @Override 147 | public long getReadTimeNanos() 148 | { 149 | return 0; 150 | } 151 | 152 | @Override 153 | public Type getType(int field) 154 | { 155 | checkArgument(field < columnHandles.size(), "Invalid field index"); 156 | return columnHandles.get(field).getColumnType(); 157 | } 158 | 159 | @Override 160 | public boolean advanceNextPosition() 161 | { 162 | if (commits == null) { 163 | return false; 164 | } 165 | 166 | if (entries == null || !entries.hasNext()) { 167 | if (!commits.hasNext()) { 168 | return false; 169 | } 170 | 171 | currentCommit = nextCommit; 172 | nextCommit = commits.next(); 173 | 174 | if (currentCommit == null) { 175 | return advanceNextPosition(); 176 | } 177 | 178 | try { 179 | CanonicalTreeParser oldTreeParser = new CanonicalTreeParser(); 180 | oldTreeParser.reset(reader, currentCommit.getTree().getId()); 181 | CanonicalTreeParser newTreeParser = new CanonicalTreeParser(); 182 | newTreeParser.reset(reader, nextCommit.getTree().getId()); 183 | 184 | // args are reversed since commits are being iterated from latest to oldest 185 | entries = formatter.scan(newTreeParser, oldTreeParser).iterator(); 186 | if (!entries.hasNext()) { 187 | return advanceNextPosition(); 188 | } 189 | } 190 | catch (IOException ignored) { 191 | return false; 192 | } 193 | } 194 | 195 | entry = entries.next(); 196 | 197 | try { 198 | FileHeader header = formatter.toFileHeader(entry); 199 | edits = header.toEditList(); 200 | } 201 | catch (IOException ignored) { 202 | edits = new EditList(); 203 | } 204 | 205 | return true; 206 | } 207 | 208 | 
@Override 209 | public boolean getBoolean(int field) 210 | { 211 | throw new UnsupportedOperationException(); 212 | } 213 | 214 | @Override 215 | public long getLong(int field) 216 | { 217 | checkArgument(intFieldGetters.containsKey(field), "Invalid field index"); 218 | return intFieldGetters.get(field).apply(this); 219 | } 220 | 221 | @Override 222 | public double getDouble(int field) 223 | { 224 | throw new UnsupportedOperationException(); 225 | } 226 | 227 | @Override 228 | public Slice getSlice(int field) 229 | { 230 | checkArgument(strFieldGetters.containsKey(field), "Invalid field index"); 231 | return Slices.utf8Slice(strFieldGetters.get(field).apply(this)); 232 | } 233 | 234 | @Override 235 | public Object getObject(int field) 236 | { 237 | throw new UnsupportedOperationException(); 238 | } 239 | 240 | @Override 241 | public boolean isNull(int field) 242 | { 243 | return false; 244 | } 245 | 246 | @Override 247 | public void close() 248 | { 249 | } 250 | 251 | private String getChangeType(DiffEntry.ChangeType changeType) 252 | { 253 | if (!changeTypeNames.containsKey(changeType)) { 254 | // there are a few others, see FileMode javadoc for details 255 | throw new IllegalArgumentException("Unknown type of change encountered: " + changeType); 256 | } 257 | return changeTypeNames.get(changeType); 258 | } 259 | 260 | private String getCommitId() 261 | { 262 | return currentCommit.getName(); 263 | } 264 | 265 | private String getOldCommitId() 266 | { 267 | return nextCommit.getName(); 268 | } 269 | 270 | private String getObjectId() 271 | { 272 | return entry.getNewId().name(); 273 | } 274 | 275 | private String getPathName() 276 | { 277 | return entry.getNewPath(); 278 | } 279 | 280 | private String getOldPathName() 281 | { 282 | return entry.getOldPath(); 283 | } 284 | 285 | private int getAddedLines() 286 | { 287 | int result = 0; 288 | for (Edit edit : edits) { 289 | result += edit.getLengthB(); 290 | } 291 | return result; 292 | } 293 | 294 | private int 
getDeletedLines() 295 | { 296 | int result = 0; 297 | for (Edit edit : edits) { 298 | result += edit.getLengthA(); 299 | } 300 | return result; 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 6 | io.airlift 7 | airbase 8 | 334 9 | 10 | 11 | pl.net.was 12 | trino-git 13 | 0.102-SNAPSHOT 14 | trino-plugin 15 | Trino git Connector 16 | 17 | 18 | 19 | Apache License 2.0 20 | http://www.apache.org/licenses/LICENSE-2.0 21 | repo 22 | 23 | 24 | 25 | 26 | scm:git:https://github.com/nineinchnick/trino-git.git 27 | HEAD 28 | 29 | 30 | 31 | 32 | github 33 | GitHub nineinchnick Apache Maven Packages 34 | https://maven.pkg.github.com/nineinchnick/trino-git 35 | 36 | 37 | 38 | 39 | 25 40 | 25 41 | 42 | ${project.basedir} 43 | 44 | true 45 | false 46 | 47 | true 48 | false 49 | 4g 50 | 51 | 479 52 | 386 53 | 2.20 54 | 1.5.22 55 | 56 | -missing 57 | github 58 | 59 | 60 | 61 | 62 | 63 | io.opentelemetry 64 | opentelemetry-bom 65 | ${dep.opentelemetry.version} 66 | pom 67 | import 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | com.fasterxml.jackson.core 76 | jackson-databind 77 | ${dep.jackson.version} 78 | 79 | 80 | 81 | com.google.guava 82 | guava 83 | ${dep.guava.version} 84 | 85 | 86 | 87 | com.google.inject 88 | guice 89 | ${dep.guice.version} 90 | classes 91 | 92 | 93 | 94 | io.airlift 95 | bootstrap 96 | ${dep.airlift.version} 97 | 98 | 99 | 100 | io.airlift 101 | configuration 102 | ${dep.airlift.version} 103 | 104 | 105 | 106 | io.airlift 107 | json 108 | ${dep.airlift.version} 109 | 110 | 111 | 112 | jakarta.validation 113 | jakarta.validation-api 114 | 115 | 116 | 117 | org.eclipse.jgit 118 | org.eclipse.jgit 119 | 7.5.0.202512021534-r 120 | 121 | 122 | 123 | com.fasterxml.jackson.core 124 | jackson-annotations 125 | ${dep.jackson-annotations.version} 126 | provided 127 | 128 | 129 | 130 | io.airlift 131 | 
slice 132 | ${dep.slice.version} 133 | provided 134 | 135 | 136 | 137 | io.opentelemetry 138 | opentelemetry-api 139 | provided 140 | 141 | 142 | 143 | io.opentelemetry 144 | opentelemetry-context 145 | provided 146 | 147 | 148 | 149 | io.trino 150 | trino-spi 151 | ${dep.trino.version} 152 | provided 153 | 154 | 155 | 156 | com.google.errorprone 157 | error_prone_annotations 158 | ${dep.errorprone.version} 159 | runtime 160 | 161 | 162 | 163 | io.airlift 164 | log 165 | ${dep.airlift.version} 166 | runtime 167 | 168 | 169 | 170 | io.airlift 171 | log-manager 172 | ${dep.airlift.version} 173 | runtime 174 | 175 | 176 | 177 | io.airlift 178 | units 179 | 1.12 180 | runtime 181 | 182 | 183 | 184 | io.airlift 185 | testing 186 | ${dep.airlift.version} 187 | test 188 | 189 | 190 | 191 | io.trino 192 | trino-main 193 | ${dep.trino.version} 194 | test 195 | 196 | 197 | 198 | io.trino 199 | trino-memory 200 | ${dep.trino.version} 201 | test 202 | 203 | 204 | 205 | io.trino 206 | trino-testing 207 | ${dep.trino.version} 208 | test 209 | 210 | 211 | com.squareup.okhttp3 212 | okhttp 213 | 214 | 215 | io.trino 216 | trino-exchange-filesystem 217 | 218 | 219 | 220 | 221 | 222 | io.trino 223 | trino-tpch 224 | ${dep.trino.version} 225 | test 226 | 227 | 228 | 229 | org.assertj 230 | assertj-core 231 | ${dep.assertj-core.version} 232 | test 233 | 234 | 235 | 236 | org.codehaus.plexus 237 | plexus-utils 238 | 4.0.2 239 | test 240 | 241 | 242 | 243 | org.junit.jupiter 244 | junit-jupiter 245 | test 246 | 247 | 248 | 249 | org.junit.jupiter 250 | junit-jupiter-api 251 | test 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | org.apache.maven.plugins 260 | maven-deploy-plugin 261 | 3.1.4 262 | 263 | 264 | maven-release-plugin 265 | 3.3.1 266 | 267 | [ci skip] 268 | v@{project.version} 269 | true 270 | 271 | 272 | 273 | org.apache.maven.plugins 274 | maven-scm-plugin 275 | 2.2.1 276 | 277 | true 278 | 279 | 280 | 281 | io.trino 282 | trino-maven-plugin 283 | 17 284 | true 285 | 286 | 
io.trino.spi.Plugin 287 | io.trino 288 | trino-spi 289 | 290 | 291 | 292 | 293 | ca.vanzyl.provisio.maven.plugins 294 | provisio-maven-plugin 295 | 1.1.1 296 | true 297 | 298 | 299 | 300 | org.apache.maven.plugins 301 | maven-wrapper-plugin 302 | 3.3.4 303 | 304 | 305 | 306 | 307 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /mvnw: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---------------------------------------------------------------------------- 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | # ---------------------------------------------------------------------------- 20 | 21 | # ---------------------------------------------------------------------------- 22 | # Apache Maven Wrapper startup batch script, version 3.3.4 23 | # 24 | # Optional ENV vars 25 | # ----------------- 26 | # JAVA_HOME - location of a JDK home dir, required when download maven via java source 27 | # MVNW_REPOURL - repo url base for downloading maven distribution 28 | # MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven 29 | # MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output 30 | # ---------------------------------------------------------------------------- 31 | 32 | set -euf 33 | [ "${MVNW_VERBOSE-}" != debug ] || set -x 34 | 35 | # OS specific support. 36 | native_path() { printf %s\\n "$1"; } 37 | case "$(uname)" in 38 | CYGWIN* | MINGW*) 39 | [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" 40 | native_path() { cygpath --path --windows "$1"; } 41 | ;; 42 | esac 43 | 44 | # set JAVACMD and JAVACCMD 45 | set_java_home() { 46 | # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched 47 | if [ -n "${JAVA_HOME-}" ]; then 48 | if [ -x "$JAVA_HOME/jre/sh/java" ]; then 49 | # IBM's JDK on AIX uses strange locations for the executables 50 | JAVACMD="$JAVA_HOME/jre/sh/java" 51 | JAVACCMD="$JAVA_HOME/jre/sh/javac" 52 | else 53 | JAVACMD="$JAVA_HOME/bin/java" 54 | JAVACCMD="$JAVA_HOME/bin/javac" 55 | 56 | if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then 57 | echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 58 | echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." 
>&2 59 | return 1 60 | fi 61 | fi 62 | else 63 | JAVACMD="$( 64 | 'set' +e 65 | 'unset' -f command 2>/dev/null 66 | 'command' -v java 67 | )" || : 68 | JAVACCMD="$( 69 | 'set' +e 70 | 'unset' -f command 2>/dev/null 71 | 'command' -v javac 72 | )" || : 73 | 74 | if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then 75 | echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 76 | return 1 77 | fi 78 | fi 79 | } 80 | 81 | # hash string like Java String::hashCode 82 | hash_string() { 83 | str="${1:-}" h=0 84 | while [ -n "$str" ]; do 85 | char="${str%"${str#?}"}" 86 | h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) 87 | str="${str#?}" 88 | done 89 | printf %x\\n $h 90 | } 91 | 92 | verbose() { :; } 93 | [ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } 94 | 95 | die() { 96 | printf %s\\n "$1" >&2 97 | exit 1 98 | } 99 | 100 | trim() { 101 | # MWRAPPER-139: 102 | # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. 103 | # Needed for removing poorly interpreted newline sequences when running in more 104 | # exotic environments such as mingw bash on Windows. 
105 | printf "%s" "${1}" | tr -d '[:space:]' 106 | } 107 | 108 | scriptDir="$(dirname "$0")" 109 | scriptName="$(basename "$0")" 110 | 111 | # parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties 112 | while IFS="=" read -r key value; do 113 | case "${key-}" in 114 | distributionUrl) distributionUrl=$(trim "${value-}") ;; 115 | distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; 116 | esac 117 | done <"$scriptDir/.mvn/wrapper/maven-wrapper.properties" 118 | [ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" 119 | 120 | case "${distributionUrl##*/}" in 121 | maven-mvnd-*bin.*) 122 | MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ 123 | case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in 124 | *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; 125 | :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; 126 | :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; 127 | :Linux*x86_64*) distributionPlatform=linux-amd64 ;; 128 | *) 129 | echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 130 | distributionPlatform=linux-amd64 131 | ;; 132 | esac 133 | distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" 134 | ;; 135 | maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;; 136 | *) MVN_CMD="mvn${scriptName#mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; 137 | esac 138 | 139 | # apply MVNW_REPOURL and calculate MAVEN_HOME 140 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ 141 | [ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" 142 | distributionUrlName="${distributionUrl##*/}" 143 | distributionUrlNameMain="${distributionUrlName%.*}" 144 | distributionUrlNameMain="${distributionUrlNameMain%-bin}" 145 | 
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" 146 | MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" 147 | 148 | exec_maven() { 149 | unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : 150 | exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" 151 | } 152 | 153 | if [ -d "$MAVEN_HOME" ]; then 154 | verbose "found existing MAVEN_HOME at $MAVEN_HOME" 155 | exec_maven "$@" 156 | fi 157 | 158 | case "${distributionUrl-}" in 159 | *?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; 160 | *) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; 161 | esac 162 | 163 | # prepare tmp dir 164 | if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then 165 | clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } 166 | trap clean HUP INT TERM EXIT 167 | else 168 | die "cannot create temp dir" 169 | fi 170 | 171 | mkdir -p -- "${MAVEN_HOME%/*}" 172 | 173 | # Download and Install Apache Maven 174 | verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 175 | verbose "Downloading from: $distributionUrl" 176 | verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 177 | 178 | # select .zip or .tar.gz 179 | if ! 
command -v unzip >/dev/null; then 180 | distributionUrl="${distributionUrl%.zip}.tar.gz" 181 | distributionUrlName="${distributionUrl##*/}" 182 | fi 183 | 184 | # verbose opt 185 | __MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' 186 | [ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v 187 | 188 | # normalize http auth 189 | case "${MVNW_PASSWORD:+has-password}" in 190 | '') MVNW_USERNAME='' MVNW_PASSWORD='' ;; 191 | has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; 192 | esac 193 | 194 | if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then 195 | verbose "Found wget ... using wget" 196 | wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" 197 | elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then 198 | verbose "Found curl ... 
using curl" 199 | curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" 200 | elif set_java_home; then 201 | verbose "Falling back to use Java to download" 202 | javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" 203 | targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" 204 | cat >"$javaSource" <<-END 205 | public class Downloader extends java.net.Authenticator 206 | { 207 | protected java.net.PasswordAuthentication getPasswordAuthentication() 208 | { 209 | return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); 210 | } 211 | public static void main( String[] args ) throws Exception 212 | { 213 | setDefault( new Downloader() ); 214 | java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); 215 | } 216 | } 217 | END 218 | # For Cygwin/MinGW, switch paths to Windows format before running javac and java 219 | verbose " - Compiling Downloader.java ..." 220 | "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" 221 | verbose " - Running Downloader.java ..." 222 | "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" 223 | fi 224 | 225 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 226 | if [ -n "${distributionSha256Sum-}" ]; then 227 | distributionSha256Result=false 228 | if [ "$MVN_CMD" = mvnd.sh ]; then 229 | echo "Checksum validation is not supported for maven-mvnd." >&2 230 | echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
>&2 231 | exit 1 232 | elif command -v sha256sum >/dev/null; then 233 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c - >/dev/null 2>&1; then 234 | distributionSha256Result=true 235 | fi 236 | elif command -v shasum >/dev/null; then 237 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then 238 | distributionSha256Result=true 239 | fi 240 | else 241 | echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 242 | echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 243 | exit 1 244 | fi 245 | if [ $distributionSha256Result = false ]; then 246 | echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 247 | echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2 248 | exit 1 249 | fi 250 | fi 251 | 252 | # unzip and move 253 | if command -v unzip >/dev/null; then 254 | unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" 255 | else 256 | tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" 257 | fi 258 | 259 | # Find the actual extracted directory name (handles snapshots where filename != directory name) 260 | actualDistributionDir="" 261 | 262 | # First try the expected directory name (for regular distributions) 263 | if [ -d "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" ]; then 264 | if [ -f "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/bin/$MVN_CMD" ]; then 265 | actualDistributionDir="$distributionUrlNameMain" 266 | fi 267 | fi 268 | 269 | # If not found, search for any directory with the Maven executable (for snapshots) 270 | if [ -z "$actualDistributionDir" ]; then 271 
| # enable globbing to iterate over items 272 | set +f 273 | for dir in "$TMP_DOWNLOAD_DIR"/*; do 274 | if [ -d "$dir" ]; then 275 | if [ -f "$dir/bin/$MVN_CMD" ]; then 276 | actualDistributionDir="$(basename "$dir")" 277 | break 278 | fi 279 | fi 280 | done 281 | set -f 282 | fi 283 | 284 | if [ -z "$actualDistributionDir" ]; then 285 | verbose "Contents of $TMP_DOWNLOAD_DIR:" 286 | verbose "$(ls -la "$TMP_DOWNLOAD_DIR")" 287 | die "Could not find Maven distribution directory in extracted archive" 288 | fi 289 | 290 | verbose "Found extracted Maven distribution directory: $actualDistributionDir" 291 | printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$actualDistributionDir/mvnw.url" 292 | mv -- "$TMP_DOWNLOAD_DIR/$actualDistributionDir" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" 293 | 294 | clean || : 295 | exec_maven "$@" 296 | -------------------------------------------------------------------------------- /src/test/java/pl/net/was/trino/git/TestGitRecordSet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import io.trino.spi.connector.RecordCursor; 17 | import io.trino.spi.connector.RecordSet; 18 | import io.trino.spi.type.IntegerType; 19 | import io.trino.spi.type.TimestampWithTimeZoneType; 20 | import io.trino.spi.type.VarbinaryType; 21 | import org.eclipse.jgit.api.errors.GitAPIException; 22 | import org.junit.jupiter.api.BeforeAll; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.io.IOException; 26 | import java.net.URI; 27 | import java.util.LinkedHashMap; 28 | import java.util.List; 29 | import java.util.Map; 30 | import java.util.Optional; 31 | import java.util.OptionalLong; 32 | 33 | import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; 34 | import static java.util.Arrays.asList; 35 | import static org.assertj.core.api.Assertions.assertThat; 36 | 37 | public class TestGitRecordSet 38 | { 39 | private static final URI uri = URI.create("fake.invalid"); 40 | 41 | @BeforeAll 42 | public static void setUp() 43 | throws IOException, GitAPIException 44 | { 45 | TestGitClient.setupRepo(uri); 46 | } 47 | 48 | @Test 49 | public void testGetColumnTypes() 50 | { 51 | GitSplit split = new GitSplit("commits", uri, Optional.empty()); 52 | GitTableHandle table = new GitTableHandle("default", "commits", Optional.empty(), OptionalLong.empty()); 53 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 54 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 55 | new GitColumnHandle("author_name", createUnboundedVarcharType(), 1))); 56 | assertThat(recordSet.getColumnTypes()).isEqualTo(List.of(createUnboundedVarcharType(), createUnboundedVarcharType())); 57 | 58 | recordSet = new GitRecordSet(split, table, List.of( 59 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 1), 60 | new GitColumnHandle("author_name", createUnboundedVarcharType(), 0))); 61 | assertThat(recordSet.getColumnTypes()).isEqualTo(List.of(createUnboundedVarcharType(), 
createUnboundedVarcharType())); 62 | 63 | recordSet = new GitRecordSet(split, table, List.of( 64 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 1), 65 | new GitColumnHandle("author_name", createUnboundedVarcharType(), 1), 66 | new GitColumnHandle("author_email", createUnboundedVarcharType(), 0))); 67 | assertThat(recordSet.getColumnTypes()).isEqualTo(List.of(createUnboundedVarcharType(), createUnboundedVarcharType(), createUnboundedVarcharType())); 68 | 69 | recordSet = new GitRecordSet(split, table, List.of()); 70 | assertThat(recordSet.getColumnTypes()).isEmpty(); 71 | } 72 | 73 | @Test 74 | public void testCommitsCursorSimple() 75 | { 76 | GitSplit split = new GitSplit("commits", uri, Optional.empty()); 77 | GitTableHandle table = new GitTableHandle("default", "commits", Optional.empty(), OptionalLong.empty()); 78 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 79 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 80 | new GitColumnHandle("author_name", createUnboundedVarcharType(), 1), 81 | new GitColumnHandle("commit_time", TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS, 2))); 82 | try (RecordCursor cursor = recordSet.cursor()) { 83 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 84 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 85 | 86 | Map> data = new LinkedHashMap<>(); 87 | while (cursor.advanceNextPosition()) { 88 | data.put(cursor.getSlice(0).toStringUtf8(), List.of( 89 | cursor.getSlice(1).toStringUtf8(), 90 | cursor.getLong(2))); 91 | } 92 | assertThat(data).isEqualTo(Map.of( 93 | "080dfdf0aac7d302dc31d57f62942bb6533944f7", List.of("test", 6475355394048000L), 94 | "c3b14e59f88d0d6597b98ee93cf61e7556d540a4", List.of("test", 6475355394048000L))); 95 | } 96 | } 97 | 98 | @Test 99 | public void testBranchesCursorSimple() 100 | { 101 | GitSplit split = new GitSplit("branches", uri, Optional.empty()); 102 | GitTableHandle table = new 
GitTableHandle("default", "branches", Optional.empty(), OptionalLong.empty()); 103 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 104 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 105 | new GitColumnHandle("name", createUnboundedVarcharType(), 1))); 106 | try (RecordCursor cursor = recordSet.cursor()) { 107 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 108 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 109 | 110 | Map data = new LinkedHashMap<>(); 111 | while (cursor.advanceNextPosition()) { 112 | assertThat(cursor.isNull(0)).isFalse(); 113 | assertThat(cursor.isNull(1)).isFalse(); 114 | data.put(cursor.getSlice(0).toStringUtf8(), cursor.getSlice(1).toStringUtf8()); 115 | } 116 | assertThat(data).isEqualTo(Map.of("c3b14e59f88d0d6597b98ee93cf61e7556d540a4", "refs/heads/master")); 117 | } 118 | } 119 | 120 | @Test 121 | public void testTagsCursorSimple() 122 | { 123 | GitSplit split = new GitSplit("tags", uri, Optional.empty()); 124 | GitTableHandle table = new GitTableHandle("default", "tags", Optional.empty(), OptionalLong.empty()); 125 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 126 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 127 | new GitColumnHandle("name", createUnboundedVarcharType(), 1))); 128 | try (RecordCursor cursor = recordSet.cursor()) { 129 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 130 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 131 | 132 | Map data = new LinkedHashMap<>(); 133 | while (cursor.advanceNextPosition()) { 134 | assertThat(cursor.isNull(0)).isFalse(); 135 | assertThat(cursor.isNull(1)).isFalse(); 136 | data.put(cursor.getSlice(0).toStringUtf8(), cursor.getSlice(1).toStringUtf8()); 137 | } 138 | assertThat(data).isEqualTo(Map.of( 139 | "7afcc1aaeab61c3fd7f2b1b5df5178a823cbf77e", "refs/tags/tag_for_testing", 140 | 
"c3b14e59f88d0d6597b98ee93cf61e7556d540a4", "refs/tags/unannotated_tag_for_testing")); 141 | } 142 | } 143 | 144 | @Test 145 | public void testTagsCursorWithTagTime() 146 | { 147 | GitSplit split = new GitSplit("tags", uri, Optional.empty()); 148 | GitTableHandle table = new GitTableHandle("default", "tags", Optional.empty(), OptionalLong.empty()); 149 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 150 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 151 | new GitColumnHandle("name", createUnboundedVarcharType(), 1), 152 | new GitColumnHandle("tag_time", TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS, 2))); 153 | try (RecordCursor cursor = recordSet.cursor()) { 154 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 155 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 156 | assertThat(cursor.getType(2)).isEqualTo(TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS); 157 | 158 | Map> data = new LinkedHashMap<>(); 159 | while (cursor.advanceNextPosition()) { 160 | assertThat(cursor.isNull(0)).isFalse(); 161 | assertThat(cursor.isNull(1)).isFalse(); 162 | data.put(cursor.getSlice(0).toStringUtf8(), asList( 163 | cursor.getSlice(1).toStringUtf8(), 164 | cursor.isNull(2) ? 
null : cursor.getLong(2))); 165 | } 166 | assertThat(data).isEqualTo(Map.of( 167 | "7afcc1aaeab61c3fd7f2b1b5df5178a823cbf77e", asList("refs/tags/tag_for_testing", 6475355394048000L), 168 | "c3b14e59f88d0d6597b98ee93cf61e7556d540a4", asList("refs/tags/unannotated_tag_for_testing", null))); 169 | } 170 | } 171 | 172 | @Test 173 | public void testTreesCursorSimple() 174 | { 175 | GitSplit split = new GitSplit("trees", uri, Optional.empty()); 176 | GitTableHandle table = new GitTableHandle("default", "trees", Optional.empty(), OptionalLong.empty()); 177 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 178 | new GitColumnHandle("commit_id", createUnboundedVarcharType(), 0), 179 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 1))); 180 | try (RecordCursor cursor = recordSet.cursor()) { 181 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 182 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 183 | 184 | Map data = new LinkedHashMap<>(); 185 | while (cursor.advanceNextPosition()) { 186 | assertThat(cursor.isNull(0)).isFalse(); 187 | assertThat(cursor.isNull(1)).isFalse(); 188 | data.put(cursor.getSlice(0).toStringUtf8(), cursor.getSlice(1).toStringUtf8()); 189 | } 190 | assertThat(data).isEqualTo(Map.of( 191 | "080dfdf0aac7d302dc31d57f62942bb6533944f7", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 192 | "c3b14e59f88d0d6597b98ee93cf61e7556d540a4", "5dd01c177f5d7d1be5346a5bc18a569a7410c2ef")); 193 | } 194 | } 195 | 196 | @Test 197 | public void testObjectsCursorSimple() 198 | { 199 | GitSplit split = new GitSplit("objects", uri, Optional.empty()); 200 | GitTableHandle table = new GitTableHandle("default", "objects", Optional.empty(), OptionalLong.empty()); 201 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 202 | new GitColumnHandle("object_id", createUnboundedVarcharType(), 0), 203 | new GitColumnHandle("contents", VarbinaryType.VARBINARY, 1))); 204 | try (RecordCursor cursor = 
recordSet.cursor()) { 205 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 206 | assertThat(cursor.getType(1)).isEqualTo(VarbinaryType.VARBINARY); 207 | 208 | Map data = new LinkedHashMap<>(); 209 | while (cursor.advanceNextPosition()) { 210 | assertThat(cursor.isNull(0)).isFalse(); 211 | assertThat(cursor.isNull(1)).isFalse(); 212 | String objectId = cursor.getSlice(0).toStringUtf8(); 213 | String contents = cursor.getSlice(1).toStringUtf8(); 214 | data.put(objectId, contents); 215 | } 216 | assertThat(data).isEqualTo(Map.of( 217 | "5dd01c177f5d7d1be5346a5bc18a569a7410c2ef", "Hello, world!", 218 | "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "")); 219 | } 220 | } 221 | 222 | @Test 223 | public void testDiffStatsCursorSimple() 224 | { 225 | GitSplit split = new GitSplit("diff_stats", uri, Optional.empty()); 226 | GitTableHandle table = new GitTableHandle("default", "diff_stats", Optional.empty(), OptionalLong.empty()); 227 | RecordSet recordSet = new GitRecordSet(split, table, List.of( 228 | new GitColumnHandle("commit_id", createUnboundedVarcharType(), 0), 229 | new GitColumnHandle("old_commit_id", createUnboundedVarcharType(), 1), 230 | new GitColumnHandle("added_lines", IntegerType.INTEGER, 2), 231 | new GitColumnHandle("deleted_lines", IntegerType.INTEGER, 3))); 232 | try (RecordCursor cursor = recordSet.cursor()) { 233 | assertThat(cursor.getType(0)).isEqualTo(createUnboundedVarcharType()); 234 | assertThat(cursor.getType(1)).isEqualTo(createUnboundedVarcharType()); 235 | assertThat(cursor.getType(2)).isEqualTo(IntegerType.INTEGER); 236 | assertThat(cursor.getType(3)).isEqualTo(IntegerType.INTEGER); 237 | 238 | Map> data = new LinkedHashMap<>(); 239 | while (cursor.advanceNextPosition()) { 240 | assertThat(cursor.isNull(0)).isFalse(); 241 | assertThat(cursor.isNull(1)).isFalse(); 242 | assertThat(cursor.isNull(2)).isFalse(); 243 | assertThat(cursor.isNull(3)).isFalse(); 244 | data.put( 245 | cursor.getSlice(0).toStringUtf8(), 246 | 
List.of( 247 | cursor.getSlice(1).toStringUtf8(), 248 | cursor.getLong(2), 249 | cursor.getLong(3))); 250 | } 251 | assertThat(data).isEqualTo(Map.of( 252 | "c3b14e59f88d0d6597b98ee93cf61e7556d540a4", 253 | List.of( 254 | "080dfdf0aac7d302dc31d57f62942bb6533944f7", 255 | 1L, 256 | 0L))); 257 | } 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/main/java/pl/net/was/trino/git/GitMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package pl.net.was.trino.git; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import com.google.common.collect.ImmutableMap; 18 | import com.google.inject.BindingAnnotation; 19 | import com.google.inject.Inject; 20 | import io.airlift.slice.Slice; 21 | import io.trino.spi.StandardErrorCode; 22 | import io.trino.spi.TrinoException; 23 | import io.trino.spi.connector.ColumnHandle; 24 | import io.trino.spi.connector.ColumnMetadata; 25 | import io.trino.spi.connector.ConnectorMetadata; 26 | import io.trino.spi.connector.ConnectorSession; 27 | import io.trino.spi.connector.ConnectorTableHandle; 28 | import io.trino.spi.connector.ConnectorTableMetadata; 29 | import io.trino.spi.connector.ConnectorTableProperties; 30 | import io.trino.spi.connector.ConnectorTableVersion; 31 | import io.trino.spi.connector.ConnectorViewDefinition; 32 | import io.trino.spi.connector.Constraint; 33 | import io.trino.spi.connector.ConstraintApplicationResult; 34 | import io.trino.spi.connector.SchemaNotFoundException; 35 | import io.trino.spi.connector.SchemaTableName; 36 | import io.trino.spi.connector.SchemaTablePrefix; 37 | import io.trino.spi.connector.TableNotFoundException; 38 | import io.trino.spi.predicate.DiscreteValues; 39 | import io.trino.spi.predicate.Domain; 40 | import io.trino.spi.predicate.EquatableValueSet; 41 | import io.trino.spi.predicate.Range; 42 | import io.trino.spi.predicate.Ranges; 43 | import io.trino.spi.predicate.SortedRangeSet; 44 | import io.trino.spi.predicate.TupleDomain; 45 | import io.trino.spi.predicate.ValueSet; 46 | import io.trino.spi.statistics.ColumnStatistics; 47 | import io.trino.spi.statistics.Estimate; 48 | import io.trino.spi.statistics.TableStatistics; 49 | 50 | import java.lang.annotation.ElementType; 51 | import java.lang.annotation.Retention; 52 | import java.lang.annotation.RetentionPolicy; 53 | import java.lang.annotation.Target; 54 | import java.util.List; 55 | import java.util.Map; 56 | import 
java.util.Optional; 57 | import java.util.OptionalLong; 58 | import java.util.Set; 59 | import java.util.stream.Collectors; 60 | 61 | import static com.google.common.base.Verify.verify; 62 | import static com.google.common.collect.ImmutableList.toImmutableList; 63 | import static java.util.Objects.requireNonNull; 64 | 65 | public class GitMetadata 66 | implements ConnectorMetadata 67 | { 68 | private final GitClient gitClient; 69 | private final String catalogName; 70 | 71 | @Inject 72 | public GitMetadata(@CatalogName String catalogName, GitClient gitClient) 73 | { 74 | this.catalogName = requireNonNull(catalogName, "catalogName is null"); 75 | this.gitClient = requireNonNull(gitClient, "client is null"); 76 | } 77 | 78 | @Retention(RetentionPolicy.RUNTIME) 79 | @Target({ElementType.FIELD, ElementType.PARAMETER}) 80 | @BindingAnnotation 81 | public @interface CatalogName {} 82 | 83 | @Override 84 | public List listSchemaNames(ConnectorSession session) 85 | { 86 | return listSchemaNames(); 87 | } 88 | 89 | public List listSchemaNames() 90 | { 91 | return List.copyOf(gitClient.getSchemaNames()); 92 | } 93 | 94 | @Override 95 | public GitTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional startVersion, Optional endVersion) 96 | { 97 | if (startVersion.isPresent() || endVersion.isPresent()) { 98 | throw new TrinoException(StandardErrorCode.NOT_SUPPORTED, "This connector does not support versioned tables"); 99 | } 100 | if (!listSchemaNames(session).contains(tableName.getSchemaName())) { 101 | return null; 102 | } 103 | 104 | GitTable table = gitClient.getTable(tableName.getSchemaName(), tableName.getTableName()); 105 | if (table == null) { 106 | return null; 107 | } 108 | 109 | return new GitTableHandle(tableName.getSchemaName(), tableName.getTableName(), Optional.empty(), OptionalLong.empty()); 110 | } 111 | 112 | @Override 113 | public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) 
114 | { 115 | return getTableMetadata(((GitTableHandle) table).toSchemaTableName()); 116 | } 117 | 118 | @Override 119 | public List listTables(ConnectorSession session, Optional optionalSchemaName) 120 | { 121 | if (optionalSchemaName.isPresent() && !gitClient.getSchemaNames().contains(optionalSchemaName.get())) { 122 | throw new SchemaNotFoundException(optionalSchemaName.get()); 123 | } 124 | Set schemaNames = optionalSchemaName.map(Set::of) 125 | .orElseGet(() -> Set.copyOf(gitClient.getSchemaNames())); 126 | 127 | ImmutableList.Builder builder = ImmutableList.builder(); 128 | for (String schemaName : schemaNames) { 129 | for (String tableName : gitClient.getTableNames(schemaName)) { 130 | builder.add(new SchemaTableName(schemaName, tableName)); 131 | } 132 | } 133 | return builder.build(); 134 | } 135 | 136 | @Override 137 | public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) 138 | { 139 | GitTableHandle gitTableHandle = (GitTableHandle) tableHandle; 140 | 141 | GitTable table = gitClient.getTable(gitTableHandle.getSchemaName(), gitTableHandle.getTableName()); 142 | if (table == null) { 143 | throw new TableNotFoundException(gitTableHandle.toSchemaTableName()); 144 | } 145 | 146 | ImmutableMap.Builder columnHandles = ImmutableMap.builder(); 147 | int index = 0; 148 | for (ColumnMetadata column : table.getColumnsMetadata()) { 149 | columnHandles.put(column.getName(), new GitColumnHandle(column.getName(), column.getType(), index)); 150 | index++; 151 | } 152 | return columnHandles.build(); 153 | } 154 | 155 | @Override 156 | public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) 157 | { 158 | requireNonNull(prefix, "prefix is null"); 159 | ImmutableMap.Builder> columns = ImmutableMap.builder(); 160 | for (SchemaTableName tableName : listTables(session, prefix)) { 161 | ConnectorTableMetadata tableMetadata = getTableMetadata(tableName); 162 | // table can disappear during listing operation 163 | if 
(tableMetadata != null) { 164 | columns.put(tableName, tableMetadata.getColumns()); 165 | } 166 | } 167 | return columns.build(); 168 | } 169 | 170 | private ConnectorTableMetadata getTableMetadata(SchemaTableName tableName) 171 | { 172 | if (!listSchemaNames().contains(tableName.getSchemaName())) { 173 | return null; 174 | } 175 | 176 | GitTable table = gitClient.getTable(tableName.getSchemaName(), tableName.getTableName()); 177 | if (table == null) { 178 | return null; 179 | } 180 | 181 | return new ConnectorTableMetadata(tableName, table.getColumnsMetadata()); 182 | } 183 | 184 | private List listTables(ConnectorSession session, SchemaTablePrefix prefix) 185 | { 186 | if (prefix.getTable().isEmpty()) { 187 | return listTables(session, prefix.getSchema()); 188 | } 189 | return List.of(prefix.toSchemaTableName()); 190 | } 191 | 192 | @Override 193 | public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) 194 | { 195 | return ((GitColumnHandle) columnHandle).getColumnMetadata(); 196 | } 197 | 198 | @Override 199 | public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) 200 | { 201 | return new ConnectorTableProperties(); 202 | } 203 | 204 | @Override 205 | public List listViews(ConnectorSession session, Optional optionalSchemaName) 206 | { 207 | if (optionalSchemaName.isPresent() && !gitClient.getSchemaNames().contains(optionalSchemaName.get())) { 208 | throw new SchemaNotFoundException(optionalSchemaName.get()); 209 | } 210 | Set schemaNames = optionalSchemaName.map(Set::of) 211 | .orElseGet(() -> Set.copyOf(gitClient.getSchemaNames())); 212 | 213 | ImmutableList.Builder builder = ImmutableList.builder(); 214 | for (String schemaName : schemaNames) { 215 | for (String tableName : gitClient.viewColumns.keySet()) { 216 | builder.add(new SchemaTableName(schemaName, tableName)); 217 | } 218 | } 219 | return builder.build(); 220 | } 221 | 222 | 
@Override 223 | public Optional getView(ConnectorSession session, SchemaTableName viewName) 224 | { 225 | return Optional.ofNullable(gitClient.getView(catalogName, viewName.getSchemaName(), viewName.getTableName())); 226 | } 227 | 228 | @Override 229 | public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle) 230 | { 231 | GitTableHandle gitTableHandle = (GitTableHandle) tableHandle; 232 | String tableName = gitTableHandle.getTableName(); 233 | 234 | Map columns = getColumnHandles(session, tableHandle); 235 | TableStatistics.Builder builder = TableStatistics.builder(); 236 | switch (tableName) { 237 | case "commits": 238 | builder.setRowCount(Estimate.of(1)); 239 | builder.setColumnStatistics(columns.get("object_id"), ColumnStatistics.builder() 240 | .setNullsFraction(Estimate.zero()) 241 | .setDistinctValuesCount(Estimate.of(1)) 242 | .setDataSize(Estimate.of(1000)) 243 | .build()); 244 | break; 245 | case "trees": 246 | builder.setRowCount(Estimate.of(1000000)); 247 | builder.setColumnStatistics(columns.get("commit_id"), ColumnStatistics.builder() 248 | .setNullsFraction(Estimate.zero()) 249 | .setDistinctValuesCount(Estimate.of(1000000)) 250 | .setDataSize(Estimate.of(1000000000)) 251 | .build()); 252 | break; 253 | } 254 | return builder.build(); 255 | } 256 | 257 | @Override 258 | public Optional> applyFilter(ConnectorSession session, ConnectorTableHandle table, Constraint constraint) 259 | { 260 | GitTableHandle handle = (GitTableHandle) table; 261 | 262 | Optional> oldCommits = handle.getCommitIds(); 263 | 264 | Optional> commitIds = Optional.empty(); 265 | TupleDomain unenforcedConstraint = constraint.getSummary(); 266 | 267 | Map columns = getColumnHandles(session, handle); 268 | 269 | if (handle.getTableName().equals(GitClient.Table.trees.name())) { 270 | commitIds = getCommitIds(constraint.getSummary()); 271 | unenforcedConstraint = constraint.getSummary().filter( 272 | (columnHandle, domain) -> 
!columnHandle.equals(columns.get("commit_id"))); 273 | } 274 | else if (handle.getTableName().equals(GitClient.Table.commits.name())) { 275 | // TODO merge both conditions, mapping table name to column (FK?) 276 | commitIds = getCommitIds(constraint.getSummary()); 277 | unenforcedConstraint = constraint.getSummary().filter( 278 | (columnHandle, domain) -> !columnHandle.equals(columns.get("object_id"))); 279 | } 280 | 281 | if ((oldCommits.isEmpty() && commitIds.isEmpty()) || 282 | (oldCommits.isPresent() && commitIds.isPresent() && 283 | oldCommits.get().size() == commitIds.get().size() && oldCommits.get().containsAll(commitIds.get()))) { 284 | return Optional.empty(); 285 | } 286 | if (oldCommits.isEmpty()) { 287 | oldCommits = commitIds; 288 | } 289 | else if (commitIds.isPresent()) { 290 | oldCommits.get().addAll(commitIds.get()); 291 | } 292 | 293 | return Optional.of(new ConstraintApplicationResult<>( 294 | new GitTableHandle( 295 | handle.getSchemaName(), 296 | handle.getTableName(), 297 | oldCommits, 298 | handle.getLimit()), 299 | unenforcedConstraint, 300 | constraint.getExpression(), 301 | true)); 302 | } 303 | 304 | public static Optional> getCommitIds(TupleDomain constraintSummary) 305 | { 306 | if (constraintSummary.isNone() || constraintSummary.isAll()) { 307 | return Optional.empty(); 308 | } 309 | 310 | for (Map.Entry entry : constraintSummary.getDomains().get().entrySet()) { 311 | System.out.println(entry.getKey() + "/" + entry.getValue()); 312 | GitColumnHandle column = ((GitColumnHandle) entry.getKey()); 313 | // TODO this is ambiguous and should be passed as a param, since every call to getCommitIds has a table handle in scope 314 | if (!column.getColumnName().equals("commit_id") && !column.getColumnName().equals("object_id")) { 315 | continue; 316 | } 317 | Domain domain = entry.getValue(); 318 | verify(!domain.isNone(), "Domain is none"); 319 | if (domain.isAll()) { 320 | continue; 321 | } 322 | if (domain.isOnlyNull()) { 323 | return 
Optional.of(List.of()); 324 | } 325 | if ((!domain.getValues().isNone() && domain.isNullAllowed()) || (domain.getValues().isAll() && !domain.isNullAllowed())) { 326 | continue; 327 | } 328 | if (domain.isSingleValue()) { 329 | String value = ((Slice) domain.getSingleValue()).toStringUtf8(); 330 | return Optional.of(List.of(value)); 331 | } 332 | ValueSet valueSet = domain.getValues(); 333 | if (valueSet instanceof EquatableValueSet) { 334 | DiscreteValues discreteValues = valueSet.getDiscreteValues(); 335 | return Optional.of(discreteValues.getValues().stream().map(value -> ((Slice) value).toStringUtf8()).collect(Collectors.toList())); 336 | } 337 | if (valueSet instanceof SortedRangeSet) { 338 | Ranges ranges = ((SortedRangeSet) valueSet).getRanges(); 339 | List rangeList = ranges.getOrderedRanges(); 340 | if (rangeList.stream().allMatch(Range::isSingleValue)) { 341 | List values = rangeList.stream() 342 | .map(range -> ((Slice) range.getSingleValue()).toStringUtf8()) 343 | .collect(toImmutableList()); 344 | return Optional.of(values); 345 | } 346 | // ignore unbounded ranges 347 | return Optional.empty(); 348 | } 349 | throw new IllegalStateException("Unexpected domain: " + domain); 350 | } 351 | return Optional.empty(); 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /examples/achievements.sql: -------------------------------------------------------------------------------- 1 | -- Don't use the idents view, because it produces too many stages; instead, break it down into temporary tables 2 | CREATE TABLE memory.default.nodes AS 3 | SELECT email, name, count(*) AS count 4 | FROM ( 5 | SELECT author_email AS email, author_name AS name 6 | FROM git.default.commits 7 | UNION ALL 8 | SELECT committer_email AS email, committer_name AS name 9 | FROM git.default.commits 10 | ) names 11 | GROUP BY email, name; 12 | 13 | CREATE TABLE memory.default.edges AS 14 | SELECT n1.name AS name1, n2.name AS name2 15 | FROM 
memory.default.nodes n1
INNER JOIN memory.default.nodes n2 USING (email);

-- Groups names that are transitively linked through a shared email into one identity.
-- All helper tables are fully qualified so the statement does not depend on the
-- session's default catalog and schema.
CREATE TABLE memory.default.idents AS
WITH RECURSIVE
-- walk: every name reachable from name1 by following edges, tracking visited names to stop cycles
walk (name1, name2, visited) AS (
    SELECT name1, name2, ARRAY[name1]
    FROM memory.default.edges
    WHERE name1 = name2
    UNION ALL
    SELECT w.name1, e.name2, w.visited || e.name2
    FROM walk w
    INNER JOIN memory.default.edges e ON e.name1 = w.name2
    WHERE NOT contains(w.visited, e.name2)
),
-- result: the sorted set of reachable names per starting name; equal sets mean the same identity
result (name1, name2s) AS (
    SELECT name1, array_agg(DISTINCT name2 ORDER BY name2)
    FROM walk
    GROUP BY name1
),
-- grouped: per identity, all names and emails ordered by how often they were used
grouped (names, emails) AS (
    SELECT
        array_distinct(array_agg(n.name ORDER BY n.count DESC)) AS names,
        array_distinct(array_agg(n.email ORDER BY n.count DESC)) AS emails
    FROM result r
    INNER JOIN memory.default.nodes n ON n.name = r.name1
    GROUP BY r.name2s
)
SELECT
    emails[1] AS email,
    names[1] AS name,
    slice(emails, 2, cardinality(emails)) AS extra_emails,
    -- the length must come from names itself, otherwise extra names are cut off
    -- whenever an identity has more names than emails
    slice(names, 2, cardinality(names)) AS extra_names
FROM grouped
ORDER BY name, names;



-- Date and time based achievements. NULL in a column means "no condition on it".
CREATE OR REPLACE VIEW memory.default.achievements_calendar AS
SELECT * FROM (
    -- id, name, description, "month", "day_from", "day_to", "doy", "dow", "minute_from", "minute_to"
    VALUES
    (
        'christmas'
        ,'Ruined Christmas'
        ,'Commit on Dec 25'
        ,12
        ,25
        ,25
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'fools-day'
        ,'Fools'' Code'
        ,'Commit on Apr 1'
        ,4
        ,1
        ,1
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'halloween'
        ,'This Code Looks Scary'
        ,'Commit on Oct 31'
        ,10
        ,31
        ,31
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'leap-day'
        ,'Rare Occasion'
        ,'Commit on Feb 29'
        ,2
        ,29
        ,29
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'new-year'
        ,'New Year, New Bugs'
        ,'Commit on Jan 1'
        ,1
        ,1
        ,1
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'programmers-day'
        ,'Professional Pride'
        ,'Commit on Programmers'' Day'
        ,NULL
        ,NULL
        ,NULL
        ,256
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'russia-day'
        ,'From Russia with Love'
        ,'Commit on Russia Day'
        ,6
        ,12
        ,12
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'valentine'
        ,'In Love with Work'
        ,'Commit on Feb 14'
        ,2
        ,14
        ,14
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        -- fourth Thursday of November: months are 1-based in this table (see 'christmas')
        -- NOTE(review): dow 5 = Thursday and 6 = Friday imply a Sunday=1 numbering - confirm against the consuming query
        'thanksgiving'
        ,'Turkey Code'
        ,'Commit on Thanksgiving'
        ,11
        ,22
        ,28
        ,NULL
        ,5
        ,NULL
        ,NULL
    )
    ,(
        'sysadmin-day'
        ,'If it works, why do we need you'
        ,'Commit on last friday in July'
        ,7
        ,-1
        ,-1
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        'time-get'
        ,'Get'
        ,'Commit exactly at 00:00'
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,0
        ,0
    )
    ,(
        'owl'
        ,'Owl'
        ,'Commit between 4am and 7am local time'
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,4 * 60
        ,7 * 60
    )
    ,(
        'dangerous-game'
        ,'Dangerous Game'
        ,'Commit after 6PM friday'
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,6
        ,18 * 60
        ,NULL
    )
) AS t(id, name, description, "month", "day_from", "day_to", "doy", "dow", "minute_from", "minute_to")
UNION ALL
-- the project's birthday is the date of its earliest commit, read in a single scan
SELECT
    'anniversary'
    ,'Anniversary'
    ,'Commit on the project''s birthday'
    ,MONTH(first_commit_time)
    ,DAY(first_commit_time)
    ,DAY(first_commit_time)
    ,NULL
    ,NULL
    ,NULL
    ,NULL
FROM (
    SELECT min(commit_time) AS first_commit_time
    FROM git.default.commits
) AS first_commit;



-- Achievements based on the number of lines added/removed by a commit.
-- NULL in a bound means "no condition on it".
CREATE OR REPLACE VIEW memory.default.achievements_changed_lines AS
SELECT * FROM (
    VALUES
    (
        'world-balance'
        ,'World Balance'
        ,'Number of lines added == number of lines deleted'
        ,NULL
        ,NULL
        ,NULL
        ,NULL
        ,true
    )
    ,(
        'massive'
        ,'Massive'
        ,'Add more than 1000 lines in a single commit'
        ,1000
        ,NULL
        ,NULL
        ,NULL
        ,NULL
    )
    ,(
        -- NOTE(review): the display name is an empty string - confirm this is intentional
        'empty-commit'
        ,''
        ,'Make an empty commit'
        ,0
        ,0
        ,0
        ,0
        ,NULL
    )
    ,(
        'eraser'
        ,'Eraser'
        ,'Make a commit with no lines added, only deletions'
        ,NULL
        ,0
        ,0
        ,NULL
        ,NULL
    )
) AS t(id, name, description, added_from, added_to, removed_from, removed_to, changed_equal);


-- Achievements based on which files a commit touches.
-- path_regex patterns start with (?i): an inline flag only affects the part of the
-- pattern that follows it, so it has to come first to make the match case-insensitive.
CREATE OR REPLACE VIEW memory.default.achievements_changed_files AS
SELECT * FROM (
    VALUES
    (
        'wrecking-ball'
        ,'Wrecking Ball'
        ,'Change more than 100 files in one commit'
        ,100
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,NULL
        ,NULL
    )
    ,(
        'mover'
        ,'Mover'
        ,'Move a file from one place to another without changing it'
        ,CAST(NULL AS INT)
        ,0
        ,0
        ,CAST(NULL AS INT)
        ,NULL
        ,NULL
    )
    ,(
        'change-of-mind'
        ,'Change Of Mind'
        ,'Change license type or edit license file'
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,ARRAY['Add', 'Modify']
        ,'(?i)(^|/)license$'
    )
    ,(
        'gitignore'
        ,'Gitignore'
        ,'Add .gitignore'
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,ARRAY['Add']
        ,'(?i)(^|/)\.gitignore$'
    )
    ,(
        'good-boy'
        ,'Good Boy'
        ,'Create ''test'' or ''doc'' directory'
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,ARRAY['Add']
        ,'(?i)(^|/)(test|docs|doc)/'
    )
    ,(
        'scribbler'
        ,'Scribbler'
        ,'Create a README'
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,ARRAY['Add']
        ,'(?i)(^|/)readme(\..+?)?$'
    )
    ,(
        'nothing-to-hide'
        ,'Nothing to Hide'
        ,'Commit id_rsa file'
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,CAST(NULL AS INT)
        ,ARRAY['Add']
        ,'(?i)(^|/)id_rsa$'
    )
) AS t(id, name, description, changed_from, changed_to, moved_from, moved_to, change_types, path_regex);


-- Achievements based on the commit message (words, message_regex) or the commit id (id_regex).
CREATE OR REPLACE VIEW memory.default.achievements_words AS
SELECT * FROM (
    VALUES
    (
        'beggar'
        ,'Beggar'
        ,'Ask for an achievement in a commit message'
        ,ARRAY['achievement', 'achievements']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'fix'
        ,'Save The Day'
        ,'Use word “fix” in a commit message'
        ,ARRAY['fix', 'fixes', 'fixed', 'fixing']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'forgot'
        ,'Second Thoughts'
        ,'Use word “forgot” in a commit message'
        ,ARRAY['forgot']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'google'
        ,'I Can Sort It out Myself'
        ,'Use word “google” in a commit message'
        ,ARRAY['google']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'hack'
        ,'Real Hacker'
        ,'Use word “hack” in a commit message'
        ,ARRAY['hack']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'impossible'
        ,'Mission Impossible'
        ,'Use word “impossible” in a commit message'
        ,ARRAY['impossible']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'magic'
        ,'The Colour of Magic'
        ,'Use word “magic” in a commit message'
        ,ARRAY['magic']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'never-probably'
        ,'Never, Probably'
        ,'Use word “later” in a commit message'
        ,ARRAY['later']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'secure'
        ,'We’re Safe Now'
        ,'Use word “secure” in a commit message'
        ,ARRAY['insecure', 'secure']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'sorry'
        ,'Salvation'
        ,'Use word “sorry” in a commit message'
        ,ARRAY['sorry']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'wow'
        ,'Wow'
        ,'Use word “wow” in a commit message'
        ,ARRAY['wow']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'narcissist'
        ,'Narcissist'
        ,'Use your own name in a commit message'
        ,ARRAY[]
        ,true
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'bad-motherfucker'
        ,'Bad Motherf*cker'
        ,'Swear in a commit message'
        ,ARRAY['fuck', 'fucking', 'damn', 'shit']
        ,false
        ,false
        ,NULL
        ,NULL
    )
    ,(
        'man-of-few-words'
        ,'A Man of Few Words'
        ,'Commit message with 3 letters or less'
        ,NULL
        ,false
        ,false
        ,'^([^\p{L}]*\p{L}){0,3}[^\p{L}]*$'
        ,NULL
    )
    ,(
        'no-more-letters'
        ,'No More Letters'
        ,'Write a commit message without any letters'
        ,NULL
        ,false
        ,false
        ,'^[^\p{L}]*$'
        ,NULL
    )
    ,(
        'emoji'
        ,'C00l kid'
        ,'Use emoji in a commit message'
        ,NULL
        ,false
        ,false
        ,'[\x{1f300}-\x{1f5ff}\x{1f600}-\x{1f64f}\x{1f680}-\x{1f6ff}\x{2600}-\x{26FF}\x{2700}-\x{2FBF}]'
        ,NULL
    )
    ,(
        'lucky'
        ,'Lucky'
        ,'Consecutive 777 in SHA-1'
        ,NULL
        ,false
        ,false
        ,NULL
        ,'777'
    )
    ,(
        'mark-of-the-beast'
        ,'Mark of the Beast'
        ,'Consecutive 666 in SHA-1'
        ,NULL
        ,false
        ,false
        ,NULL
        ,'666'
    )
    ,(
        -- {10,}: at least 10 line breaks, i.e. more than 10 lines; a bare {10} would
        -- reject messages longer than 11 lines
        'leo-tolstoy'
        ,'Leo Tolstoy'
        ,'More than 10 lines in a commit message'
        ,NULL
        ,false
        ,false
        ,'^([^\n]*\n){10,}[^\n]*$'
        ,NULL
    )
) AS t(id, name, description, words, include_self, exclude_self, message_regex, id_regex)
UNION ALL
-- words for 'blamer' are the space-separated parts of the extra names of all known identities
SELECT
    'blamer'
    ,'Blamer'
    ,'Use someone else’s name in a commit message'
    ,(SELECT array_agg(trim(s.name)) AS name
      FROM memory.default.idents
      CROSS JOIN UNNEST(extra_names) AS e(name)
      CROSS JOIN UNNEST(split(e.name, ' ')) AS s(name)
      WHERE trim(s.name) != '')
    ,false
    ,true
    ,NULL
    ,NULL;


CREATE OR REPLACE VIEW memory.default.achievements_languages AS
SELECT * FROM (
    VALUES
    (
        'basic'
        ,'Cradle of Civilization'
        ,'Add Basic file to the repo'
        ,ARRAY['bas', 'vb', 'vbs', 'vba', 'vbproj']
    )
    ,(
        'c-sharp'
        ,'It''s Dangerous to Go Alone, Take LINQ'
        ,'Add C# file to the repo'
        ,ARRAY['cs', 'csproj']
    )
    ,(
        'clojure'
        ,'Even Lispers Hate Lisp'
        ,'Add Clojure file to the repo'
        ,ARRAY['clj', 'cljx']
    )
    ,(
        'clojurescript'
        ,'Even Lispers Hate Lisp (in a browser)'
        ,'Add ClojureScript file to the repo'
        ,ARRAY['cljs']
    )
    ,(
        'css'
        ,'You Designer Now?'
        ,'Add CSS file to the repo'
        ,ARRAY['css', 'sass', 'scss', 'less', 'haml']
    )
    ,(
        'cxx'
        ,'Troubles++14'
        ,'Add C++ file to the repo'
        ,ARRAY['c++', 'cc', 'cpp', 'cxx', 'pcc', 'hh', 'hpp', 'hxx', 'vcproj']
    )
    ,(
        'dart'
        ,'You Work in Google?'
609 | ,'Add Dart file to the repo' 610 | ,ARRAY['dart'] 611 | ) 612 | ,( 613 | 'erlang' 614 | ,'It’s like ObjC, but for Ericsson phones' 615 | ,'Add Erlang file to the repo' 616 | ,ARRAY['erl', 'hrl'] 617 | ) 618 | ,( 619 | 'go' 620 | ,'In Google we trust' 621 | ,'Add Go file to the repo' 622 | ,ARRAY['go'] 623 | ) 624 | ,( 625 | 'haskell' 626 | ,'Ivory Tower' 627 | ,'Add Haskell file to the repo' 628 | ,ARRAY['hs', 'lhs'] 629 | ) 630 | ,( 631 | 'java' 632 | ,'Write Once. Run. Anywhere' 633 | ,'Add Java file to the repo' 634 | ,ARRAY['java', 'jsf', 'jsp', 'jspf'] 635 | ) 636 | ,( 637 | 'javascript' 638 | ,'Happily Never After' 639 | ,'Add JavaScript file to the repo' 640 | ,ARRAY['js'] 641 | ) 642 | ,( 643 | 'json' 644 | ,'Call JSON!' 645 | ,'Add JSON file to the repo' 646 | ,ARRAY['json'] 647 | ) 648 | ,( 649 | 'objective-c' 650 | ,'NSVeryDescriptiveAchievementNameWithParame...' 651 | ,'Add Objective-C file to the repo' 652 | ,ARRAY['m', 'mm'] 653 | ) 654 | ,( 655 | 'pascal' 656 | ,'Really?' 657 | ,'Add Pascal file to the repo' 658 | ,ARRAY['pas'] 659 | ) 660 | ,( 661 | 'perl' 662 | ,'Chmod 200' 663 | ,'Add Perl file to the repo' 664 | ,ARRAY['pl'] 665 | ) 666 | ,( 667 | 'php' 668 | ,'New Facebook is Born' 669 | ,'Add PHP file to the repo' 670 | ,ARRAY['php', 'php3', 'php4', 'php5', 'phtml'] 671 | ) 672 | ,( 673 | 'python' 674 | ,'Snakes on a plane' 675 | ,'Add Python file to the repo' 676 | ,ARRAY['py'] 677 | ) 678 | ,( 679 | 'ruby' 680 | ,'Back on the Rails' 681 | ,'Add Ruby file to the repo' 682 | ,ARRAY['rake', 'rb'] 683 | ) 684 | ,( 685 | 'rust' 686 | ,'Can I borrow this?' 
687 | ,'Add Rust file to the repo' 688 | ,ARRAY['rs', 'rlib'] 689 | ) 690 | ,( 691 | 'scala' 692 | ,'Well Typed, Bro' 693 | ,'Add Scala file to the repo' 694 | ,ARRAY['scala'] 695 | ) 696 | ,( 697 | 'shell' 698 | ,'We’ll Rewrite that Later' 699 | ,'Add Bash file to the repo' 700 | ,ARRAY['bash', 'sh', 'awk', 'sed'] 701 | ) 702 | ,( 703 | 'sql' 704 | ,'Not a Web Scale' 705 | ,'Add SQL file to the repo' 706 | ,ARRAY['sql'] 707 | ) 708 | ,( 709 | 'swift' 710 | ,'I Need to Sort Complex Objects Fast!' 711 | ,'Add Swift file to the repo' 712 | ,ARRAY['swift'] 713 | ) 714 | ,( 715 | 'windows-language' 716 | ,'You Can''t Program on Windows, Can You?' 717 | ,'Add PowerShell file to the repo' 718 | ,ARRAY['bat', 'btm', 'cmd', 'ps1', 'xaml'] 719 | ) 720 | ,( 721 | 'xml' 722 | ,'Zed’s Dead, Baby' 723 | ,'Add XML file to the repo' 724 | ,ARRAY['xml', 'xsl', 'xslt', 'xsd', 'dtd'] 725 | ) 726 | ,( 727 | 'yaml' 728 | ,'No means no, not Norway' 729 | ,'Add YAML file to the repo' 730 | ,ARRAY['yml', 'yaml'] 731 | ) 732 | ) AS t(id, name, description, extensions); 733 | 734 | 735 | /*+ 736 | * 737 | 738 | {:description "StackOverflow URL in a commit body or message", 739 | :key :citation-needed, 740 | :name "Citation Needed"} 741 | 742 | {:description "5+ swear words in a commit message", 743 | :key :hello-linus, 744 | :name "Hello, Linus", 745 | :level-description "One level for each 5 swear words in a message"} 746 | 747 | {:description "Make commit #1000, or #1111, or #1234", 748 | :key :get, 749 | :name "Get"} 750 | 751 | 752 | 753 | {:description "Add GPL license file to the repo", 754 | :key :for-stallman, 755 | :name "For Stallman!"} 756 | 757 | {:description "Commit 2Mb file or bigger", 758 | :key :fat-ass, 759 | :name "Fat Ass"} 760 | 761 | {:description "Commit a file with just trailing spaces removed", 762 | :key :ocd, 763 | :name "OCD"} 764 | 765 | {:description "Delete a file that has been added in the initial commit (and at least a year has passed)", 766 | :key 
:all-things-die,
 :name "All Things Die"}

{:description "Commit time is 1 month or more after the author time",
 :key :alzheimers,
 :name "Alzheimer's"}

{:description "Misspell a word in a commit message",
 :key :borat,
 :name "Borat",
 :level-description "One level for each misspelled word in a message"}

{:description "Make 10+ commits with the same message",
 :key :catchphrase,
 :name "Catchphrase"}

{:description
 "Publish commit with the same N first chars of SHA-1 as existing commit",
 :key :collision,
 :name "Collision"}

{:description "10+ commits in a row",
 :key :combo,
 :name "Combo"}

{:description "Make a commit after someone had 10+ commits in a row",
 :key :combo-breaker,
 :name "Combo Breaker"}

{:description "Only add a comment",
 :key :commenter,
 :name "Commenter"}

{:description "Swap two lines",
 :key :easy-fix,
 :name "Easy Fix"}

{:description "Change tabs to spaces or vice versa",
 :key :holy-war,
 :name "Holy War"}

{:description "Update master branch with force mode",
 :key :deal-with-it,
 :name "Deal with it"}

{:description "Make a commit with 3+ parents",
 :key :hydra,
 :name "Hydra"}

{:description
 "Add/edit files in 3+ different languages in a single commit",
 :key :multilingual,
 :name "Multilingual"}

{:description "Two different commits within 15 seconds",
 :key :flash,
 :name "Flash"}

{:description "You are the only committer for a month",
 :key :loneliness,
 :name "Loneliness"}

{:description "Make a commit to a repo that hasn’t been touched for 1 month or more",
 :key :necromancer,
 :name "Necromancer"}

{:description "Make 100+ non-merge commits",
 :key :worker-bee,
 :name "Worker Bee"}

{:description "Commit and revert commit within 1 minute",
 :key :ooops,
 :name "Ooops"}

{:description "Your commit was reverted completely by someone else",
 :key :waste,
 :name "Waste"}

{:description "Edit a file that hasn’t been touched for a year",
 :key :what-happened-here,
 :name "What Happened Here?"}

{:description "Resolve 100 conflicts",
 :key :peacemaker,
 :name "Peacemaker"}



{:description "Get 5 achievements with 1 commit",
 :key :munchkin,
 :name "Munchkin"}

{:description "Get all achievements",
 :key :quest-complete,
 :name "Quest Complete"}

{:description "Zero achievements after 100 of your own commits",
 :key :unpretending,
 :name "Unpretending"}
])
*/


-- Calendar achievements won per identity: one row per (achievement, identity)
-- with the earliest matching commit and the number of matching commits.
-- A commit is attributed to an identity through its primary or extra e-mails.
-- NULL bounds in the achievement definition mean "no restriction".
CREATE TABLE memory.default.acquired_calendar AS
SELECT
    a.id,
    a.name,
    a.description,
    i.name AS author_name,
    i.email,
    min_by(c.object_id, c.commit_time) AS achieved_in,
    min(c.commit_time) AS achieved_at,
    count(*) AS num_achieved
FROM commits c
JOIN memory.default.idents i ON c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
JOIN memory.default.achievements_calendar a ON
    (a.month IS NULL OR MONTH(c.commit_time) = a.month)
    AND DAY(c.commit_time) BETWEEN COALESCE(a.day_from, 1) AND COALESCE(a.day_to, 31)
    AND (a.doy IS NULL OR DOY(c.commit_time) = a.doy)
    AND (a.dow IS NULL OR DOW(c.commit_time) = a.dow)
    -- minute of the day of the commit, compared against the allowed window
    AND (EXTRACT(HOUR FROM c.commit_time) * 60 + EXTRACT(MINUTE FROM c.commit_time)) BETWEEN COALESCE(a.minute_from, 0) AND COALESCE(a.minute_to, 60 * 24)
GROUP BY
    a.id,
    a.name,
    a.description,
    i.name,
    i.email;

-- Changed-file achievements won per identity.
-- bitwise_right_shift(bitwise_not(0), 1) is the largest BIGINT and stands for
-- "no upper bound" when the achievement leaves the *_to column NULL.
-- NOTE(review): the diff_stats subquery groups by commit, change type AND path,
-- so `renamed` / `modified` look like per-file counts rather than per-commit
-- totals; thresholds above 1 in moved_from / changed_from would presumably
-- never be reached -- confirm the intended grain.
CREATE TABLE memory.default.acquired_changed_files AS
SELECT
    a.id,
    a.name,
    a.description,
    i.name AS author_name,
    i.email,
    min_by(c.object_id, c.commit_time) AS achieved_in,
    min(c.commit_time) AS achieved_at,
    count(*) AS num_achieved
FROM commits c
JOIN (
    SELECT
        commit_id,
        change_type,
        path_name,
        count(*) FILTER (WHERE change_type = 'Rename' AND added_lines = 0 AND deleted_lines = 0) AS renamed,
        count(*) FILTER (WHERE change_type != 'Rename' OR (added_lines != 0 AND deleted_lines != 0)) AS modified
    FROM diff_stats
    GROUP BY commit_id, change_type, path_name
) s ON s.commit_id = c.object_id
JOIN memory.default.idents i ON c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
JOIN memory.default.achievements_changed_files a ON
    s.renamed BETWEEN COALESCE(a.moved_from, 0) AND COALESCE(a.moved_to, bitwise_right_shift(bitwise_not(0), 1))
    AND s.modified BETWEEN COALESCE(a.changed_from, 0) AND COALESCE(a.changed_to, bitwise_right_shift(bitwise_not(0), 1))
    AND (a.change_types IS NULL OR contains(a.change_types, s.change_type))
    AND (a.path_regex IS NULL OR regexp_like(s.path_name, a.path_regex))
GROUP BY
    a.id,
    a.name,
    a.description,
    i.name,
    i.email;


-- Changed-line achievements won per identity, based on the per-commit totals
-- of added and deleted lines in commit_stats.
CREATE TABLE memory.default.acquired_changed_lines AS
SELECT
    a.id,
    a.name,
    a.description,
    i.name AS author_name,
    i.email,
    min_by(c.object_id, c.commit_time) AS achieved_in,
    min(c.commit_time) AS achieved_at,
    count(*) AS num_achieved
FROM commit_stats c
JOIN memory.default.idents i ON c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
JOIN memory.default.achievements_changed_lines a ON
    c.added_lines BETWEEN COALESCE(a.added_from, 0) AND COALESCE(a.added_to, bitwise_right_shift(bitwise_not(0), 1))
    -- The upper bound must be removed_to; reusing removed_from here forced the
    -- deleted line count to equal removed_from exactly.
    -- (assumes the view exposes removed_to, mirroring added_from / added_to)
    AND c.deleted_lines BETWEEN COALESCE(a.removed_from, 0) AND COALESCE(a.removed_to, bitwise_right_shift(bitwise_not(0), 1))
    AND (NOT changed_equal OR c.added_lines = c.deleted_lines)
GROUP BY
    a.id,
    a.name,
    a.description,
    i.name,
    i.email;


-- Language achievements won per identity.
-- The inner query collects, per commit, the distinct file name suffixes (text
-- after the last dot) of files added by the commit; for commits without
-- parents the suffixes of all files in the commit's tree are used instead.
CREATE TABLE memory.default.acquired_languages AS
SELECT
    a.id,
    a.name,
    a.description,
    i.name AS author_name,
    i.email,
    min_by(c.object_id, c.commit_time) AS achieved_in,
    min(c.commit_time) AS achieved_at,
    count(DISTINCT c.object_id) AS num_achieved
FROM (
    SELECT
        c.commit_time,
        c.object_id,
        c.author_email,
        array_agg(DISTINCT reverse(split_part(reverse(s.path_name), '.', 1))) AS extensions
    FROM commits c
    JOIN diff_stats s ON s.commit_id = c.object_id AND s.change_type = 'Add'
    GROUP BY c.commit_time, c.object_id, c.author_email
    UNION
    SELECT
        c.commit_time,
        c.object_id,
        c.author_email,
        array_agg(DISTINCT reverse(split_part(reverse(t.file_name), '.', 1))) AS extensions
    FROM commits c
    JOIN trees t ON t.commit_id = c.object_id
    WHERE CARDINALITY(c.parents) = 0
    GROUP BY c.commit_time, c.object_id, c.author_email
) c
JOIN memory.default.idents i ON c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
JOIN memory.default.achievements_languages a ON
    arrays_overlap(c.extensions, a.extensions)
GROUP BY
    a.id,
    a.name,
    a.description,
    i.name,
    i.email;


-- Word / message / commit-id achievements won per identity.
-- Commit messages and identity names are split on every character that is
-- neither alphabetic nor a digit, empty pieces are dropped and the remaining
-- words are lower-cased before they are compared.
CREATE TABLE memory.default.acquired_words AS
SELECT
    a.id,
    a.name,
    a.description,
    i.name AS author_name,
    i.email,
    min_by(c.object_id, c.commit_time) AS achieved_in,
    min(c.commit_time) AS achieved_at,
    count(*) AS num_achieved
FROM (
    SELECT
        commit_time,
        object_id,
        author_email,
        message,
        TRANSFORM(FILTER(regexp_split(message, '[^\p{Alphabetic}\p{Digit}]'), x -> x != ''), x -> lower(x)) AS words
    FROM commits
) c
JOIN (
    SELECT
        name,
        email,
        extra_emails,
        TRANSFORM(FILTER(regexp_split(name || ' ' || concat_ws(' ', extra_names), '[^\p{Alphabetic}\p{Digit}]'), x -> x != ''), x -> lower(x)) AS words
    FROM memory.default.idents
) i ON c.author_email = i.email OR CONTAINS(i.extra_emails, c.author_email)
JOIN memory.default.achievements_words a ON
    (a.words IS NULL OR arrays_overlap(a.words, c.words))
    AND (NOT a.include_self OR arrays_overlap(c.words, i.words))
    AND (NOT a.exclude_self OR NOT arrays_overlap(c.words, i.words))
    AND (a.message_regex IS NULL OR regexp_like(c.message, a.message_regex))
    AND (a.id_regex IS NULL OR regexp_like(c.object_id, a.id_regex))
GROUP BY
    a.id,
    a.name,
    a.description,
    i.name,
    i.email;

-- List all achievements with num of all wins, percentage of winners, date and commit id of first win and top3 winners (by number of wins, first winner wins ties)
WITH acha AS (
    -- every defined achievement, regardless of category
    SELECT id, name, description FROM memory.default.achievements_calendar
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_changed_files
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_changed_lines
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_languages
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_words
), acq AS (
    -- every (achievement, identity) win; columns are listed explicitly so the
    -- UNION ALL does not depend on the physical column order of the tables
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_calendar
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_changed_files
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_changed_lines
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_languages
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_words
)
SELECT
    acha.name,
    acha.description,
    count(acq.id) AS num_winners,
    FORMAT('%.2f', 100 * cast(count(acq.id) AS DOUBLE) / i.idents_count) AS percent_winners,
    min(acq.achieved_at) AS first_achieved_at,
    min_by(acq.achieved_in, acq.achieved_at) AS first_achieved_in,
    slice(array_agg(acq.author_name || ' <' || acq.email || '>' ORDER BY acq.num_achieved DESC, acq.achieved_at), 1, 3) AS top3
FROM acha
LEFT JOIN acq ON acq.id = acha.id
CROSS JOIN (SELECT COUNT(*) AS idents_count FROM memory.default.idents) i
GROUP BY acha.id, acha.name, acha.description, i.idents_count
-- achievements nobody has won yet (count 0) are pushed to the end
ORDER BY NULLIF(num_winners, 0) NULLS LAST, acha.name
;

-- List all winners with num and percent of achievements and all achievements (with date and commit)
WITH acha AS (
    -- every defined achievement, regardless of category
    SELECT id, name, description FROM memory.default.achievements_calendar
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_changed_files
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_changed_lines
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_languages
    UNION ALL
    SELECT id, name, description FROM memory.default.achievements_words
), acq AS (
    -- every (achievement, identity) win; columns are listed explicitly so the
    -- UNION ALL does not depend on the physical column order of the tables
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_calendar
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_changed_files
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_changed_lines
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_languages
    UNION ALL
    SELECT id, name, description, author_name, email, achieved_in, achieved_at, num_achieved FROM memory.default.acquired_words
)
SELECT
    acq.author_name,
    acq.email,
    count(acq.id) OVER w AS num_achievements,
    format('%.2f', 100 * cast(count(acq.id) OVER w AS double) / a.achievements_count) AS percent_achievments,
    acq.name,
    acq.description,
    acq.achieved_in,
    acq.achieved_at,
    acq.num_achieved
-- NOTE(review): because of the LEFT JOIN from acha, achievements nobody has won
-- produce one row with NULL winner columns -- confirm this is intended.
FROM acha
LEFT JOIN acq ON acq.id = acha.id
CROSS JOIN (SELECT COUNT(*) AS achievements_count FROM acha) a
-- a named window must be declared as `name AS (specification)`
WINDOW w AS (PARTITION BY acq.email)
ORDER BY acq.author_name, acq.name
;
--------------------------------------------------------------------------------