├── .github ├── ISSUE_TEMPLATE │ ├── bug_report_template.yaml │ ├── feature_request_template.yaml │ └── openhouse_rfc.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── build-run-tests.yml │ ├── build-tag-publish.yml │ └── pr-validations.yml ├── .gitignore ├── ARCHITECTURE.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEPLOY.md ├── LICENSE ├── NOTICE ├── README.md ├── SECURITY.md ├── SETUP.md ├── apps ├── spark-3.5 │ ├── build.gradle │ └── src │ │ └── test │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── catalog │ │ └── e2e │ │ └── SparkMoRFunctionalTest.java └── spark │ ├── build.gradle │ └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── jobs │ │ │ ├── client │ │ │ ├── JobsClient.java │ │ │ ├── JobsClientFactory.java │ │ │ ├── StorageClient.java │ │ │ ├── TablesClient.java │ │ │ └── TablesClientFactory.java │ │ │ ├── exception │ │ │ ├── OperationTaskException.java │ │ │ └── TableValidationException.java │ │ │ ├── scheduler │ │ │ ├── JobsScheduler.java │ │ │ └── tasks │ │ │ │ ├── OperationTask.java │ │ │ │ ├── OperationTaskFactory.java │ │ │ │ ├── OperationTasksBuilder.java │ │ │ │ ├── OrphanTableDirectoryDeletionTask.java │ │ │ │ ├── TableDataCompactionTask.java │ │ │ │ ├── TableDataLayoutStrategyExecutionTask.java │ │ │ │ ├── TableDataLayoutStrategyGenerationTask.java │ │ │ │ ├── TableDirectoryOperationTask.java │ │ │ │ ├── TableOperationTask.java │ │ │ │ ├── TableOrphanFilesDeletionTask.java │ │ │ │ ├── TableRetentionTask.java │ │ │ │ ├── TableSnapshotsExpirationTask.java │ │ │ │ ├── TableStagedFilesDeletionTask.java │ │ │ │ └── TableStatsCollectionTask.java │ │ │ ├── spark │ │ │ ├── BaseSparkApp.java │ │ │ ├── BaseTableDirectorySparkApp.java │ │ │ ├── BaseTableSparkApp.java │ │ │ ├── DataCompactionSparkApp.java │ │ │ ├── DataLayoutStrategyGeneratorSparkApp.java │ │ │ ├── NoOpSparkApp.java │ │ │ ├── OpenHouseCatalogSQLTestSparkApp.java │ │ │ ├── Operations.java │ │ │ ├── OrphanFilesDeletionSparkApp.java │ │ │ ├── OrphanTableDirectoryDeletionSparkApp.java │ │ │ ├── RetentionSparkApp.java │ │ │ ├── SnapshotsExpirationSparkApp.java │ │ │ ├── StagedFilesDeletionSparkApp.java │ │ │ ├── TableStatsCollectionSparkApp.java │ │ │ └── state │ │ │ │ └── StateManager.java │ │ │ └── util │ │ │ ├── AppConstants.java │ │ │ ├── DataLayoutUtil.java │ │ │ ├── DatabaseTableFilter.java │ │ │ ├── DirectoryMetadata.java │ │ │ ├── FilesSummary.java │ │ │ ├── HistoryConfig.java │ │ │ ├── Metadata.java │ │ │ ├── OtelConfig.java │ │ │ ├── ReplicationConfig.java │ │ │ ├── RetentionConfig.java │ │ │ ├── RetryUtil.java │ │ │ ├── SparkJobUtil.java │ │ │ ├── TableDataLayoutMetadata.java │ │ │ ├── TableMetadata.java │ │ │ ├── TableStateValidator.java │ │ │ ├── TableStatsCollector.java │ │ │ ├── TableStatsCollectorUtil.java │ │ │ └── TablesValidationQueries.java │ └── resources │ │ └── log4j2.properties │ └── test │ └── java │ └── com │ └── linkedin │ └── openhouse │ ├── catalog │ └── e2e │ │ ├── SparkSchemaEvolutionTest.java │ │ └── WapIdJavaTest.java │ └── jobs │ ├── clients │ ├── JobsClientTest.java │ └── TablesClientTest.java │ ├── scheduler │ ├── JobsSchedulerTest.java │ └── tasks │ │ ├── SnapshotExpirationTaskTest.java │ │ └── TableRetentionTaskTest.java │ ├── spark │ ├── AppsTest.java │ ├── DataLayoutStrategyGeneratorSparkAppTest.java │ ├── OperationsTest.java │ └── state │ │ └── StateManagerTest.java │ └── util │ ├── DataLayoutUtilTest.java │ ├── DatabaseTableFilterTest.java │ ├── DeleteStagedFilesTest.java │ ├── 
SimpleRecord.java │ ├── SparkJobUtilTest.java │ └── TableStateValidatorTest.java ├── build.gradle ├── buildSrc ├── build.gradle └── src │ └── main │ └── groovy │ ├── openhouse.apps-spark-common.gradle │ ├── openhouse.client-codegen-convention.gradle │ ├── openhouse.hadoop-conventions.gradle │ ├── openhouse.iceberg-aws-conventions.gradle │ ├── openhouse.iceberg-azure-conventions.gradle │ ├── openhouse.iceberg-conventions-1.2.gradle │ ├── openhouse.iceberg-conventions-1.5.2.gradle │ ├── openhouse.java-conventions.gradle │ ├── openhouse.java-minimal-conventions.gradle │ ├── openhouse.maven-publish.gradle │ ├── openhouse.service-specgen-convention.gradle │ ├── openhouse.springboot-conventions.gradle │ └── openhouse.springboot-ext-conventions.gradle ├── client ├── common │ ├── client_codegen.sh │ ├── codegen.build.gradle │ └── jar_download.sh ├── hts │ └── build.gradle ├── jobsclient │ └── build.gradle ├── secureclient │ ├── build.gradle │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── client │ │ │ └── ssl │ │ │ ├── HousetablesApiClientFactory.java │ │ │ ├── HttpConnectionStrategy.java │ │ │ ├── JobsApiClientFactory.java │ │ │ ├── TablesApiClientFactory.java │ │ │ └── WebClientFactory.java │ │ └── test │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── client │ │ └── ssl │ │ ├── HousetablesApiClientFactoryTest.java │ │ ├── JobsApiClientFactoryTest.java │ │ └── TablesApiClientFactoryTest.java └── tableclient │ └── build.gradle ├── cluster ├── configs │ ├── build.gradle │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── cluster │ │ └── configs │ │ ├── ClusterProperties.java │ │ ├── ClusterPropertiesUtil.java │ │ └── YamlPropertySourceFactory.java ├── metrics │ ├── build.gradle │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── cluster │ │ └── metrics │ │ ├── TagUtils.java │ │ └── micrometer │ │ └── MetricsReporter.java └── storage │ ├── build.gradle │ └── src │ ├── main │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── cluster │ │ └── storage │ │ ├── BaseStorage.java │ │ ├── BaseStorageClient.java │ │ ├── FsStorageUtils.java │ │ ├── Storage.java │ │ ├── StorageClient.java │ │ ├── StorageManager.java │ │ ├── StorageProvider.java │ │ ├── StorageType.java │ │ ├── adls │ │ ├── AdlsStorage.java │ │ └── AdlsStorageClient.java │ │ ├── configs │ │ └── StorageProperties.java │ │ ├── exception │ │ └── ConfigMismatchException.java │ │ ├── filesystem │ │ ├── FsStorageProvider.java │ │ ├── HdfsStorageProvider.java │ │ └── ParameterizedHdfsStorageProvider.java │ │ ├── hdfs │ │ ├── HdfsDelegationTokenRefresher.java │ │ ├── HdfsDelegationTokenRefresherConfig.java │ │ ├── HdfsStorage.java │ │ └── HdfsStorageClient.java │ │ ├── local │ │ ├── LocalStorage.java │ │ └── LocalStorageClient.java │ │ ├── s3 │ │ ├── S3Storage.java │ │ └── S3StorageClient.java │ │ └── selector │ │ ├── BaseStorageSelector.java │ │ ├── StorageSelector.java │ │ ├── StorageSelectorConfig.java │ │ └── impl │ │ ├── DefaultStorageSelector.java │ │ └── RegexStorageSelector.java │ └── test │ └── java │ └── com │ └── linkedin │ └── openhouse │ └── cluster │ └── storage │ └── selector │ ├── StorageSelectorConfigTest.java │ └── impl │ └── RegexStorageSelectorTest.java ├── docs ├── development │ ├── client-code-generation.md │ ├── container-images.md │ ├── ide-setup-for-shadow-jars.gif │ └── intellij-setup.md ├── images │ ├── openhouse-controlplane.jpeg │ ├── 
openhouse-deployed-architecture.jpeg │ └── openhouse-logo.jpeg └── specs │ ├── README.md │ ├── catalog.md │ ├── housetables.md │ └── jobs.md ├── example-workflow-template └── main.yaml ├── gradle.properties ├── gradle ├── checkstyle │ ├── checkstyle.xml │ └── suppressions.xml ├── spotbugs │ ├── spotbugsExclude.xml │ └── spotbugsInclude.xml └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── housetables-service.Dockerfile ├── iceberg ├── azure │ ├── LICENSE │ ├── NOTICE │ ├── build.gradle │ └── src │ │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ └── iceberg │ │ │ └── azure │ │ │ ├── AzureProperties.java │ │ │ ├── adlsv2 │ │ │ ├── ADLSFileIO.java │ │ │ ├── ADLSInputFile.java │ │ │ ├── ADLSInputStream.java │ │ │ ├── ADLSLocation.java │ │ │ ├── ADLSOutputFile.java │ │ │ ├── ADLSOutputStream.java │ │ │ └── BaseADLSFile.java │ │ │ └── io │ │ │ └── DelegateFileIO.java │ │ └── test │ │ └── java │ │ └── org │ │ └── apache │ │ └── iceberg │ │ └── azure │ │ └── adlsv2 │ │ └── AzuriteContainer.java └── openhouse │ ├── htscatalog │ ├── build.gradle │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── hts │ │ │ └── catalog │ │ │ ├── api │ │ │ ├── IcebergRow.java │ │ │ └── IcebergRowPrimaryKey.java │ │ │ ├── data │ │ │ ├── GenericIcebergRowReadersWriters.java │ │ │ └── IcebergDataUtils.java │ │ │ ├── model │ │ │ ├── jobtable │ │ │ │ ├── JobIcebergRow.java │ │ │ │ └── JobIcebergRowPrimaryKey.java │ │ │ └── usertable │ │ │ │ ├── UserTableIcebergRow.java │ │ │ │ └── UserTableIcebergRowPrimaryKey.java │ │ │ └── repository │ │ │ └── IcebergHtsRepository.java │ │ └── test │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── hts │ │ └── catalog │ │ ├── mock │ │ ├── api │ │ │ ├── TestJobIcebergRow.java │ │ │ └── TestUserTableIcebergRow.java │ │ ├── data │ │ │ ├── GenericIcebergRowReadersWritersTest.java │ │ │ └── IcebergDataUtilsTest.java │ │ ├── model │ │ │ ├── TestIcebergRow.java │ │ │ └── TestIcebergRowPrimaryKey.java │ │ └── repository │ │ │ └── IcebergHtsRepositoryTest.java │ │ └── model │ │ └── HtsCatalogConstants.java │ └── internalcatalog │ ├── build.gradle │ └── src │ ├── main │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── internal │ │ └── catalog │ │ ├── CatalogConstants.java │ │ ├── InternalCatalogMetricsConstant.java │ │ ├── OpenHouseInternalCatalog.java │ │ ├── OpenHouseInternalTableOperations.java │ │ ├── SnapshotInspector.java │ │ ├── SnapshotsUtil.java │ │ ├── exception │ │ └── InvalidIcebergSnapshotException.java │ │ ├── fileio │ │ ├── FileIOConfig.java │ │ └── FileIOManager.java │ │ ├── mapper │ │ ├── HouseTableMapper.java │ │ └── HouseTableSerdeUtils.java │ │ ├── model │ │ ├── HouseTable.java │ │ └── HouseTablePrimaryKey.java │ │ ├── repository │ │ ├── CustomRetryListener.java │ │ ├── HouseTableRepository.java │ │ ├── HouseTableRepositoryImpl.java │ │ ├── HtsRetryUtils.java │ │ └── exception │ │ │ ├── HouseTableCallerException.java │ │ │ ├── HouseTableConcurrentUpdateException.java │ │ │ ├── HouseTableNotFoundException.java │ │ │ ├── HouseTableRepositoryException.java │ │ │ └── HouseTableRepositoryStateUnknownException.java │ │ └── toggle │ │ └── IcebergFeatureGate.java │ ├── test │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── internal │ │ │ └── catalog │ │ │ ├── IcebergTestUtil.java │ │ │ ├── MockApplication.java │ │ │ ├── OpenHouseInternalTableOperationsTest.java │ │ │ ├── SnapshotInspectorTest.java │ │ │ ├── SnapshotsUtilTest.java │ │ │ ├── 
fileio │ │ │ └── FileIOManagerTest.java │ │ │ ├── mapper │ │ │ └── HouseTableMapperTest.java │ │ │ ├── model │ │ │ └── HouseTableTest.java │ │ │ └── repository │ │ │ └── HouseTableRepositoryImplTest.java │ └── resources │ │ ├── extra_serialized_snapshots.json │ │ ├── future_serialized_snapshots.json │ │ ├── serialized_snapshots.json │ │ └── wap_serialized_snapshots.json │ └── testFixtures │ └── java │ └── com │ └── linkedin │ └── openhouse │ └── internal │ └── catalog │ └── HouseTableModelConstants.java ├── infra └── recipes │ ├── docker-compose │ ├── common │ │ ├── debug-profile │ │ │ ├── housetables.yml │ │ │ ├── jobs.yml │ │ │ └── tables.yml │ │ ├── hadoop │ │ │ └── hadoop.env │ │ ├── hdfs-services.yml │ │ ├── intellij-setup.png │ │ ├── mysql-services.yml │ │ ├── oauth-services.yml │ │ ├── oauth │ │ │ ├── fetch_token.sh │ │ │ ├── openhouse-realm.json │ │ │ └── validate_token.sh │ │ ├── oh-services.yml │ │ ├── opa-services.yml │ │ ├── opa │ │ │ ├── data.json │ │ │ └── policy.rego │ │ ├── prometheus │ │ │ └── prometheus.yml │ │ ├── s3-services.yml │ │ ├── spark-services.yml │ │ └── spark │ │ │ ├── livy_spark3_hadoop3.patch │ │ │ ├── spark-3.5-base-hadoop3.2.dockerfile │ │ │ ├── spark-base-hadoop2.8.dockerfile │ │ │ └── start-spark.sh │ ├── oh-abs-spark │ │ ├── cluster.yaml │ │ └── docker-compose.yml │ ├── oh-hadoop-spark │ │ ├── cluster.yaml │ │ ├── docker-compose.yml │ │ └── jobs.yaml │ ├── oh-hadoop │ │ ├── cluster.yaml │ │ └── docker-compose.yml │ ├── oh-only │ │ ├── cluster.yaml │ │ └── docker-compose.yml │ ├── oh-s3-spark │ │ ├── cluster.yaml │ │ └── docker-compose.yml │ └── spark-only │ │ └── docker-compose.yml │ ├── k8s │ ├── config │ │ ├── housetables │ │ │ └── minikube │ │ │ │ └── cluster.yaml │ │ └── tables │ │ │ └── minikube │ │ │ └── cluster.yaml │ ├── environments │ │ ├── azure │ │ │ └── sandbox │ │ │ │ ├── housetables │ │ │ │ └── values.yaml │ │ │ │ └── tables │ │ │ │ └── values.yaml │ │ └── minikube │ │ │ ├── housetables │ │ │ └── values.yaml │ │ │ └── tables │ │ │ └── values.yaml │ ├── helm │ │ ├── housetables │ │ │ ├── Chart.yaml │ │ │ ├── config │ │ │ │ ├── azure │ │ │ │ │ └── sandbox │ │ │ │ │ │ └── cluster.yaml │ │ │ │ └── minikube │ │ │ │ │ └── cluster.yaml │ │ │ └── templates │ │ │ │ ├── hts-configmap.yaml │ │ │ │ ├── hts-deployment.yaml │ │ │ │ └── hts-service.yaml │ │ └── tables │ │ │ ├── Chart.yaml │ │ │ ├── config │ │ │ ├── azure │ │ │ │ └── sandbox │ │ │ │ │ └── cluster.yaml │ │ │ └── minikube │ │ │ │ └── cluster.yaml │ │ │ └── templates │ │ │ ├── tables-configmap.yaml │ │ │ ├── tables-deployment.yaml │ │ │ └── tables-service.yaml │ └── templates │ │ ├── housetables │ │ ├── hts-configmap.yaml │ │ ├── hts-deployment.yaml │ │ └── hts-service.yaml │ │ └── tables │ │ ├── tables-configmap.yaml │ │ ├── tables-deployment.yaml │ │ └── tables-service.yaml │ └── terraform │ └── azure │ ├── README.md │ ├── backend.tfvars.template │ ├── environments │ ├── container │ │ ├── backend.tf │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ ├── variables.tf │ │ └── versions.tf │ └── sandbox │ │ ├── README.md │ │ ├── backend.tf │ │ ├── common_provider.tf │ │ ├── common_variables.tf │ │ ├── common_versions.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ ├── scripts │ │ ├── populate-tables.scala │ │ └── spark-client.sh │ │ └── variables.tf │ ├── k8s_variables.tf │ ├── modules │ ├── container │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── helm_release │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── image │ │ ├── 
common_variables.tf │ │ ├── common_versions.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── k8s │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── mysql │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── storage │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ └── vm │ │ ├── common_variables.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf │ ├── provider.tf │ ├── variables.tf │ └── versions.tf ├── integrations ├── java │ ├── iceberg-1.2 │ │ ├── openhouse-java-itest │ │ │ ├── build.gradle │ │ │ └── src │ │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── javaclient │ │ │ │ ├── OpenHouseTableOperationsTest.java │ │ │ │ └── SmokeTest.java │ │ └── openhouse-java-runtime │ │ │ ├── build.gradle │ │ │ └── src │ │ │ └── main │ │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── javaclient │ │ │ │ ├── OpenHouseCatalog.java │ │ │ │ ├── OpenHouseMetricsReporter.java │ │ │ │ ├── OpenHouseTableOperations.java │ │ │ │ ├── api │ │ │ │ └── SupportsGrantRevoke.java │ │ │ │ ├── audit │ │ │ │ ├── OpenHouseReportHandler.java │ │ │ │ └── impl │ │ │ │ │ └── OpenHouseReportLogPublish.java │ │ │ │ ├── builder │ │ │ │ ├── ClusteringSpecBuilder.java │ │ │ │ ├── PartitionSpecBuilder.java │ │ │ │ └── TimePartitionSpecBuilder.java │ │ │ │ ├── exception │ │ │ │ ├── WebClientRequestWithMessageException.java │ │ │ │ ├── WebClientResponseWithMessageException.java │ │ │ │ └── WebClientWithMessageException.java │ │ │ │ └── mapper │ │ │ │ ├── Privileges.java │ │ │ │ └── SparkMapper.java │ │ │ └── resources │ │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── relocated │ │ │ └── org │ │ │ └── springframework │ │ │ └── http │ │ │ └── codec │ │ │ └── CodecConfigurer.properties │ └── iceberg-1.5 │ │ ├── openhouse-java-itest │ │ └── build.gradle │ │ └── openhouse-java-runtime │ │ └── build.gradle └── spark │ ├── spark-3.1 │ ├── openhouse-spark-itest │ │ ├── build.gradle │ │ └── src │ │ │ └── test │ │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── spark │ │ │ │ ├── MockHelpers.java │ │ │ │ ├── ResourceIoHelper.java │ │ │ │ ├── SparkTestBase.java │ │ │ │ ├── catalogtest │ │ │ │ ├── CTASNonNullTest.java │ │ │ │ ├── CatalogOperationTest.java │ │ │ │ ├── PartitionTest.java │ │ │ │ └── WapIdTest.java │ │ │ │ ├── e2e │ │ │ │ ├── ddl │ │ │ │ │ ├── AlterTableSchemaTest.java │ │ │ │ │ ├── AlterTableTest.java │ │ │ │ │ ├── CreateTablePartitionedTest.java │ │ │ │ │ ├── CreateTableTest.java │ │ │ │ │ ├── CreateTableWithPropsTest.java │ │ │ │ │ ├── DescribeTableTest.java │ │ │ │ │ ├── DropTableTest.java │ │ │ │ │ ├── InsertOverwriteTableTest.java │ │ │ │ │ ├── ShowDatabasesTest.java │ │ │ │ │ ├── ShowTablesTest.java │ │ │ │ │ └── UseCatalogTest.java │ │ │ │ ├── dml │ │ │ │ │ ├── CRTASTest.java │ │ │ │ │ ├── CTASTest.java │ │ │ │ │ ├── InsertIntoTableTest.java │ │ │ │ │ ├── RTASTest.java │ │ │ │ │ └── SelectFromTableTest.java │ │ │ │ └── extensions │ │ │ │ │ ├── GrantStatementTest.java │ │ │ │ │ ├── MultiCommentsTest.java │ │ │ │ │ └── SetRetentionPolicyTest.java │ │ │ │ ├── mock │ │ │ │ ├── DoCommitTest.java │ │ │ │ ├── DoRefreshTest.java │ │ │ │ └── mapper │ │ │ │ │ └── IcebergCatalogMapperTest.java │ │ │ │ └── statementtest │ │ │ │ ├── GrantRevokeStatementTest.java │ │ │ │ ├── SetColumnPolicyTagStatementTest.java │ │ │ │ ├── SetHistoryPolicyStatementTest.java │ │ │ │ ├── 
SetSharingPolicyStatementTest.java │ │ │ │ ├── SetTablePolicyStatementTest.java │ │ │ │ └── SetTableReplicationPolicyStatementTest.java │ │ │ └── resources │ │ │ └── dummy.token │ └── openhouse-spark-runtime │ │ ├── build.gradle │ │ └── src │ │ └── main │ │ ├── antlr │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── spark │ │ │ └── sql │ │ │ └── catalyst │ │ │ └── parser │ │ │ └── extensions │ │ │ └── OpenhouseSqlExtensions.g4 │ │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── spark │ │ │ └── OpenHouseCatalog.java │ │ └── scala │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── spark │ │ ├── extensions │ │ └── OpenhouseSparkSessionExtensions.scala │ │ └── sql │ │ ├── catalyst │ │ ├── constants │ │ │ └── Principal.scala │ │ ├── enums │ │ │ └── GrantableResourceTypes.scala │ │ ├── parser │ │ │ └── extensions │ │ │ │ ├── OpenhouseSparkSqlExtensionsParser.scala │ │ │ │ └── OpenhouseSqlExtensionsAstBuilder.scala │ │ └── plans │ │ │ └── logical │ │ │ ├── GrantRevokeStatement.scala │ │ │ ├── SetColumnPolicyTag.scala │ │ │ ├── SetHistoryPolicy.scala │ │ │ ├── SetReplicationPolicy.scala │ │ │ ├── SetRetentionPolicy.scala │ │ │ ├── SetSharingPolicy.scala │ │ │ ├── ShowGrantsStatement.scala │ │ │ └── UnSetReplicationPolicy.scala │ │ └── execution │ │ └── datasources │ │ └── v2 │ │ ├── GrantRevokeStatementExec.scala │ │ ├── OpenhouseDataSourceV2Strategy.scala │ │ ├── SetColumnPolicyTagExec.scala │ │ ├── SetHistoryPolicyExec.scala │ │ ├── SetReplicationPolicyExec.scala │ │ ├── SetRetentionPolicyExec.scala │ │ ├── SetSharingPolicyExec.scala │ │ ├── ShowGrantsStatementExec.scala │ │ ├── UnSetReplicationPolicyExec.scala │ │ └── mapper │ │ └── IcebergCatalogMapper.scala │ └── spark-3.5 │ ├── openhouse-spark-itest │ ├── build.gradle │ └── src │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── spark │ │ │ ├── MockHelpersSpark3_5.java │ │ │ ├── catalogtest │ │ │ ├── CTASNonNullTestSpark3_5.java │ │ │ └── PartitionTestSpark3_5.java │ │ │ └── e2e │ │ │ ├── ddl │ │ │ ├── AlterTableSchemaTestSpark3_5.java │ │ │ ├── AlterTableTestSpark3_5.java │ │ │ ├── CreateTablePartitionedTestSpark3_5.java │ │ │ ├── CreateTableTestSpark3_5.java │ │ │ ├── CreateTableWithPropsTestSpark3_5.java │ │ │ ├── DescribeTableTestSpark3_5.java │ │ │ ├── DropTableTestSpark3_5.java │ │ │ └── ShowTablesTestSpark3_5.java │ │ │ └── dml │ │ │ └── InsertIntoTableTestSpark3_5.java │ │ └── resources │ │ └── dummy.token │ └── openhouse-spark-runtime │ ├── build.gradle │ └── src │ └── main │ └── scala │ └── com │ └── linkedin │ └── openhouse │ └── spark │ └── sql │ ├── catalyst │ ├── parser │ │ └── extensions │ │ │ └── OpenhouseSparkSqlExtensionsParser.scala │ └── plans │ │ └── logical │ │ ├── GrantRevokeStatement.scala │ │ ├── SetColumnPolicyTag.scala │ │ ├── SetHistoryPolicy.scala │ │ ├── SetReplicationPolicy.scala │ │ ├── SetRetentionPolicy.scala │ │ ├── SetSharingPolicy.scala │ │ ├── ShowGrantsStatement.scala │ │ └── UnSetReplicationPolicy.scala │ └── execution │ └── datasources │ └── v2 │ ├── GrantRevokeStatementExec.scala │ ├── SetColumnPolicyTagExec.scala │ ├── SetHistoryPolicyExec.scala │ ├── SetReplicationPolicyExec.scala │ ├── SetRetentionPolicyExec.scala │ ├── SetSharingPolicyExec.scala │ ├── ShowGrantsStatementExec.scala │ └── UnSetReplicationPolicyExec.scala ├── jobs-scheduler.Dockerfile ├── jobs-service.Dockerfile ├── libs └── datalayout │ ├── build.gradle │ └── src │ ├── main │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── datalayout │ │ ├── 
config │ │ └── DataCompactionConfig.java │ │ ├── datasource │ │ ├── DataSource.java │ │ ├── FileStat.java │ │ ├── PartitionStat.java │ │ ├── SnapshotStat.java │ │ ├── TableFileStats.java │ │ ├── TablePartitionStats.java │ │ └── TableSnapshotStats.java │ │ ├── generator │ │ ├── DataLayoutStrategyGenerator.java │ │ └── OpenHouseDataLayoutStrategyGenerator.java │ │ ├── persistence │ │ ├── StrategiesDao.java │ │ └── StrategiesDaoTableProps.java │ │ ├── ranker │ │ ├── BaseDataLayoutCandidateSelector.java │ │ ├── DataLayoutCandidateSelector.java │ │ ├── DataLayoutStrategyScorer.java │ │ ├── GreedyMaxBudgetCandidateSelector.java │ │ └── SimpleWeightedSumDataLayoutStrategyScorer.java │ │ └── strategy │ │ ├── DataLayoutStrategy.java │ │ └── ScoredDataLayoutStrategy.java │ └── test │ └── java │ └── com │ └── linkedin │ └── openhouse │ └── datalayout │ ├── datasource │ ├── TableFileStatsTest.java │ ├── TablePartitionStatsTest.java │ └── TableSnapshotStatsTest.java │ ├── e2e │ └── IntegrationTest.java │ ├── generator │ └── OpenHouseDataLayoutStrategyGeneratorTest.java │ ├── persistence │ └── StrategiesDaoTablePropsTest.java │ └── ranker │ └── WeightedSumDataLayoutStrategyScorerTest.java ├── run.sh ├── scripts ├── git-hooks │ ├── pre-commit │ └── pre-push ├── java │ └── tools │ │ └── dummytokens │ │ ├── build.gradle │ │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── tools │ │ └── dummytokens │ │ └── DummyTokenGenerator.java ├── python │ ├── README.md │ ├── integration_test.py │ ├── livy_cli.py │ └── requirements.txt └── spec_update.sh ├── services ├── common │ ├── build.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── common │ │ │ │ ├── JobState.java │ │ │ │ ├── api │ │ │ │ ├── spec │ │ │ │ │ ├── ApiRequest.java │ │ │ │ │ ├── ApiResponse.java │ │ │ │ │ ├── ErrorResponseBody.java │ │ │ │ │ └── TableUri.java │ │ │ │ └── validator │ │ │ │ │ ├── ApiValidatorUtil.java │ │ │ │ │ └── ValidatorConstants.java │ │ │ │ ├── audit │ │ │ │ ├── AuditHandler.java │ │ │ │ ├── CachingRequestBodyFilter.java │ │ │ │ ├── DummyServiceAuditHandler.java │ │ │ │ ├── ServiceAuditAspect.java │ │ │ │ └── model │ │ │ │ │ ├── BaseAuditEvent.java │ │ │ │ │ ├── ServiceAuditEvent.java │ │ │ │ │ └── ServiceName.java │ │ │ │ ├── config │ │ │ │ └── BaseApplicationConfig.java │ │ │ │ ├── exception │ │ │ │ ├── AlreadyExistsException.java │ │ │ │ ├── EntityConcurrentModificationException.java │ │ │ │ ├── InvalidSchemaEvolutionException.java │ │ │ │ ├── JobEngineException.java │ │ │ │ ├── JobStateConflictException.java │ │ │ │ ├── NoSuchEntityException.java │ │ │ │ ├── NoSuchJobException.java │ │ │ │ ├── NoSuchUserTableException.java │ │ │ │ ├── OpenHouseCommitStateUnknownException.java │ │ │ │ ├── RequestValidationFailureException.java │ │ │ │ ├── ResourceGatedByToggledOnFeatureException.java │ │ │ │ ├── UnprocessableEntityException.java │ │ │ │ ├── UnsupportedClientOperationException.java │ │ │ │ └── handler │ │ │ │ │ └── OpenHouseExceptionHandler.java │ │ │ │ ├── metrics │ │ │ │ └── MetricsConstant.java │ │ │ │ ├── provider │ │ │ │ └── HttpConnectionPoolProviderConfig.java │ │ │ │ ├── schema │ │ │ │ └── IcebergSchemaHelper.java │ │ │ │ ├── security │ │ │ │ ├── AuthenticationUtils.java │ │ │ │ ├── DummyAuthenticationContext.java │ │ │ │ └── DummyTokenInterceptor.java │ │ │ │ └── stats │ │ │ │ └── model │ │ │ │ ├── BaseTableMetadata.java │ │ │ │ ├── HistoryPolicyStatsSchema.java │ │ │ │ ├── IcebergTableStats.java │ │ │ │ ├── 
PolicyStats.java │ │ │ │ └── RetentionStatsSchema.java │ │ └── resources │ │ │ └── dummy.token │ │ ├── test │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── common │ │ │ │ ├── schema │ │ │ │ └── IcebergSchemaHelperTest.java │ │ │ │ └── security │ │ │ │ └── DummySecurityInterceptorTest.java │ │ └── resources │ │ │ ├── noisy-schema.json │ │ │ ├── one-line-schema.json │ │ │ └── schema.json │ │ └── testFixtures │ │ └── java │ │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── common │ │ └── test │ │ ├── cluster │ │ └── PropertyOverrideContextInitializer.java │ │ └── schema │ │ └── ResourceIoHelper.java ├── housetables │ ├── build.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ ├── HouseTablesSpringApplication.java │ │ │ │ └── housetables │ │ │ │ ├── api │ │ │ │ ├── ApiConfig.java │ │ │ │ ├── handler │ │ │ │ │ ├── HouseTablesApiHandler.java │ │ │ │ │ ├── JobTableHtsApiHandler.java │ │ │ │ │ ├── OpenHouseJobTableHtsApiHandler.java │ │ │ │ │ ├── OpenHouseToggleStatusesApiHandler.java │ │ │ │ │ ├── OpenHouseUserTableHtsApiHandler.java │ │ │ │ │ ├── ToggleStatusesApiHandler.java │ │ │ │ │ └── UserTableHtsApiHandler.java │ │ │ │ ├── spec │ │ │ │ │ ├── model │ │ │ │ │ │ ├── Job.java │ │ │ │ │ │ ├── JobKey.java │ │ │ │ │ │ ├── TableToggleStatusKey.java │ │ │ │ │ │ ├── ToggleStatus.java │ │ │ │ │ │ ├── ToggleStatusEnum.java │ │ │ │ │ │ ├── UserTable.java │ │ │ │ │ │ └── UserTableKey.java │ │ │ │ │ ├── request │ │ │ │ │ │ └── CreateUpdateEntityRequestBody.java │ │ │ │ │ └── response │ │ │ │ │ │ ├── EntityResponseBody.java │ │ │ │ │ │ └── GetAllEntityResponseBody.java │ │ │ │ └── validator │ │ │ │ │ ├── HouseTablesApiValidator.java │ │ │ │ │ └── impl │ │ │ │ │ ├── OpenHouseJobTablesHtsApiValidator.java │ │ │ │ │ └── OpenHouseUserTableHtsApiValidator.java │ │ │ │ ├── config │ │ │ │ ├── MainApplicationConfig.java │ │ │ │ └── db │ │ │ │ │ ├── DatabaseConfiguration.java │ │ │ │ │ ├── iceberg │ │ │ │ │ └── IcebergProviderConfiguration.java │ │ │ │ │ └── jdbc │ │ │ │ │ └── JdbcProviderConfiguration.java │ │ │ │ ├── controller │ │ │ │ ├── JobTablesController.java │ │ │ │ ├── ToggleStatusesController.java │ │ │ │ └── UserHouseTablesController.java │ │ │ │ ├── dto │ │ │ │ ├── mapper │ │ │ │ │ ├── JobMapper.java │ │ │ │ │ ├── UserTableVersionMapper.java │ │ │ │ │ └── UserTablesMapper.java │ │ │ │ └── model │ │ │ │ │ ├── JobDto.java │ │ │ │ │ ├── UserTableDto.java │ │ │ │ │ └── Utilities.java │ │ │ │ ├── model │ │ │ │ ├── JobRow.java │ │ │ │ ├── JobRowPrimaryKey.java │ │ │ │ ├── TableToggleRule.java │ │ │ │ ├── UserTableRow.java │ │ │ │ └── UserTableRowPrimaryKey.java │ │ │ │ ├── repository │ │ │ │ ├── HtsRepository.java │ │ │ │ └── impl │ │ │ │ │ ├── iceberg │ │ │ │ │ ├── JobTableHtsRepository.java │ │ │ │ │ └── UserTableHtsRepository.java │ │ │ │ │ └── jdbc │ │ │ │ │ ├── JobTableHtsJdbcRepository.java │ │ │ │ │ ├── ToggleStatusHtsJdbcRepository.java │ │ │ │ │ └── UserTableHtsJdbcRepository.java │ │ │ │ └── services │ │ │ │ ├── JobsService.java │ │ │ │ ├── JobsServiceImpl.java │ │ │ │ ├── TableToggleRuleMatcher.java │ │ │ │ ├── ToggleStatusesService.java │ │ │ │ ├── ToggleStatusesServiceImpl.java │ │ │ │ ├── UserTablesService.java │ │ │ │ ├── UserTablesServiceImpl.java │ │ │ │ └── WildcardTableToggleRuleMatcher.java │ │ └── resources │ │ │ ├── application.properties │ │ │ ├── data.sql │ │ │ └── schema.sql │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── housetables │ │ │ ├── dto │ │ 
│ └── model │ │ │ │ └── UtilitiesTest.java │ │ │ ├── e2e │ │ │ ├── SpringH2HtsApplication.java │ │ │ ├── job │ │ │ │ ├── JobControllerTest.java │ │ │ │ ├── JobRepositoryTest.java │ │ │ │ └── JobServiceTest.java │ │ │ ├── togglerule │ │ │ │ ├── ToggleStatusControllerTest.java │ │ │ │ ├── ToggleStatusRepositoryTest.java │ │ │ │ ├── ToggleStatusesServiceTest.java │ │ │ │ └── ToggleStatusesTestConstants.java │ │ │ └── usertable │ │ │ │ ├── HtsControllerTest.java │ │ │ │ ├── HtsRepositoryTest.java │ │ │ │ └── UserTablesServiceTest.java │ │ │ ├── mock │ │ │ ├── MockHouseTablesApplication.java │ │ │ ├── MockUserTableHtsApiHandler.java │ │ │ ├── TestOpenHouseHtsControllerConfig.java │ │ │ ├── WildcardTableToggleRuleMatcherTest.java │ │ │ ├── api │ │ │ │ ├── OpenHouseJobTablesValidatorTest.java │ │ │ │ └── OpenHouseUserTablesValidatorTest.java │ │ │ ├── controller │ │ │ │ └── UserHouseTablesControllerTest.java │ │ │ └── mapper │ │ │ │ ├── JobsMapperTest.java │ │ │ │ ├── UserTableVersionMapperTest.java │ │ │ │ └── UserTablesMapperTest.java │ │ │ └── model │ │ │ ├── ServiceAuditModelConstants.java │ │ │ ├── TestHouseTableModelConstants.java │ │ │ └── TestHtsApiConstants.java │ │ └── resources │ │ ├── application.properties │ │ └── data.sql ├── jobs │ ├── build.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── linkedin │ │ │ │ └── openhouse │ │ │ │ └── jobs │ │ │ │ ├── JobsSpringApplication.java │ │ │ │ ├── api │ │ │ │ ├── handler │ │ │ │ │ ├── JobsApiHandler.java │ │ │ │ │ └── impl │ │ │ │ │ │ └── OpenHouseJobsApiHandler.java │ │ │ │ ├── spec │ │ │ │ │ ├── request │ │ │ │ │ │ └── CreateJobRequestBody.java │ │ │ │ │ └── response │ │ │ │ │ │ └── JobResponseBody.java │ │ │ │ └── validator │ │ │ │ │ ├── JobsApiValidator.java │ │ │ │ │ └── impl │ │ │ │ │ └── OpenHouseJobsApiValidator.java │ │ │ │ ├── config │ │ │ │ ├── JobLaunchConf.java │ │ │ │ ├── JobsEngineProperties.java │ │ │ │ ├── JobsProperties.java │ │ │ │ └── MainApplicationConfig.java │ │ │ │ ├── controller │ │ │ │ ├── JobsController.java │ │ │ │ └── SwaggerConfig.java │ │ │ │ ├── dto │ │ │ │ └── mapper │ │ │ │ │ └── JobsMapper.java │ │ │ │ ├── model │ │ │ │ ├── JobConf.java │ │ │ │ ├── JobConfConverter.java │ │ │ │ ├── JobDto.java │ │ │ │ └── JobDtoPrimaryKey.java │ │ │ │ ├── repository │ │ │ │ ├── JobsInternalRepository.java │ │ │ │ ├── JobsInternalRepositoryImpl.java │ │ │ │ └── exception │ │ │ │ │ ├── JobNotFoundException.java │ │ │ │ │ ├── JobsInternalRepositoryTimeoutException.java │ │ │ │ │ ├── JobsInternalRepositoryUnavailableException.java │ │ │ │ │ ├── JobsTableCallerException.java │ │ │ │ │ └── JobsTableConcurrentUpdateException.java │ │ │ │ └── services │ │ │ │ ├── BatchJobInfo.java │ │ │ │ ├── HouseJobHandle.java │ │ │ │ ├── HouseJobsCoordinator.java │ │ │ │ ├── JobInfo.java │ │ │ │ ├── JobsCoordinatorManager.java │ │ │ │ ├── JobsRegistry.java │ │ │ │ ├── JobsService.java │ │ │ │ ├── JobsServiceImpl.java │ │ │ │ └── livy │ │ │ │ ├── LivyJobHandle.java │ │ │ │ └── LivyJobsCoordinator.java │ │ └── resources │ │ │ └── application.properties │ │ └── test │ │ ├── http │ │ ├── http-client.env.json │ │ ├── orphan_files_deletion_job.http │ │ ├── retention_job.http │ │ ├── snapshot_expiration_job.http │ │ ├── sql_test_job.http │ │ └── staged_files_deletion_job.http │ │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── jobs │ │ │ └── mock │ │ │ ├── CustomClusterPropertiesInitializer.java │ │ │ ├── JobModelConstants.java │ │ │ ├── JobsApiValidatorTest.java │ │ │ ├── JobsControllerTest.java │ │ │ ├── 
JobsCoordinatorManagerTest.java │ │ │ ├── JobsInternalRepositoryImplTest.java │ │ │ ├── JobsMapperTest.java │ │ │ ├── JobsPropertiesTest.java │ │ │ ├── JobsRegistryTest.java │ │ │ ├── JobsServiceTest.java │ │ │ ├── LivyJobHandleTest.java │ │ │ ├── LivyJobsCoordinatorTest.java │ │ │ ├── MockJobsApiHandler.java │ │ │ ├── RequestConstants.java │ │ │ └── ServiceAuditModelConstants.java │ │ └── resources │ │ ├── test-local-cluster.yaml │ │ └── test-local-jobs.yaml └── tables │ ├── build.gradle │ └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── tables │ │ │ ├── TablesSpringApplication.java │ │ │ ├── api │ │ │ ├── ApiConfig.java │ │ │ ├── handler │ │ │ │ ├── DatabasesApiHandler.java │ │ │ │ ├── IcebergSnapshotsApiHandler.java │ │ │ │ ├── TablesApiHandler.java │ │ │ │ └── impl │ │ │ │ │ ├── OpenHouseDatabasesApiHandler.java │ │ │ │ │ ├── OpenHouseIcebergSnapshotsApiHandler.java │ │ │ │ │ └── OpenHouseTablesApiHandler.java │ │ │ ├── spec │ │ │ │ ├── v0 │ │ │ │ │ ├── request │ │ │ │ │ │ ├── CreateUpdateLockRequestBody.java │ │ │ │ │ │ ├── CreateUpdateTableRequestBody.java │ │ │ │ │ │ ├── IcebergSnapshotsRequestBody.java │ │ │ │ │ │ ├── UpdateAclPoliciesRequestBody.java │ │ │ │ │ │ └── components │ │ │ │ │ │ │ ├── ClusteringColumn.java │ │ │ │ │ │ │ ├── History.java │ │ │ │ │ │ │ ├── LockState.java │ │ │ │ │ │ │ ├── Policies.java │ │ │ │ │ │ │ ├── PolicyTag.java │ │ │ │ │ │ │ ├── Replication.java │ │ │ │ │ │ │ ├── ReplicationConfig.java │ │ │ │ │ │ │ ├── Retention.java │ │ │ │ │ │ │ ├── RetentionColumnPattern.java │ │ │ │ │ │ │ ├── TimePartitionSpec.java │ │ │ │ │ │ │ └── Transform.java │ │ │ │ │ └── response │ │ │ │ │ │ ├── GetAclPoliciesResponseBody.java │ │ │ │ │ │ ├── GetAllDatabasesResponseBody.java │ │ │ │ │ │ ├── GetAllTablesResponseBody.java │ │ │ │ │ │ ├── GetDatabaseResponseBody.java │ │ │ │ │ │ ├── GetTableResponseBody.java │ │ │ │ │ │ └── components │ │ │ │ │ │ └── AclPolicy.java │ │ │ │ └── v1 │ │ │ │ │ └── request │ │ │ │ │ └── CreateUpdateTableRequestBody.java │ │ │ └── validator │ │ │ │ ├── DatabasesApiValidator.java │ │ │ │ ├── IcebergSnapshotsApiValidator.java │ │ │ │ ├── TablesApiValidator.java │ │ │ │ └── impl │ │ │ │ ├── ClusteringSpecValidator.java │ │ │ │ ├── HistoryPolicySpecValidator.java │ │ │ │ ├── IcebergSnapshotsApiValidatorImpl.java │ │ │ │ ├── OpenHouseDatabasesApiValidator.java │ │ │ │ ├── OpenHouseTablesApiValidator.java │ │ │ │ ├── PolicySpecValidator.java │ │ │ │ ├── ReplicationConfigValidator.java │ │ │ │ └── RetentionPolicySpecValidator.java │ │ │ ├── audit │ │ │ ├── DummyTableAuditHandler.java │ │ │ ├── TableAuditAspect.java │ │ │ └── model │ │ │ │ ├── OperationStatus.java │ │ │ │ ├── OperationType.java │ │ │ │ └── TableAuditEvent.java │ │ │ ├── authorization │ │ │ ├── AuthorizationHandler.java │ │ │ ├── AuthorizationInterceptor.java │ │ │ ├── OpaAuthorizationHandler.java │ │ │ ├── OpaHandler.java │ │ │ └── Privileges.java │ │ │ ├── common │ │ │ ├── DefaultColumnPattern.java │ │ │ ├── ReplicationInterval.java │ │ │ └── TableType.java │ │ │ ├── config │ │ │ ├── AuthorizationConfig.java │ │ │ ├── MainApplicationConfig.java │ │ │ ├── TablesMvcConfigurer.java │ │ │ ├── TablesMvcConstants.java │ │ │ ├── TblPropsToggleRegistry.java │ │ │ └── TblPropsToggleRegistryBaseImpl.java │ │ │ ├── controller │ │ │ ├── DatabasesController.java │ │ │ ├── IcebergSnapshotsController.java │ │ │ └── TablesController.java │ │ │ ├── dto │ │ │ └── mapper │ │ │ │ ├── DatabasesMapper.java │ │ │ │ ├── TablesMapper.java │ │ │ │ ├── 
TablesMapperHelper.java │ │ │ │ ├── attribute │ │ │ │ ├── ClusteringSpecConverter.java │ │ │ │ ├── PoliciesSpecConverter.java │ │ │ │ └── TimePartitionSpecConverter.java │ │ │ │ └── iceberg │ │ │ │ ├── PartitionSpecMapper.java │ │ │ │ ├── PoliciesSpecMapper.java │ │ │ │ └── TableTypeMapper.java │ │ │ ├── model │ │ │ ├── DatabaseDto.java │ │ │ ├── DatabaseDtoPrimaryKey.java │ │ │ ├── TableDto.java │ │ │ └── TableDtoPrimaryKey.java │ │ │ ├── repository │ │ │ ├── OpenHouseInternalRepository.java │ │ │ ├── PreservedKeyChecker.java │ │ │ ├── SchemaValidator.java │ │ │ └── impl │ │ │ │ ├── BaseIcebergSchemaValidator.java │ │ │ │ ├── BasePreservedKeyChecker.java │ │ │ │ ├── InternalRepositoryUtils.java │ │ │ │ ├── MetricsAspect.java │ │ │ │ ├── OpenHouseInternalRepositoryImpl.java │ │ │ │ ├── PreservedPropsToggleEnabler.java │ │ │ │ └── Timed.java │ │ │ ├── services │ │ │ ├── DatabasesService.java │ │ │ ├── DatabasesServiceImpl.java │ │ │ ├── IcebergSnapshotsService.java │ │ │ ├── IcebergSnapshotsServiceImpl.java │ │ │ ├── TablesService.java │ │ │ └── TablesServiceImpl.java │ │ │ ├── toggle │ │ │ ├── BaseTableFeatureToggle.java │ │ │ ├── FeatureToggleAspect.java │ │ │ ├── TableFeatureToggle.java │ │ │ ├── ToggleStatusMapper.java │ │ │ ├── model │ │ │ │ ├── TableToggleStatus.java │ │ │ │ └── ToggleStatusKey.java │ │ │ └── repository │ │ │ │ ├── ToggleStatusesRepository.java │ │ │ │ └── ToggleStatusesRepositoryImpl.java │ │ │ └── utils │ │ │ ├── AuthorizationUtils.java │ │ │ ├── IntervalToCronConverter.java │ │ │ └── TableUUIDGenerator.java │ └── resources │ │ ├── application.properties │ │ └── static │ │ └── favicon.ico │ └── test │ ├── java │ └── com │ │ └── linkedin │ │ └── openhouse │ │ └── tables │ │ ├── api │ │ └── validator │ │ │ └── impl │ │ │ ├── HistoryPolicySpecValidatorTest.java │ │ │ └── RetentionPolicySpecValidatorTest.java │ │ ├── config │ │ ├── MainApplicationConfigTest.java │ │ └── MockMvcBuilderConfig.java │ │ ├── e2e │ │ └── h2 │ │ │ ├── DatabasesControllerTest.java │ │ │ ├── DatabasesServiceTest.java │ │ │ ├── DualStorageTest.java │ │ │ ├── HouseTablesH2Repository.java │ │ │ ├── RepositoryTest.java │ │ │ ├── RepositoryTestWithSettableComponents.java │ │ │ ├── RequestAndValidateHelper.java │ │ │ ├── SnapshotsControllerTest.java │ │ │ ├── SpringH2Application.java │ │ │ ├── TablesControllerTest.java │ │ │ ├── TablesServiceTest.java │ │ │ ├── ToggleH2StatusesRepository.java │ │ │ ├── ToggleStatusMapperTest.java │ │ │ └── ValidationUtilities.java │ │ ├── mock │ │ ├── BaseTableFeatureToggleTest.java │ │ ├── MockDatabasesApiHandler.java │ │ ├── MockIcebergSnapshotApiHandler.java │ │ ├── MockTablesApiHandler.java │ │ ├── MockTablesApplication.java │ │ ├── RequestConstants.java │ │ ├── api │ │ │ ├── DatabasesValidatorTest.java │ │ │ ├── IcebergSnapshotsApiValidatorTest.java │ │ │ └── TablesValidatorTest.java │ │ ├── audit │ │ │ ├── DatabasesApiHandlerAuditTest.java │ │ │ ├── IcebergSnapshotsApiHandlerAuditTest.java │ │ │ └── TablesApiHandlerAuditTest.java │ │ ├── authorization │ │ │ └── TestPrivileges.java │ │ ├── controller │ │ │ ├── IcebergSnapshotsControllerTest.java │ │ │ ├── MockUnauthenticatedSecurityContextFactory.java │ │ │ ├── MockUnauthenticatedUser.java │ │ │ └── TablesControllerTest.java │ │ ├── mapper │ │ │ ├── DatabasesMapperTest.java │ │ │ ├── PartitionSpecMapperTest.java │ │ │ ├── PoliciesSpecMapperTest.java │ │ │ └── TablesMapperTest.java │ │ ├── properties │ │ │ ├── AuthorizationPropertiesInitializer.java │ │ │ ├── AuthorizationPropertiesTest.java │ │ │ ├── 
ClusterPropertiesTest.java │ │ │ ├── CustomClusterPropertiesInitializer.java │ │ │ ├── CustomClusterPropertiesTest.java │ │ │ ├── DefaultClusterPropertiesInitializer.java │ │ │ ├── DefaultClusterPropertiesTest.java │ │ │ └── InvalidHandlerInterceptorResource.java │ │ ├── service │ │ │ ├── IcebergSnapshotsServiceTest.java │ │ │ └── TablesServiceTest.java │ │ ├── storage │ │ │ ├── StorageManagerTest.java │ │ │ ├── StoragePropertiesConfigTest.java │ │ │ ├── StorageTypeEnumTest.java │ │ │ ├── adls │ │ │ │ ├── AdlsStorageClientTest.java │ │ │ │ └── AdlsStorageTest.java │ │ │ ├── base │ │ │ │ └── BaseStorageTest.java │ │ │ ├── hdfs │ │ │ │ ├── HdfsDelegationTokenRefresherDisabledTest.java │ │ │ │ ├── HdfsDelegationTokenRefresherEnabledTest.java │ │ │ │ ├── HdfsStorageClientTest.java │ │ │ │ └── HdfsStorageTest.java │ │ │ ├── local │ │ │ │ ├── LocalStorageClientTest.java │ │ │ │ └── LocalStorageTest.java │ │ │ └── s3 │ │ │ │ ├── S3StorageClientTest.java │ │ │ │ └── S3StorageTest.java │ │ └── uuid │ │ │ ├── TableUUIDGeneratorMultiStorageTest.java │ │ │ └── TableUUIDGeneratorTest.java │ │ ├── model │ │ ├── DatabaseModelConstants.java │ │ ├── IcebergSnapshotsModelTestUtilities.java │ │ ├── ServiceAuditModelConstants.java │ │ ├── TableAuditModelConstants.java │ │ ├── TableDtoMappingTest.java │ │ └── TableModelConstants.java │ │ ├── repository │ │ └── impl │ │ │ ├── InternalRepositoryUtilsTest.java │ │ │ └── SettableInternalRepositoryForTest.java │ │ ├── settable │ │ ├── SettableCatalogForTest.java │ │ └── SettableTestConfig.java │ │ └── toggle │ │ └── FeatureToggleAspectTest.java │ └── resources │ ├── cluster-test-properties.yaml │ ├── dummy_healthy_schema.json │ ├── dummy_snapshot_serialized.json │ ├── dummy_unhealthy_cluster_schema.json │ ├── evolved_dummy_healthy_schema.json │ ├── evolved_dummy_healthy_schema_reorder.json │ ├── field_update │ ├── base.json │ └── base_int2long.json │ └── invalid_type_promote.json ├── settings.gradle ├── tables-service.Dockerfile └── tables-test-fixtures ├── tables-test-fixtures-iceberg-1.2 ├── build.gradle └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── linkedin │ │ │ └── openhouse │ │ │ └── tablestest │ │ │ ├── HouseTablesH2Repository.java │ │ │ ├── OpenHouseLocalServer.java │ │ │ ├── OpenHouseSparkITest.java │ │ │ ├── SpringH2TestApplication.java │ │ │ ├── TestSparkSessionUtil.java │ │ │ ├── ToggleH2StatusesRepository.java │ │ │ └── annotation │ │ │ ├── CustomParameterResolver.java │ │ │ └── MappedParameterContext.java │ └── resources │ │ └── dummy.token │ └── test │ └── java │ └── com │ └── linkedin │ └── openhouse │ └── tablestest │ ├── OpenHouseLocalServerTest.java │ └── TomcatServerBootTest.java └── tables-test-fixtures-iceberg-1.5 └── build.gradle /.github/ISSUE_TEMPLATE/openhouse_rfc.yml: -------------------------------------------------------------------------------- 1 | name: RFC (Request for Comments) 2 | description: Request for comments on a feature 3 | title: "[RFC] " 4 | labels: 5 | - feat 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | **Proposal** 11 | A clear and concise description of the proposed idea or feature. 12 | - type: textarea 13 | id: motivation 14 | attributes: 15 | label: Motivation 16 | description: Why do you think this idea or feature is important? 17 | - type: textarea 18 | id: proposal 19 | attributes: 20 | label: Proposal 21 | description: Describe the proposed solution or implementation. 
22 |   - type: textarea
23 |     id: alternatives
24 |     attributes:
25 |       label: Alternatives
26 |       description: Have you considered any alternatives? If so, please describe them.
27 |   - type: textarea
28 |     id: additional_notes
29 |     attributes:
30 |       label: Additional Notes
31 |       description: Add any additional notes or context about the proposal.
32 | 
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # Keep GitHub Actions up to date with GitHub's Dependabot...
 2 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
 3 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
 4 | version: 2
 5 | updates:
 6 |   - package-ecosystem: github-actions
 7 |     directory: /
 8 |     groups:
 9 |       github-actions:
10 |         patterns:
11 |           - "*" # Group all Actions updates into a single larger pull request
12 |     schedule:
13 |       interval: weekly
14 |     ignore:
15 |       - dependency-name: "actions/setup-java"
16 |       - dependency-name: "gradle/wrapper-validation-action"
17 | 
--------------------------------------------------------------------------------
/.github/workflows/pr-validations.yml:
--------------------------------------------------------------------------------
 1 | name: Pull Request Validations
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     types: [opened, synchronize, reopened]
 6 |     branches:
 7 |       - main
 8 | 
 9 | concurrency:
10 |   group: ci-${{ github.event.pull_request.number || github.ref }}
11 |   cancel-in-progress: true
12 | 
13 | jobs:
14 |   build-run-tests:
15 |     uses: ./.github/workflows/build-run-tests.yml
16 | 
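The workflow above delegates to build-run-tests.yml as a reusable workflow via `uses:`, with the concurrency block cancelling superseded runs of the same pull request. For that call to resolve, the target workflow must declare a workflow_call trigger; a minimal sketch of the shape build-run-tests.yml would need (its contents are not included in this listing, so the build step below is a hypothetical placeholder):

    name: Build and Run Tests
    on:
      workflow_call:  # makes this workflow callable from pr-validations.yml

    jobs:
      build:
        runs-on: ubuntu-latest
        steps:
          - uses: actions/checkout@v4
          - run: ./gradlew build  # placeholder; the real workflow defines its own steps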
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright 2024 LinkedIn Corporation
2 | All Rights Reserved.
3 | 
4 | Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
5 | 
6 | This product includes:
7 | 
8 | * N/A
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Responsible Disclosure of Security Vulnerabilities
2 | 
3 | Please do not file reports on GitHub for security issues. Instead, please
4 | review the guidelines for reporting [Security Vulnerabilities](https://www.linkedin.com/help/linkedin/answer/62924/security-vulnerabilities?lang=en).
5 | 
6 | Alternatively, reports can be encrypted using PGP ([public key](https://www.linkedin.com/help/linkedin/answer/79676))
7 | and sent to security@linkedin.com, preferably with the title "GitHub
8 | linkedin/openhouse - <short summary>".
9 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/exception/OperationTaskException.java:
--------------------------------------------------------------------------------
1 | package com.linkedin.openhouse.jobs.exception;
2 | 
3 | /** Exception thrown when an operation task fails. */
4 | public class OperationTaskException extends Exception {
5 |   public OperationTaskException(String message) {
6 |     super(message);
7 |   }
8 | }
9 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/exception/TableValidationException.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.exception;
 2 | 
 3 | /** Exception thrown when table validation fails. */
 4 | public class TableValidationException extends RuntimeException {
 5 |   public TableValidationException(String message) {
 6 |     super(message);
 7 |   }
 8 | 
 9 |   public TableValidationException(String message, Throwable cause) {
10 |     super(message, cause);
11 |   }
12 | }
13 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BaseTableDirectorySparkApp.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.spark;
 2 | 
 3 | import com.linkedin.openhouse.jobs.spark.state.StateManager;
 4 | import org.apache.hadoop.fs.Path;
 5 | 
 6 | /** Base class for Spark apps that perform table-directory-specific operations. */
 7 | public abstract class BaseTableDirectorySparkApp extends BaseSparkApp {
 8 |   protected final Path tableDirectoryPath;
 9 | 
10 |   protected BaseTableDirectorySparkApp(
11 |       String jobId, StateManager stateManager, Path tableDirectoryPath) {
12 |     super(jobId, stateManager);
13 |     this.tableDirectoryPath = tableDirectoryPath;
14 |   }
15 | }
16 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/NoOpSparkApp.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.spark;
 2 | 
 3 | import com.linkedin.openhouse.jobs.spark.state.StateManager;
 4 | import java.util.Collections;
 5 | import lombok.extern.slf4j.Slf4j;
 6 | import org.apache.commons.cli.CommandLine;
 7 | 
 8 | @Slf4j
 9 | public class NoOpSparkApp extends BaseSparkApp {
10 |   public NoOpSparkApp(String jobId, StateManager stateManager) {
11 |     super(jobId, stateManager);
12 |   }
13 | 
14 |   @Override
15 |   protected void runInner(Operations ops) {
16 |     log.info(String.format("Hello from %s", ops.spark().sparkContext().appName()));
17 |   }
18 | 
19 |   public static void main(String[] args) {
20 |     CommandLine cmdLine = createCommandLine(args, Collections.emptyList());
21 |     NoOpSparkApp app = new NoOpSparkApp(getJobId(cmdLine), createStateManager(cmdLine));
22 |     app.run();
23 |   }
24 | }
25 | 
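NoOpSparkApp above is the minimal template for a jobs Spark app: subclass BaseSparkApp, implement runInner(Operations), and wire main() through the shared CLI helpers. A sketch of how a hypothetical custom app would follow the same pattern (RowCountSparkApp and the table it queries are illustrative, not part of the repo):

    package com.linkedin.openhouse.jobs.spark;

    import com.linkedin.openhouse.jobs.spark.state.StateManager;
    import java.util.Collections;
    import lombok.extern.slf4j.Slf4j;
    import org.apache.commons.cli.CommandLine;

    @Slf4j
    public class RowCountSparkApp extends BaseSparkApp {
      public RowCountSparkApp(String jobId, StateManager stateManager) {
        super(jobId, stateManager);
      }

      @Override
      protected void runInner(Operations ops) {
        // Operations exposes the underlying SparkSession via ops.spark()
        long rows = ops.spark().sql("SELECT * FROM openhouse.db.tbl").count();
        log.info(String.format("Row count: %d", rows));
      }

      public static void main(String[] args) {
        CommandLine cmdLine = createCommandLine(args, Collections.emptyList());
        RowCountSparkApp app = new RowCountSparkApp(getJobId(cmdLine), createStateManager(cmdLine));
        app.run();
      }
    }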
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/DirectoryMetadata.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import lombok.EqualsAndHashCode;
 4 | import lombok.Getter;
 5 | import lombok.ToString;
 6 | import lombok.experimental.SuperBuilder;
 7 | import org.apache.hadoop.fs.Path;
 8 | 
 9 | /** Directory metadata, including path and creator. */
10 | @Getter
11 | @SuperBuilder
12 | @EqualsAndHashCode(callSuper = true)
13 | @ToString
14 | public class DirectoryMetadata extends Metadata {
15 |   private Path path;
16 | 
17 |   @Override
18 |   public String getEntityName() {
19 |     return getPath().toString();
20 |   }
21 | 
22 |   public String getBaseName() {
23 |     return path.getName();
24 |   }
25 | }
26 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/HistoryConfig.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import com.linkedin.openhouse.tables.client.model.History;
 4 | import lombok.Builder;
 5 | import lombok.EqualsAndHashCode;
 6 | import lombok.Getter;
 7 | import lombok.ToString;
 8 | 
 9 | /** History policy config class. This is the app-side representation of /tables policies->history. */
10 | @Builder
11 | @Getter
12 | @EqualsAndHashCode
13 | @ToString
14 | public class HistoryConfig {
15 |   private final int maxAge;
16 |   private final int versions;
17 |   private final History.GranularityEnum granularity;
18 | }
19 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/Metadata.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import lombok.Builder;
 4 | import lombok.EqualsAndHashCode;
 5 | import lombok.Getter;
 6 | import lombok.NonNull;
 7 | import lombok.ToString;
 8 | import lombok.experimental.SuperBuilder;
 9 | 
10 | /** Base metadata class that table and table directory metadata extend. */
11 | @Getter
12 | @SuperBuilder
13 | @EqualsAndHashCode
14 | @ToString
15 | public abstract class Metadata {
16 |   @NonNull @Builder.Default protected String creator = "";
17 | 
18 |   public abstract String getEntityName();
19 | }
20 | 
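Metadata uses Lombok's @SuperBuilder rather than plain @Builder so that subclasses such as DirectoryMetadata above get a single builder covering both the inherited field (creator, with its @Builder.Default) and their own fields (path). A minimal usage sketch, with made-up values:

    import org.apache.hadoop.fs.Path;

    DirectoryMetadata dir =
        DirectoryMetadata.builder()
            .creator("jane.doe") // inherited from Metadata; defaults to "" if omitted
            .path(new Path("/data/openhouse/db/tbl")) // declared on DirectoryMetadata
            .build();
    String name = dir.getEntityName(); // returns the path as a string, per the override above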
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/ReplicationConfig.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import lombok.Builder;
 4 | import lombok.EqualsAndHashCode;
 5 | import lombok.Getter;
 6 | import lombok.ToString;
 7 | 
 8 | /** Table replication config class. This is the app-side representation of /tables policies->replication. */
 9 | @Builder
10 | @Getter
11 | @EqualsAndHashCode
12 | @ToString
13 | public class ReplicationConfig {
14 |   private final String schedule;
15 |   private final String tableOwner;
16 |   private final String cluster;
17 | }
18 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/RetentionConfig.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import com.linkedin.openhouse.tables.client.model.Retention;
 4 | import lombok.Builder;
 5 | import lombok.EqualsAndHashCode;
 6 | import lombok.Getter;
 7 | import lombok.ToString;
 8 | 
 9 | /** Table retention config class. This is the app-side representation of /tables policies->retention. */
10 | @Builder
11 | @Getter
12 | @EqualsAndHashCode
13 | @ToString
14 | public class RetentionConfig {
15 |   private final String columnName;
16 |   private final String columnPattern;
17 |   private final Retention.GranularityEnum granularity;
18 |   private final int count;
19 | }
20 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableDataLayoutMetadata.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import com.linkedin.openhouse.datalayout.strategy.DataLayoutStrategy;
 4 | import lombok.EqualsAndHashCode;
 5 | import lombok.Getter;
 6 | import lombok.NonNull;
 7 | import lombok.ToString;
 8 | import lombok.experimental.SuperBuilder;
 9 | 
10 | @Getter
11 | @SuperBuilder
12 | @EqualsAndHashCode(callSuper = true)
13 | @ToString(callSuper = true)
14 | public class TableDataLayoutMetadata extends TableMetadata {
15 |   @NonNull protected DataLayoutStrategy dataLayoutStrategy;
16 | }
17 | 
--------------------------------------------------------------------------------
/apps/spark/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
 1 | # Extra logging related to initialization of Log4j
 2 | # Set to debug or trace if log4j initialization is failing
 3 | status = warn
 4 | # Name of the configuration
 5 | name = ConsoleLog
 6 | 
 7 | # Console appender configuration
 8 | appender.console.type = Console
 9 | appender.console.name = consoleLogger
10 | appender.console.layout.type = PatternLayout
11 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
12 | 
13 | # Root logger level
14 | rootLogger.level = info
15 | # Root logger referring to console appender
16 | rootLogger.appenderRef.stdout.ref = consoleLogger
--------------------------------------------------------------------------------
/apps/spark/src/test/java/com/linkedin/openhouse/jobs/scheduler/JobsSchedulerTest.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.scheduler;
 2 | 
 3 | import com.linkedin.openhouse.jobs.scheduler.tasks.TableRetentionTask;
 4 | import org.junit.jupiter.api.Test;
 5 | 
 6 | public class JobsSchedulerTest {
 7 |   @Test
 8 |   void testRegistryIsInitialized() {
 9 |     JobsScheduler.main(
10 |         new String[] {
11 |           "--type", TableRetentionTask.OPERATION_TYPE.getValue(),
12 |           "--tablesURL", "http://test.openhouse.com",
13 |           "--jobsURL", "unused",
14 |           "--cluster", "unused",
15 |         });
16 |   }
17 | }
18 | 
--------------------------------------------------------------------------------
/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/SimpleRecord.java:
--------------------------------------------------------------------------------
 1 | package com.linkedin.openhouse.jobs.util;
 2 | 
 3 | import java.io.Serializable;
 4 | import lombok.Getter;
 5 | import lombok.Setter;
 6 | 
 7 | @Setter
 8 | @Getter
 9 | public class SimpleRecord implements Serializable {
10 |   // Getters and setters are generated by the Lombok annotations above
11 |   private int id;
12 |   private String data;
13 | 
14 |   // Constructor
15 |   public SimpleRecord(int id, String data) {
16 |     this.id = id;
17 |     this.data = data;
18 |   }
19 | }
20 | 
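SimpleRecord is the usual Serializable bean-style POJO used to build typed Spark datasets in tests: Spark derives the schema from the Lombok-generated getters. A sketch of how a test might use it, assuming an existing SparkSession named spark (this usage is illustrative, not taken from the tests above):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;

    List<SimpleRecord> rows = Arrays.asList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"));
    // Bean introspection yields the schema (id: int, data: string)
    Dataset<Row> df = spark.createDataFrame(rows, SimpleRecord.class);
    df.createOrReplaceTempView("simple_records");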
--------------------------------------------------------------------------------
/buildSrc/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id 'groovy-gradle-plugin'
3 | }
--------------------------------------------------------------------------------
/buildSrc/src/main/groovy/openhouse.hadoop-conventions.gradle:
--------------------------------------------------------------------------------
 1 | ext {
 2 |     hadoopVersion = '2.10.0'
 3 | }
 4 | 
 5 | dependencies {
 6 |     implementation("org.apache.hadoop:hadoop-client:" + hadoopVersion) {
 7 |         exclude group: 'junit', module: 'junit'
 8 |         exclude group: 'javax', module: 'servlet-api'
 9 |         exclude group: 'com.zaxxer', module: 'HikariCP-java7'
10 |         exclude group: 'org.apache.commons', module: 'commons-lang3'
11 |         exclude group: 'com.codahale.metrics', module: 'metrics-core'
12 |         exclude group: 'com.squareup.okhttp', module: 'okhttp'
13 |     }
14 |     implementation 'org.apache.commons:commons-lang3:3.12.0'
15 | }
16 | 
--------------------------------------------------------------------------------
/buildSrc/src/main/groovy/openhouse.iceberg-aws-conventions.gradle:
--------------------------------------------------------------------------------
 1 | ext {
 2 |     icebergAwsVersion = '1.2.0'
 3 | }
 4 | 
 5 | dependencies {
 6 |     implementation('org.apache.iceberg:iceberg-aws:' + icebergAwsVersion)
 7 |     implementation('software.amazon.awssdk:s3:2.20.18')
 8 |     implementation('software.amazon.awssdk:sts:2.20.18')
 9 |     implementation('software.amazon.awssdk:url-connection-client:2.20.18')
10 | }
--------------------------------------------------------------------------------
/buildSrc/src/main/groovy/openhouse.iceberg-azure-conventions.gradle:
--------------------------------------------------------------------------------
 1 | ext {
 2 |     icebergAzureVersion = '1.5.2'
 3 | }
 4 | 
 5 | dependencies {
 6 |     // Ideally, we would use these, but they are only supported for Iceberg versions >= 1.4.0, which are not
 7 |     // compatible with the current OpenHouse implementation.
8 | // implementation('org.apache.iceberg:iceberg-azure:' + icebergAzureVersion) 9 | // implementation('org.apache.iceberg:iceberg-azure-bundle:' + icebergAzureVersion) 10 | 11 | implementation("com.azure:azure-storage-file-datalake:12.19.1") { 12 | exclude group: 'io.netty' 13 | exclude group: 'io.projectreactor.netty' 14 | } 15 | 16 | implementation("com.azure:azure-identity:1.12.1") { 17 | exclude group: 'io.netty' 18 | exclude group: 'io.projectreactor.netty' 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/openhouse.iceberg-conventions-1.2.gradle: -------------------------------------------------------------------------------- 1 | ext { 2 | icebergVersion = '1.2.0' 3 | } 4 | 5 | dependencies { 6 | implementation('org.apache.iceberg:iceberg-bundled-guava:' + icebergVersion + "!!") 7 | implementation('org.apache.iceberg:iceberg-data:' + icebergVersion + "!!") 8 | implementation('org.apache.iceberg:iceberg-core:' + icebergVersion + "!!") 9 | implementation('org.apache.iceberg:iceberg-common:' + icebergVersion + "!!") 10 | implementation('org.testcontainers:testcontainers:1.19.8') 11 | } -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/openhouse.iceberg-conventions-1.5.2.gradle: -------------------------------------------------------------------------------- 1 | ext { 2 | icebergVersion = '1.5.2' 3 | } 4 | 5 | dependencies { 6 | implementation('org.apache.iceberg:iceberg-bundled-guava:' + icebergVersion) 7 | implementation('org.apache.iceberg:iceberg-data:' + icebergVersion) 8 | implementation('org.apache.iceberg:iceberg-core:' + icebergVersion) 9 | implementation('org.apache.iceberg:iceberg-common:' + icebergVersion) 10 | implementation('org.testcontainers:testcontainers:1.19.8') 11 | } -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/openhouse.java-conventions.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | } 4 | 5 | ext { 6 | gsonVersion = '2.8.9' 7 | micrometerVersion = '1.9.13' 8 | } 9 | 10 | dependencies { 11 | implementation 'com.google.code.gson:gson:' + gsonVersion 12 | testImplementation 'com.google.code.gson:gson:' + gsonVersion 13 | 14 | implementation 'io.micrometer:micrometer-core:' + micrometerVersion 15 | implementation 'io.micrometer:micrometer-registry-prometheus:' + micrometerVersion 16 | implementation 'commons-cli:commons-cli:1.5.0' 17 | 18 | } 19 | -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/openhouse.service-specgen-convention.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'com.github.johnrengelman.processes' 3 | id 'org.springdoc.openapi-gradle-plugin' 4 | } 5 | 6 | /** 7 | * Customization for the `generateOpenApiDocs` task is needed since: 8 | * - The default API doc URL points to v3 on port 9000. 9 | * - Depending tasks need to be made aware of the output location.
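 * - (Illustrative, not repo code) a downstream task can consume the spec by declaring
 *   dependsOn('openApiOutput') and reading "$buildDir/specs/${project.name}.json",
 *   which matches the outputs declared below.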
10 | */ 11 | openApi { 12 | outputDir.set(file("$buildDir/specs")) 13 | outputFileName.set("${project.name}.json") 14 | waitTimeInSeconds.set(60) 15 | } 16 | 17 | task openApiOutput { 18 | dependsOn('generateOpenApiDocs') 19 | it.outputs.file(file("$buildDir/specs/${project.name}.json")) 20 | } -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/openhouse.springboot-ext-conventions.gradle: -------------------------------------------------------------------------------- 1 | // Plugin for extensible Spring Boot services 2 | plugins { 3 | id 'openhouse.springboot-conventions' 4 | id "org.springframework.boot" 5 | } 6 | 7 | tasks.named("bootJar") { 8 | enabled = true 9 | archiveClassifier = "" 10 | manifest { 11 | // The default Spring Boot app launcher class is JarLauncher, which doesn't allow loading classes from external jars 12 | // (jars not packaged within the app fat jar). PropertiesLauncher allows extending the list of paths 13 | // to load classes from via the loader.path property or the LOADER_PATH env var. 14 | // More info: https://docs.spring.io/spring-boot/docs/current/reference/html/executable-jar.html#appendix.executable-jar.launching. 15 | attributes ('Main-Class': 'org.springframework.boot.loader.PropertiesLauncher') 16 | } 17 | } 18 | 19 | tasks.named("jar") { 20 | enabled = true 21 | archiveClassifier = "lib" 22 | } 23 | -------------------------------------------------------------------------------- /client/hts/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | id 'openhouse.client-codegen-convention' 4 | id 'openhouse.maven-publish' 5 | } 6 | 7 | ext { 8 | codeGenForService = ":services:housetables" 9 | } 10 | 11 | apply from: "${project(':client:common').file("codegen.build.gradle")}" 12 | -------------------------------------------------------------------------------- /client/jobsclient/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | id 'openhouse.client-codegen-convention' 4 | id 'openhouse.maven-publish' 5 | } 6 | 7 | ext { 8 | codeGenForService = ":services:jobs" 9 | } 10 | 11 | apply from: "${project(':client:common').file("codegen.build.gradle")}" 12 | -------------------------------------------------------------------------------- /client/secureclient/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-conventions' 3 | id 'openhouse.maven-publish' 4 | id 'com.github.johnrengelman.processes' version '0.5.0' 5 | } 6 | 7 | 8 | dependencies { 9 | implementation project(':client:tableclient') 10 | implementation project(':client:jobsclient') 11 | implementation project(':client:hts') 12 | implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' 13 | testImplementation 'io.netty:netty-resolver-dns-native-macos:4.1.70.Final:osx-x86_64' 14 | testImplementation 'org.junit.jupiter:junit-jupiter-api:' + junit_version 15 | testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:' + junit_version 16 | testImplementation 'org.mockito:mockito-inline:4.11.0' 17 | 18 | } 19 | 20 | test { 21 | useJUnitPlatform() 22 | } 23 |
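A hedged usage sketch for the HttpConnectionStrategy enum defined next; only the visible fromString contract is relied on, and the property name here is hypothetical:

import com.linkedin.openhouse.client.ssl.HttpConnectionStrategy;

public class ConnectionStrategyExample {
  public static void main(String[] args) {
    // Hypothetical property; may be unset (null), in which case fromString defaults to POOLED.
    String raw = System.getProperty("openhouse.client.connection.strategy");
    HttpConnectionStrategy strategy = HttpConnectionStrategy.fromString(raw);
    // null or "" -> POOLED; "new"/"NEW" -> NEW; any other value throws
    // IllegalArgumentException out of Enum.valueOf.
    System.out.println(strategy);
  }
}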
-------------------------------------------------------------------------------- /client/secureclient/src/main/java/com/linkedin/openhouse/client/ssl/HttpConnectionStrategy.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.client.ssl; 2 | 3 | /** Enum to represent connection strategy such as POOLED and NEW connection */ 4 | public enum HttpConnectionStrategy { 5 | POOLED, // represents pooled http connection 6 | NEW; // represents new connection 7 | 8 | public static HttpConnectionStrategy fromString(String value) { 9 | // default to POOLED if null or empty 10 | if (value == null || value.isEmpty()) { 11 | return POOLED; 12 | } 13 | return Enum.valueOf(HttpConnectionStrategy.class, value.toUpperCase()); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /client/tableclient/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | id 'openhouse.client-codegen-convention' 4 | id 'openhouse.maven-publish' 5 | } 6 | 7 | ext { 8 | codeGenForService = ":services:tables" 9 | } 10 | 11 | apply from: "${project(':client:common').file("codegen.build.gradle")}" 12 | -------------------------------------------------------------------------------- /cluster/configs/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-conventions' 3 | id 'openhouse.maven-publish' 4 | id 'java-library' 5 | } 6 | 7 | dependencies { 8 | 9 | // Additional dependencies needed so that annotated objects are properly treated as Spring components. 10 | // Other modules depending on this module don't have to redeclare the following dependencies. 11 | api 'com.fasterxml.jackson.core:jackson-annotations:2.13.4' 12 | api 'io.swagger.core.v3:swagger-annotations:2.1.11' 13 | api 'jakarta.validation:jakarta.validation-api:2.0.2' 14 | api 'org.springframework:spring-web:5.3.18' 15 | api 'org.springframework:spring-webmvc:5.3.18' 16 | } 17 | -------------------------------------------------------------------------------- /cluster/metrics/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-conventions' 3 | id 'openhouse.maven-publish' 4 | } 5 | 6 | dependencies { 7 | implementation project(':cluster:configs') 8 | } -------------------------------------------------------------------------------- /cluster/metrics/src/main/java/com/linkedin/openhouse/cluster/metrics/TagUtils.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.cluster.metrics; 2 | 3 | import com.linkedin.openhouse.cluster.configs.ClusterProperties; 4 | import io.micrometer.core.instrument.Tag; 5 | import java.util.Arrays; 6 | 7 | public final class TagUtils { 8 | 9 | public static final String COMMON_TAG_CLUSTER_NAME = "clusterName"; 10 | public static final String COMMON_TAG_APP_NAME = "application"; 11 | 12 | private TagUtils() {} 13 | 14 | public static Iterable<Tag> buildCommonTag(ClusterProperties clusterProperties, String appName) { 15 | final Tag[] tags = { 16 | Tag.of(COMMON_TAG_CLUSTER_NAME, clusterProperties.getClusterName()), 17 | Tag.of(COMMON_TAG_APP_NAME, appName) 18 | }; 19 | return () -> Arrays.stream(tags).iterator(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /cluster/storage/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-conventions' 3 | id 'openhouse.hadoop-conventions' 4 | id
'openhouse.iceberg-aws-conventions' 5 | id 'openhouse.iceberg-azure-conventions' 6 | id 'openhouse.iceberg-conventions-1.5.2' 7 | id 'openhouse.maven-publish' 8 | } 9 | 10 | dependencies { 11 | implementation project(':cluster:configs') 12 | implementation project(':iceberg:azure') 13 | implementation 'org.springframework.boot:spring-boot-autoconfigure:' + spring_web_version 14 | implementation 'javax.annotation:javax.annotation-api:1.3.2' 15 | } 16 | 17 | configurations { 18 | all { 19 | exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl' 20 | exclude group: 'org.slf4j', module: 'slf4j-log4j12' 21 | } 22 | } -------------------------------------------------------------------------------- /cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/FsStorageUtils.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.cluster.storage; 2 | 3 | import java.io.IOException; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.fs.permission.FsAction; 7 | import org.apache.hadoop.fs.permission.FsPermission; 8 | 9 | public final class FsStorageUtils { 10 | private FsStorageUtils() { 11 | // Utility class ctor no-op 12 | } 13 | 14 | static final String OPENHOUSE_GROUP_NAME = "openhouse"; 15 | 16 | /** A file permission that grants all to OpenHouse but nothing to other entities. */ 17 | static final FsPermission PERM = new FsPermission(FsAction.NONE, FsAction.ALL, FsAction.NONE); 18 | 19 | public static void securePath(FileSystem fs, Path path) throws IOException { 20 | /* passing null for the user means the owner won't be changed */ 21 | fs.setOwner(path, null, OPENHOUSE_GROUP_NAME); 22 | fs.setPermission(path, PERM); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/exception/ConfigMismatchException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.cluster.storage.exception; 2 | 3 | /** 4 | * An exception thrown when there is a mismatch between configs, or between a config and injected beans.
5 | */ 6 | public class ConfigMismatchException extends RuntimeException { 7 | public ConfigMismatchException(String errorMsg, String entityA, String entityB) { 8 | super(String.format("%s, since %s collides with %s", errorMsg, entityA, entityB)); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/selector/BaseStorageSelector.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.cluster.storage.selector; 2 | 3 | /** An abstract class for all Storage selectors */ 4 | public abstract class BaseStorageSelector implements StorageSelector { 5 | 6 | @Override 7 | public String getName() { 8 | return this.getClass().getSimpleName(); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /docs/development/ide-setup-for-shadow-jars.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/docs/development/ide-setup-for-shadow-jars.gif -------------------------------------------------------------------------------- /docs/images/openhouse-controlplane.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/docs/images/openhouse-controlplane.jpeg -------------------------------------------------------------------------------- /docs/images/openhouse-deployed-architecture.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/docs/images/openhouse-deployed-architecture.jpeg -------------------------------------------------------------------------------- /docs/images/openhouse-logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/docs/images/openhouse-logo.jpeg -------------------------------------------------------------------------------- /example-workflow-template/main.yaml: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # Increase daemon memory allocation 2 | org.gradle.jvmargs=-Xmx2g "-XX:MaxMetaspaceSize=512m" 3 | -------------------------------------------------------------------------------- /gradle/checkstyle/suppressions.xml: -------------------------------------------------------------------------------- [XML content lost in this text export] -------------------------------------------------------------------------------- /gradle/spotbugs/spotbugsInclude.xml: -------------------------------------------------------------------------------- [XML content lost in this text export] -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/gradle/wrapper/gradle-wrapper.jar
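A short sketch of how the ConfigMismatchException constructor (shown earlier) composes its message; the entity strings here are illustrative only:

import com.linkedin.openhouse.cluster.storage.exception.ConfigMismatchException;

public class ConfigMismatchExample {
  public static void main(String[] args) {
    ConfigMismatchException e =
        new ConfigMismatchException(
            "Default storage type is misconfigured", // illustrative errorMsg
            "storages.default-type=hdfs", // illustrative entityA
            "the set of injected storage beans"); // illustrative entityB
    // Prints: "Default storage type is misconfigured, since storages.default-type=hdfs
    // collides with the set of injected storage beans"
    System.out.println(e.getMessage());
  }
}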
-------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /iceberg/azure/NOTICE: -------------------------------------------------------------------------------- 1 | 2 | Apache Iceberg 3 | Copyright 2017-2022 The Apache Software Foundation 4 | 5 | This product includes software developed at 6 | The Apache Software Foundation (http://www.apache.org/). 7 | 8 | -------------------------------------------------------------------------------- 9 | 10 | This project includes code from Kite, developed at Cloudera, Inc. with 11 | the following copyright notice: 12 | 13 | | Copyright 2013 Cloudera Inc. 14 | | 15 | | Licensed under the Apache License, Version 2.0 (the "License"); 16 | | you may not use this file except in compliance with the License. 17 | | You may obtain a copy of the License at 18 | | 19 | | http://www.apache.org/licenses/LICENSE-2.0 20 | | 21 | | Unless required by applicable law or agreed to in writing, software 22 | | distributed under the License is distributed on an "AS IS" BASIS, 23 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | | See the License for the specific language governing permissions and 25 | | limitations under the License. -------------------------------------------------------------------------------- /iceberg/azure/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-conventions' 3 | id 'openhouse.iceberg-conventions-1.5.2' 4 | id 'openhouse.iceberg-azure-conventions' 5 | id 'openhouse.maven-publish' 6 | } -------------------------------------------------------------------------------- /iceberg/openhouse/htscatalog/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.springboot-conventions' 3 | id 'openhouse.hadoop-conventions' 4 | id 'openhouse.iceberg-conventions-1.5.2' 5 | id 'openhouse.maven-publish' 6 | } 7 | 8 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/InternalCatalogMetricsConstant.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog; 2 | 3 | public final class InternalCatalogMetricsConstant { 4 | private InternalCatalogMetricsConstant() {} 5 | 6 | static final String METRICS_PREFIX = "catalog"; 7 | 8 | static final String COMMIT_FAILED_CTR = "commit_failed"; 9 | static final String COMMIT_STATE_UNKNOWN = "commit_state_unknown"; 10 | static final String NO_TABLE_WHEN_REFRESH = "table_not_found_during_refresh"; 11 | 12 | static final String MISSING_COMMIT_KEY = "commit_key_missing"; 13 | 14 | static final String SNAPSHOTS_ADDED_CTR = "snapshots_added"; 15 | static final String SNAPSHOTS_STAGED_CTR = "snapshots_staged"; 16 | static final String SNAPSHOTS_CHERRY_PICKED_CTR = "snapshots_cherry_picked"; 17 | static final String SNAPSHOTS_DELETED_CTR = "snapshots_deleted"; 18 | 19 | static final String 
METADATA_UPDATE_LATENCY = "metadata_update_latency"; 20 | static final String METADATA_RETRIEVAL_LATENCY = "metadata_retrieval_latency"; 21 | } 22 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/exception/InvalidIcebergSnapshotException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.exception; 2 | 3 | /** 4 | * The exception thrown when the load-snapshot API detects invalid snapshot provided by clients. 5 | * TODO: Fill in more information in this exception class. 6 | */ 7 | public class InvalidIcebergSnapshotException extends RuntimeException { 8 | 9 | public InvalidIcebergSnapshotException(String errorMsg) { 10 | super(errorMsg); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/model/HouseTablePrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class HouseTablePrimaryKey implements Serializable { 17 | private String tableId; 18 | 19 | private String databaseId; 20 | } 21 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/CustomRetryListener.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository; 2 | 3 | import org.springframework.retry.RetryCallback; 4 | import org.springframework.retry.RetryContext; 5 | import org.springframework.retry.RetryListener; 6 | 7 | /** Used as an observer in testing the retry behavior. 
*/ 8 | public class CustomRetryListener implements RetryListener { 9 | 10 | private int retryCount = 0; 11 | 12 | @Override 13 | public <T, E extends Throwable> boolean open(RetryContext context, RetryCallback<T, E> callback) { 14 | return true; 15 | } 16 | 17 | @Override 18 | public <T, E extends Throwable> void close( 19 | RetryContext context, RetryCallback<T, E> callback, Throwable throwable) {} 20 | 21 | @Override 22 | public <T, E extends Throwable> void onError( 23 | RetryContext context, RetryCallback<T, E> callback, Throwable throwable) { 24 | retryCount++; 25 | } 26 | 27 | public int getRetryCount() { 28 | return retryCount; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/HouseTableRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository; 2 | 3 | import com.linkedin.openhouse.internal.catalog.model.HouseTable; 4 | import com.linkedin.openhouse.internal.catalog.model.HouseTablePrimaryKey; 5 | import java.util.List; 6 | import org.springframework.data.repository.CrudRepository; 7 | import org.springframework.stereotype.Repository; 8 | 9 | /** 10 | * Base interface for a repository backed by HouseTableService for storing and retrieving {@link 11 | * HouseTable} objects. 12 | */ 13 | @Repository 14 | public interface HouseTableRepository extends CrudRepository<HouseTable, HouseTablePrimaryKey> { 15 | 16 | List<HouseTable> findAllByDatabaseId(String databaseId); 17 | 18 | void rename( 19 | String fromDatabaseId, 20 | String fromTableId, 21 | String toDatabaseId, 22 | String toTableId, 23 | String metadataLocation); 24 | } 25 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/HtsRetryUtils.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository; 2 | 3 | import java.util.concurrent.TimeUnit; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.retry.backoff.BackOffPolicy; 6 | import org.springframework.retry.backoff.BackOffPolicyBuilder; 7 | 8 | /** Common utilities for retrying service calls, e.g. HTS calls.
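 *
 * <p>A sketch, not repo code, of wiring these pieces together with Spring Retry; RetryTemplate,
 * SimpleRetryPolicy and registerListener are standard Spring Retry APIs:
 *
 * <pre>{@code
 * RetryTemplate template = new RetryTemplate();
 * template.setBackOffPolicy(HtsRetryUtils.DEFAULT_HTS_BACKOFF_POLICY);
 * template.setRetryPolicy(new SimpleRetryPolicy(HtsRetryUtils.MAX_RETRY_ATTEMPT));
 * template.registerListener(new CustomRetryListener()); // the test observer defined above
 * }</pre>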
*/ 9 | @Slf4j 10 | public final class HtsRetryUtils { 11 | private HtsRetryUtils() { 12 | // utilities class private ctor noop 13 | } 14 | 15 | public static final int MAX_RETRY_ATTEMPT = 3; 16 | 17 | public static final BackOffPolicy DEFAULT_HTS_BACKOFF_POLICY = 18 | BackOffPolicyBuilder.newBuilder() 19 | .multiplier(2.0) 20 | .delay(TimeUnit.SECONDS.toMillis(2)) 21 | .maxDelay(TimeUnit.SECONDS.toMillis(30)) 22 | .build(); 23 | } 24 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/exception/HouseTableCallerException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository.exception; 2 | 3 | public class HouseTableCallerException extends HouseTableRepositoryException { 4 | public HouseTableCallerException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/exception/HouseTableConcurrentUpdateException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository.exception; 2 | 3 | public class HouseTableConcurrentUpdateException extends HouseTableRepositoryException { 4 | 5 | public HouseTableConcurrentUpdateException(String message, Throwable cause) { 6 | super(message, cause); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/exception/HouseTableNotFoundException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository.exception; 2 | 3 | public class HouseTableNotFoundException extends HouseTableRepositoryException { 4 | public HouseTableNotFoundException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/exception/HouseTableRepositoryException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository.exception; 2 | 3 | public class HouseTableRepositoryException extends RuntimeException { 4 | public HouseTableRepositoryException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/repository/exception/HouseTableRepositoryStateUnknownException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.repository.exception; 2 | 3 | /** Exception thrown when HTS returns a 5xx. 
*/ 4 | public class HouseTableRepositoryStateUnknownException extends HouseTableRepositoryException { 5 | public HouseTableRepositoryStateUnknownException(String message, Throwable cause) { 6 | super(message, cause); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/toggle/IcebergFeatureGate.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.internal.catalog.toggle; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | /** 9 | * Used to gate the feature of the corresponding method where this annotation is applied, on the 10 | * granularity of Iceberg table identified by {@link org.apache.iceberg.catalog.TableIdentifier} 11 | * 12 | *

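 * <p>Illustrative sketch (hypothetical gate key and method name, not repo code):
 *
 * <pre>{@code
 * @IcebergFeatureGate("example_feature_key")
 * void commitSnapshots(TableIdentifier tableIdentifier) { ... }
 * }</pre>
 *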
Note for users: Annotated method should have {@link 13 | * org.apache.iceberg.catalog.TableIdentifier} in method signature. If feature/value is not 14 | * activated, it throws ResourceGatedByToggledOnFeatureException. 15 | */ 16 | @Retention(RetentionPolicy.RUNTIME) 17 | @Target(ElementType.METHOD) 18 | public @interface IcebergFeatureGate { 19 | String value(); 20 | } 21 | -------------------------------------------------------------------------------- /iceberg/openhouse/internalcatalog/src/test/resources/future_serialized_snapshots.json: -------------------------------------------------------------------------------- 1 | [ 2 | "{ \"snapshot-id\" : 4151407017102313399, \"timestamp-ms\" : 2655481960000, \"summary\" : { \"operation\" : \"append\", \"spark.app.id\" : \"local-1669126906634\", \"added-data-files\" : \"1\", \"added-records\" : \"1\", \"added-files-size\" : \"673\", \"changed-partition-count\" : \"1\", \"total-records\" : \"1\", \"total-files-size\" : \"673\", \"total-data-files\" : \"1\", \"total-delete-files\" : \"0\", \"total-position-deletes\" : \"0\", \"total-equality-deletes\" : \"0\" }, \"manifest-list\" : \"/data/openhouse/db/test-7a9e8c95-1a62-4d29-9621-d8784047fc6b/metadata/snap-2151407017102313398-1-aa0dcbb9-707f-4f53-9df8-394bad8563f2.avro\", \"schema-id\" : 0}" 3 | ] -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/debug-profile/housetables.yml: -------------------------------------------------------------------------------- 1 | services: 2 | openhouse-housetables: 3 | ports: 4 | - 6005:6005 5 | environment: 6 | - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,address=*:6005,suspend=n -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/debug-profile/jobs.yml: -------------------------------------------------------------------------------- 1 | services: 2 | openhouse-jobs: 3 | ports: 4 | - 6005:6005 5 | environment: 6 | - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,address=*:6005,suspend=n -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/debug-profile/tables.yml: -------------------------------------------------------------------------------- 1 | services: 2 | openhouse-tables: 3 | ports: 4 | - 6005:6005 5 | environment: 6 | - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,address=*:6005,suspend=n -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/hdfs-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | namenode: 4 | image: bde2020/hadoop-namenode:1.2.0-hadoop2.8-java8 5 | restart: always 6 | ports: 7 | - 9870:9870 8 | - 9000:9000 9 | environment: 10 | - CLUSTER_NAME=test 11 | env_file: 12 | - hadoop/hadoop.env 13 | 14 | datanode: 15 | image: bde2020/hadoop-datanode:1.2.0-hadoop2.8-java8 16 | restart: always 17 | environment: 18 | SERVICE_PRECONDITION: "namenode:9870" 19 | CORE_CONF_fs_defaultFS: hdfs://namenode:9000 20 | ports: 21 | - 9864:9864 22 | - 9866:9866 23 | env_file: 24 | - hadoop/hadoop.env 25 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/intellij-setup.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/infra/recipes/docker-compose/common/intellij-setup.png -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/mysql-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | mysql: 4 | image: mysql:8 5 | ports: 6 | - 3306:3306 7 | environment: 8 | - MYSQL_ROOT_PASSWORD=oh_root_password 9 | - MYSQL_PASSWORD=oh_password 10 | - MYSQL_USER=oh_user 11 | - MYSQL_DATABASE=oh_db 12 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/oauth-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | oauth: 4 | image: quay.io/keycloak/keycloak:23.0.7 5 | ports: 6 | - 8085:8080 7 | environment: 8 | - KEYCLOAK_ADMIN=admin 9 | - KEYCLOAK_ADMIN_PASSWORD=admin 10 | - DB_VENDOR=h2 11 | volumes: 12 | - ./oauth/openhouse-realm.json:/opt/keycloak/data/import/openhouse-realm.json:ro 13 | command: ["start-dev", "--import-realm"] -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/oauth/fetch_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if username is provided 4 | if [ -z "$1" ]; then 5 | echo "Error: Username not provided." 6 | echo "Usage: $0 " 7 | exit 1 8 | fi 9 | 10 | KEYCLOAK_URL="http://localhost:8085" 11 | REALM="openhouse" 12 | CLIENT_ID="openhouse-services" 13 | USERNAME="$1" 14 | PASSWORD="$1" 15 | 16 | TOKEN=$(curl -X POST \ 17 | -d "client_id=$CLIENT_ID" \ 18 | -d "username=$USERNAME" \ 19 | -d "password=$PASSWORD" \ 20 | -d "grant_type=password" \ 21 | "$KEYCLOAK_URL/realms/$REALM/protocol/openid-connect/token" | jq -r '.access_token') 22 | 23 | echo "$TOKEN" 24 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/oauth/validate_token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if token is provided 4 | if [ -z "$1" ]; then 5 | echo "Error: Token not provided." 6 | echo "Usage: $0 " 7 | exit 1 8 | fi 9 | 10 | KEYCLOAK_URL="http://localhost:8085" 11 | REALM="openhouse" 12 | TOKEN="$1" 13 | 14 | USERNAME=$(curl -X GET \ 15 | -H "Content-Type: application/x-www-form-urlencoded" \ 16 | -H "Authorization: Bearer $TOKEN" \ 17 | "$KEYCLOAK_URL/realms/$REALM/protocol/openid-connect/userinfo" | jq -r '.preferred_username') 18 | 19 | echo "$USERNAME" -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/oh-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | openhouse-tables: 4 | build: 5 | context: ../../../.. 6 | dockerfile: tables-service.Dockerfile 7 | ports: 8 | - 8000:8080 9 | openhouse-housetables: 10 | build: 11 | context: ../../../.. 12 | dockerfile: housetables-service.Dockerfile 13 | ports: 14 | - 8001:8080 15 | openhouse-jobs: 16 | build: 17 | context: ../../../.. 18 | dockerfile: jobs-service.Dockerfile 19 | ports: 20 | - 8002:8080 21 | openhouse-jobs-scheduler: 22 | build: 23 | context: ../../../.. 
24 | dockerfile: jobs-scheduler.Dockerfile 25 | prometheus: 26 | image: prom/prometheus:v2.21.0 27 | ports: 28 | - 9090:9090 29 | volumes: 30 | - ./prometheus:/etc/prometheus/ 31 | command: --web.enable-lifecycle --config.file=/etc/prometheus/prometheus.yml -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/opa-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | opa: 4 | image: openpolicyagent/opa:1.0.0 5 | ports: 6 | - "8181:8181" 7 | expose: 8 | - 8181 9 | volumes: 10 | - ./opa/policy.rego:/policy.rego 11 | - ./opa/data.json:/data 12 | command: 13 | - "run" 14 | - "--server" 15 | - "policy.rego" 16 | - "data" 17 | - "--addr" 18 | - "0.0.0.0:8181" -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/opa/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "user_roles": { 3 | }, 4 | "privileges": { 5 | "TABLE_VIEWER": [ 6 | "GET_TABLE_METADATA" 7 | ], 8 | "ACL_EDITOR": [ 9 | "UPDATE_ACL" 10 | ], 11 | "TABLE_ADMIN": [ 12 | "ALTER_TABLE", 13 | "DELETE_TABLE", 14 | "UPDATE_TABLE_METADATA" 15 | ], 16 | "TABLE_CREATOR": [ 17 | "CREATE_TABLE" 18 | ] 19 | } 20 | } -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | - job_name: 'spring-actuator' 3 | metrics_path: '/actuator/prometheus' 4 | scrape_interval: 5s 5 | static_configs: 6 | # making the prometheus scraping from the service exposed in each container's port 7 | - targets: ['local.openhouse-tables:8080', 'local.openhouse-jobs:8080'] -------------------------------------------------------------------------------- /infra/recipes/docker-compose/common/s3-services.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | minioS3: 4 | image: minio/minio 5 | environment: 6 | - MINIO_ROOT_USER=admin 7 | - MINIO_ROOT_PASSWORD=password 8 | ports: 9 | - 9871:9001 10 | - 9870:9000 # Minio Server will be available at this port 11 | command: [ "server", "/data", "--console-address", ":9001" ] 12 | minioClient: 13 | depends_on: 14 | - minioS3 15 | image: minio/mc # MinIO Client to pre-create bucket 16 | environment: 17 | - AWS_ACCESS_KEY_ID=admin 18 | - AWS_SECRET_ACCESS_KEY=password 19 | - AWS_REGION=us-east-1 20 | entrypoint: > 21 | /bin/sh -c " 22 | until (/usr/bin/mc config host add minio http://minioS3:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; 23 | /usr/bin/mc rm -r --force minio/openhouse-bucket; 24 | /usr/bin/mc mb minio/openhouse-bucket; 25 | /usr/bin/mc policy set public minio/openhouse-bucket; 26 | tail -f /dev/null 27 | " 28 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/oh-hadoop-spark/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "LocalHadoopCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "hdfs://namenode:9000/" 6 | root-path: "/data/openhouse" 7 | storages: 8 | default-type: "hdfs" 9 | types: 10 | hdfs: 11 | rootpath: "/data/openhouse" 12 | endpoint: "hdfs://namenode:9000/" 13 | iceberg: 14 | write: 15 | format: 16 | default: "orc" 17 | metadata: 18 | previous-versions-max: 28 19 | delete-after-commit: 20 | enabled: true 21 | housetables: 22 | base-uri: "http://openhouse-housetables:8080" 23 | database: 24 | type: "MYSQL" 25 | url: "jdbc:mysql://mysql:3306/oh_db?allowPublicKeyRetrieval=true&useSSL=false" 26 | security: 27 | token: 28 | interceptor: 29 | classname: "com.linkedin.openhouse.common.security.DummyTokenInterceptor" 30 | tables: 31 | authorization: 32 | enabled: true 33 | opa: 34 | base-uri: "http://opa:8181" 35 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/oh-hadoop/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "LocalHadoopCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "hdfs://namenode:9000/" 6 | root-path: "/data/openhouse" 7 | storages: 8 | default-type: "hdfs" 9 | types: 10 | hdfs: 11 | rootpath: "/data/openhouse" 12 | endpoint: "hdfs://namenode:9000/" 13 | iceberg: 14 | write: 15 | format: 16 | default: "orc" 17 | metadata: 18 | previous-versions-max: 28 19 | delete-after-commit: 20 | enabled: true 21 | housetables: 22 | base-uri: "http://openhouse-housetables:8080" 23 | database: 24 | type: "IN_MEMORY" 25 | security: 26 | token: 27 | interceptor: 28 | classname: "com.linkedin.openhouse.common.security.DummyTokenInterceptor" 29 | tables: 30 | authorization: 31 | enabled: true 32 | opa: 33 | base-uri: "http://opa:8181" 34 | -------------------------------------------------------------------------------- /infra/recipes/docker-compose/oh-only/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "LocalFSCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "file:///" 6 | root-path: "/tmp/openhouse" 7 | iceberg: 8 | write: 9 | format: 10 | default: "orc" 11 | metadata: 12 | previous-versions-max: 28 13 | delete-after-commit: 14 | enabled: true 15 | housetables: 16 | base-uri: "http://openhouse-housetables:8080" 17 | database: 18 | type: "IN_MEMORY" 19 | security: 20 | token: 21 | interceptor: 22 | classname: "com.linkedin.openhouse.common.security.DummyTokenInterceptor" 23 | tables: 24 | authorization: 25 | enabled: true 26 | opa: 27 | base-uri: "http://opa:8181" -------------------------------------------------------------------------------- /infra/recipes/docker-compose/oh-only/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | openhouse-tables: 4 | extends: 5 | file: ../common/oh-services.yml 6 | service: openhouse-tables 7 | volumes: 8 | - ./:/var/config/ 9 | depends_on: 10 | - openhouse-housetables 11 | - prometheus 12 | - opa 13 | 14 | openhouse-housetables: 15 | 
extends: 16 | file: ../common/oh-services.yml 17 | service: openhouse-housetables 18 | volumes: 19 | - ./:/var/config/ 20 | depends_on: 21 | - prometheus 22 | 23 | prometheus: 24 | extends: 25 | file: ../common/oh-services.yml 26 | service: prometheus 27 | 28 | opa: 29 | container_name: local.opa 30 | extends: 31 | file: ../common/opa-services.yml 32 | service: opa 33 | 34 | oauth: 35 | container_name: local.oauth 36 | extends: 37 | file: ../common/oauth-services.yml 38 | service: oauth -------------------------------------------------------------------------------- /infra/recipes/docker-compose/spark-only/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | services: 3 | spark-master: 4 | container_name: local.spark-master 5 | extends: 6 | file: ../common/spark-services.yml 7 | service: spark-master 8 | 9 | spark-worker-a: 10 | container_name: local.spark-worker-a 11 | extends: 12 | file: ../common/spark-services.yml 13 | service: spark-worker-a 14 | depends_on: 15 | - spark-master -------------------------------------------------------------------------------- /infra/recipes/k8s/config/housetables/minikube/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "MinikubeCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "file:///" 6 | root-path: "/tmp/minikube_openhouse" 7 | iceberg: 8 | write: 9 | format: 10 | default: "orc" 11 | metadata: 12 | previous-versions-max: 28 13 | delete-after-commit: 14 | enabled: true 15 | housetables: 16 | base-uri: "http://openhouse-housetables-service:8080" 17 | database: 18 | type: "IN_MEMORY" 19 | security: 20 | token: 21 | interceptor: 22 | classname: null 23 | tables: 24 | authorization: 25 | enabled: false -------------------------------------------------------------------------------- /infra/recipes/k8s/config/tables/minikube/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "MinikubeCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "file:///" 6 | root-path: "/tmp/minikube_openhouse" 7 | iceberg: 8 | write: 9 | format: 10 | default: "orc" 11 | metadata: 12 | previous-versions-max: 28 13 | delete-after-commit: 14 | enabled: true 15 | housetables: 16 | base-uri: "http://openhouse-housetables-service:8080" 17 | database: 18 | type: "IN_MEMORY" 19 | security: 20 | token: 21 | interceptor: 22 | classname: null 23 | tables: 24 | authorization: 25 | enabled: false -------------------------------------------------------------------------------- /infra/recipes/k8s/environments/azure/sandbox/housetables/values.yaml: -------------------------------------------------------------------------------- 1 | namespace: "default" 2 | 3 | openhouseClusterTag: "openhouse" 4 | 5 | clusterPropertiesPath: "config/azure/sandbox/cluster.yaml" 6 | 7 | htsService: 8 | replicaCount: 1 9 | image: 10 | tag: "latest" 11 | pullPolicy: Always 12 | service: 13 | type: ClusterIP 14 | port: 8080 15 | port: 8080 16 | resources: 17 | requests: 18 | cpu: 1 19 | memory: 1G 20 | limits: 21 | cpu: 1 22 | memory: 2G 23 | mysql: 24 | enabled: true 25 | secrets: 26 | HTS_DB_USER: MYSQL_USER 27 | HTS_DB_PASSWORD: MYSQL_PASSWORD 28 | command: "java -Xmx1024M -Xms128M -XX:NativeMemoryTracking=summary -cp housetables.jar -Dloader.main=com.linkedin.openhouse.HouseTablesSpringApplication org.springframework.boot.loader.PropertiesLauncher $APP_NAME.jar $@" 29 | appName: housetables-service 30 | 
-------------------------------------------------------------------------------- /infra/recipes/k8s/environments/azure/sandbox/tables/values.yaml: -------------------------------------------------------------------------------- 1 | namespace: "default" 2 | 3 | openhouseClusterTag: "openhouse" 4 | 5 | clusterPropertiesPath: "config/azure/sandbox/cluster.yaml" 6 | 7 | tablesService: 8 | replicaCount: 1 9 | image: 10 | tag: "latest" 11 | pullPolicy: Always 12 | service: 13 | type: LoadBalancer # Needed so that the client cluster can connect to the service via an external IP 14 | port: 8080 15 | port: 8080 16 | resources: 17 | requests: 18 | cpu: 1 19 | memory: 1G 20 | limits: 21 | cpu: 1 22 | memory: 2G 23 | command: "java -Xmx1024M -Xms128M -XX:NativeMemoryTracking=summary -cp tables.jar -Dloader.main=com.linkedin.openhouse.tables.TablesSpringApplication org.springframework.boot.loader.PropertiesLauncher $APP_NAME.jar $@" 24 | appName: tables-service 25 | -------------------------------------------------------------------------------- /infra/recipes/k8s/environments/minikube/housetables/values.yaml: -------------------------------------------------------------------------------- 1 | namespace: "default" 2 | 3 | openhouseClusterTag: "openhouse" 4 | 5 | clusterPropertiesPath: "config/minikube/cluster.yaml" 6 | 7 | htsService: 8 | replicaCount: 1 9 | image: 10 | repository: "openhouse-housetables-service" 11 | tag: "latest" 12 | pullPolicy: Never 13 | service: 14 | type: ClusterIP 15 | port: 8080 16 | port: 8080 17 | resources: 18 | requests: 19 | cpu: 2 20 | memory: 1G 21 | limits: 22 | cpu: 2 23 | memory: 2G 24 | mysql: 25 | enabled: false 26 | secrets: 27 | HTS_DB_USER: MYSQL_USER 28 | HTS_DB_PASSWORD: MYSQL_PASSWORD 29 | command: "java -Xmx1024M -Xms128M -XX:NativeMemoryTracking=summary -cp housetables.jar -Dloader.main=com.linkedin.openhouse.HouseTablesSpringApplication org.springframework.boot.loader.PropertiesLauncher $APP_NAME.jar $@" 30 | appName: housetables-service 31 | -------------------------------------------------------------------------------- /infra/recipes/k8s/environments/minikube/tables/values.yaml: -------------------------------------------------------------------------------- 1 | namespace: "default" 2 | 3 | openhouseClusterTag: "openhouse" 4 | 5 | clusterPropertiesPath: "config/minikube/cluster.yaml" 6 | 7 | tablesService: 8 | replicaCount: 1 9 | image: 10 | repository: "openhouse-tables-service" 11 | tag: "latest" 12 | pullPolicy: Never 13 | service: 14 | type: ClusterIP 15 | port: 8080 16 | port: 8080 17 | resources: 18 | requests: 19 | cpu: 2 20 | memory: 1G 21 | limits: 22 | cpu: 2 23 | memory: 2G 24 | command: "java -Xmx1024M -Xms128M -XX:NativeMemoryTracking=summary -cp tables.jar -Dloader.main=com.linkedin.openhouse.tables.TablesSpringApplication org.springframework.boot.loader.PropertiesLauncher $APP_NAME.jar $@" 25 | appName: tables-service -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/housetables/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: openhouse-housetables-service 3 | description: A Helm chart for deploying openhouse-housetables-services to Kubernetes 4 | version: 0.1.0 5 | appVersion: latest 6 | -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/housetables/config/azure/sandbox/cluster.yaml: 
-------------------------------------------------------------------------------- 1 | ../../../../tables/config/azure/sandbox/cluster.yaml -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/housetables/config/minikube/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "MinikubeCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "file:///" 6 | root-path: "/tmp/minikube_openhouse" 7 | iceberg: 8 | write: 9 | format: 10 | default: "orc" 11 | metadata: 12 | previous-versions-max: 28 13 | delete-after-commit: 14 | enabled: true 15 | housetables: 16 | base-uri: "http://openhouse-housetables-service:8080" 17 | database: 18 | type: "IN_MEMORY" 19 | security: 20 | token: 21 | interceptor: 22 | classname: null 23 | tables: 24 | authorization: 25 | enabled: false -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/housetables/templates/hts-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | namespace: {{ .Values.namespace }} 5 | name: {{ .Release.Name }}-conf 6 | data: 7 | cluster.yaml: |- 8 | {{- if .Values.clusterPropertiesPath }} 9 | {{- $path := .Values.clusterPropertiesPath }} 10 | {{- $content := .Files.Get $path }} 11 | {{- if $content }} 12 | {{ if .Values.housetables.database.url }} 13 | {{ $content | 14 | regexReplaceAllLiteral "" .Values.housetables.database.url | 15 | nindent 4 16 | }} 17 | {{- else }} 18 | {{ $content | nindent 4 }} 19 | {{- end }} 20 | {{- else }} 21 | {{ fail (printf "File at %s not found or empty" $path) }} 22 | {{- end }} 23 | {{- else }} 24 | {{ fail "clusterPropertiesPath is not defined" }} 25 | {{- end }} 26 | -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/housetables/templates/hts-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Values.namespace }} 6 | labels: 7 | app.kubernetes.io/app: openhouse 8 | app.kubernetes.io/service: {{ .Release.Name }} 9 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 10 | spec: 11 | type: ClusterIP 12 | ports: 13 | - port: {{ .Values.htsService.port }} 14 | targetPort: http 15 | protocol: TCP 16 | name: http 17 | selector: 18 | app.kubernetes.io/service: {{ .Release.Name }} 19 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 20 | app.kubernetes.io/instance: {{ .Release.Name }} 21 | -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/tables/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: openhouse-tables-service 3 | description: A Helm chart for deploying openhouse-tables-services (aka Catalog) to Kubernetes 4 | version: 0.1.0 5 | appVersion: latest -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/tables/config/azure/sandbox/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "AKSCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "hdfs://namenode:9000/" 6 | root-path: "/data/openhouse" 7 | storages: 8 | default-type: "adls" 9 | types: 10 | adls: 11 | rootpath: @.dfs.core.windows.net 12 | endpoint: 
"abfs://" 13 | parameters: 14 | adls.auth.shared-key.account.name: 15 | adls.auth.shared-key.account.key: 16 | iceberg: 17 | write: 18 | format: 19 | default: "orc" 20 | metadata: 21 | previous-versions-max: 78 22 | delete-after-commit: 23 | enabled: true 24 | housetables: 25 | base-uri: "http://openhouse-housetables-service:8080" 26 | database: 27 | type: "MYSQL" 28 | url: 29 | security: 30 | token: 31 | interceptor: 32 | classname: null 33 | tables: 34 | authorization: 35 | enabled: false -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/tables/config/minikube/cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "MinikubeCluster" 3 | storage: 4 | type: "hadoop" 5 | uri: "file:///" 6 | root-path: "/tmp/minikube_openhouse" 7 | iceberg: 8 | write: 9 | format: 10 | default: "orc" 11 | metadata: 12 | previous-versions-max: 28 13 | delete-after-commit: 14 | enabled: true 15 | housetables: 16 | base-uri: "http://openhouse-housetables-service:8080" 17 | database: 18 | type: "IN_MEMORY" 19 | security: 20 | token: 21 | interceptor: 22 | classname: null 23 | tables: 24 | authorization: 25 | enabled: false -------------------------------------------------------------------------------- /infra/recipes/k8s/helm/tables/templates/tables-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Values.namespace }} 6 | labels: 7 | app.kubernetes.io/app: openhouse 8 | app.kubernetes.io/service: {{ .Release.Name }} 9 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 10 | spec: 11 | {{- if eq .Values.tablesService.service.type "LoadBalancer" }} 12 | type: LoadBalancer 13 | {{- else }} 14 | type: ClusterIP 15 | {{- end }} 16 | ports: 17 | - port: {{ .Values.tablesService.port }} 18 | targetPort: http 19 | protocol: TCP 20 | name: http 21 | selector: 22 | app.kubernetes.io/service: {{ .Release.Name }} 23 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 24 | app.kubernetes.io/instance: {{ .Release.Name }} 25 | -------------------------------------------------------------------------------- /infra/recipes/k8s/templates/housetables/hts-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | namespace: {{ .Values.namespace }} 5 | name: {{ .Release.Name }}-conf 6 | data: 7 | cluster.yaml: |- 8 | {{- if .Values.clusterPropertiesPath }} 9 | {{- $path := .Values.clusterPropertiesPath }} 10 | {{- $content := .Files.Get $path }} 11 | {{- if $content }} 12 | {{ $content | nindent 4 }} 13 | {{- else }} 14 | {{ fail (printf "File at %s not found or empty" $path) }} 15 | {{- end }} 16 | {{- else }} 17 | {{ fail "clusterPropertiesPath is not defined" }} 18 | {{- end }} 19 | -------------------------------------------------------------------------------- /infra/recipes/k8s/templates/housetables/hts-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Values.namespace }} 6 | labels: 7 | app.kubernetes.io/app: openhouse 8 | app.kubernetes.io/service: {{ .Release.Name }} 9 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 10 | spec: 11 | type: ClusterIP 12 | ports: 13 | - port: {{ .Values.htsService.port }} 14 | 
targetPort: http 15 | protocol: TCP 16 | name: http 17 | selector: 18 | app.kubernetes.io/service: {{ .Release.Name }} 19 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 20 | app.kubernetes.io/instance: {{ .Release.Name }} 21 | -------------------------------------------------------------------------------- /infra/recipes/k8s/templates/tables/tables-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | namespace: {{ .Values.namespace }} 5 | name: {{ .Release.Name }}-conf 6 | data: 7 | cluster.yaml: |- 8 | {{- if .Values.clusterPropertiesPath }} 9 | {{- $path := .Values.clusterPropertiesPath }} 10 | {{- $content := .Files.Get $path }} 11 | {{- if $content }} 12 | {{ $content | nindent 4 }} 13 | {{- else }} 14 | {{ fail (printf "File at %s not found or empty" $path) }} 15 | {{- end }} 16 | {{- else }} 17 | {{ fail "clusterPropertiesPath is not defined" }} 18 | {{- end}} -------------------------------------------------------------------------------- /infra/recipes/k8s/templates/tables/tables-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Values.namespace }} 6 | labels: 7 | app.kubernetes.io/app: openhouse 8 | app.kubernetes.io/service: {{ .Release.Name }} 9 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 10 | spec: 11 | type: ClusterIP 12 | ports: 13 | - port: {{ .Values.tablesService.port }} 14 | targetPort: http 15 | protocol: TCP 16 | name: http 17 | selector: 18 | app.kubernetes.io/service: {{ .Release.Name }} 19 | app.kubernetes.io/tag: {{ .Values.openhouseClusterTag }} 20 | app.kubernetes.io/instance: {{ .Release.Name }} 21 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/backend.tfvars.template: -------------------------------------------------------------------------------- 1 | resource_group_name = "" 2 | storage_account_name = "" 3 | container_name = "" 4 | key = "" -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/backend.tf: -------------------------------------------------------------------------------- 1 | # UNCOMMENT FOR AZURE REMOTE BACKEND 2 | # terraform { 3 | # backend "azurerm" {} 4 | # } 5 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | This container environment is needed to provision the Azure Container Registry (ACR) 3 | before any other provider is created. For example, the Docker resource needs data from 4 | the ACR resource, so we need a separate environment to ensure ACR is provisioned before 5 | we instantiate the Docker resource. 
6 | */ 7 | 8 | resource "random_string" "registry_name" { 9 | length = 6 10 | special = false 11 | lower = true 12 | upper = false 13 | } 14 | 15 | locals { 16 | unique_registry_name = "${var.registry_name}${random_string.registry_name.result}" 17 | } 18 | 19 | resource "azurerm_resource_group" "openhouse_sandbox" { 20 | name = var.resource_group_name 21 | location = var.resource_group_location 22 | } 23 | 24 | module "container" { 25 | unique_registry_name = local.unique_registry_name 26 | source = "../../modules/container" 27 | } 28 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/outputs.tf: -------------------------------------------------------------------------------- 1 | output "unique_registry_name" { 2 | value = local.unique_registry_name 3 | } 4 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/variables.tf: -------------------------------------------------------------------------------- 1 | variable "registry_name" { 2 | type = string 3 | description = "The name of the container registry." 4 | default = "openhousecontainerregistry" 5 | } 6 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/container/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | azurerm = { 4 | source = "hashicorp/azurerm" 5 | version = "< 3.90.0" 6 | } 7 | } 8 | } 9 | 10 | provider "azurerm" { 11 | features {} 12 | } 13 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/backend.tf: -------------------------------------------------------------------------------- 1 | # UNCOMMENT FOR AZURE REMOTE BACKEND 2 | # terraform { 3 | # backend "azurerm" {} 4 | # } 5 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/common_provider.tf: -------------------------------------------------------------------------------- 1 | ../../provider.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/common_versions.tf: -------------------------------------------------------------------------------- 1 | ../../versions.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/outputs.tf: -------------------------------------------------------------------------------- 1 | output "resource_group_name" { 2 | value = data.azurerm_resource_group.openhouse_sandbox.name 3 | } 4 | 5 | output "storage_account_name" { 6 | value = local.storage_account_name 7 | } 8 | 9 | output "storage_account_key" { 10 | value = data.azurerm_storage_account.default.primary_access_key 11 | sensitive = true 12 | } 13 | 14 | output "aks_cluster_name" { 15 | value = var.k8s_cluster_name 16 | } 17 | -------------------------------------------------------------------------------- 
/infra/recipes/terraform/azure/environments/sandbox/scripts/populate-tables.scala: -------------------------------------------------------------------------------- 1 | spark.sql("CREATE TABLE openhouse.testdb.tbA (ts timestamp, col1 string, col2 string) PARTITIONED BY (days(ts))").show() 2 | println("created first table: ") 3 | spark.sql("DESCRIBE TABLE openhouse.testdb.tbA").show() 4 | for (x <- 0 until 100) { 5 | spark.sql(s"INSERT INTO TABLE openhouse.testdb.tbA VALUES (current_timestamp(), 'testval1 $x', 'testval2 $x')") 6 | } 7 | println("inserted into first table") 8 | 9 | spark.sql("CREATE TABLE openhouse.testdb.tbB (ts timestamp, col1 int) PARTITIONED BY (days(ts))").show() 10 | println("created second table: ") 11 | spark.sql("DESCRIBE TABLE openhouse.testdb.tbB").show() 12 | for (x <- 0 until 100) { 13 | spark.sql(s"INSERT INTO TABLE openhouse.testdb.tbB VALUES (current_timestamp(), $x)") 14 | } 15 | println("inserted into second table") -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/environments/sandbox/variables.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/k8s_variables.tf: -------------------------------------------------------------------------------- 1 | variable "k8s_client_certificate" { 2 | type = string 3 | description = "The client certificate for AKS." 4 | } 5 | 6 | variable "k8s_client_key" { 7 | type = string 8 | description = "The client key for AKS." 9 | sensitive = true 10 | } 11 | 12 | variable "k8s_cluster_ca_certificate" { 13 | type = string 14 | description = "The cluster certificate for AKS." 15 | } 16 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/container/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/container/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | Defines the Azure container registry (ACR) to hold image builds. 3 | */ 4 | 5 | resource "azurerm_container_registry" "default" { 6 | name = var.unique_registry_name 7 | resource_group_name = var.resource_group_name 8 | location = var.resource_group_location 9 | sku = "Premium" 10 | admin_enabled = true 11 | } 12 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/container/outputs.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/container/variables.tf: -------------------------------------------------------------------------------- 1 | variable "unique_registry_name" { 2 | type = string 3 | description = "The unique name of the ACR." 
4 | default = "openhousecontainerregistry" 5 | } 6 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/helm_release/outputs.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/image/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/image/common_versions.tf: -------------------------------------------------------------------------------- 1 | ../../versions.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/image/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | Defines resources for building images for openhouse services such as tables and housetables. 3 | */ 4 | locals { 5 | codebase_root = "${abspath(path.module)}/../../../../../.." 6 | } 7 | 8 | resource "docker_image" "housetables" { 9 | name = "${var.registry_login_server}/openhouse-housetables-service:latest" 10 | 11 | build { 12 | context = local.codebase_root 13 | dockerfile = "housetables-service.Dockerfile" 14 | } 15 | } 16 | 17 | resource "docker_registry_image" "hts_registry" { 18 | name = docker_image.housetables.name 19 | 20 | depends_on = [docker_image.housetables] 21 | } 22 | 23 | resource "docker_image" "tables" { 24 | name = "${var.registry_login_server}/openhouse-tables-service:latest" 25 | 26 | build { 27 | context = local.codebase_root 28 | dockerfile = "tables-service.Dockerfile" 29 | } 30 | } 31 | 32 | resource "docker_registry_image" "tables_registry" { 33 | name = docker_image.tables.name 34 | 35 | depends_on = [docker_image.tables] 36 | } 37 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/image/outputs.tf: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/image/variables.tf: -------------------------------------------------------------------------------- 1 | variable "registry_login_server" { 2 | type = string 3 | description = "The server name for the Azure container registry." 4 | } 5 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/k8s/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/k8s/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | Defines k8s module resources and configuration. These include AKS cluster configuration, node pool, role assignment etc.
3 | */ 4 | resource "azurerm_kubernetes_cluster" "aks_cluster" { 5 | name = var.k8s_cluster_name 6 | resource_group_name = var.resource_group_name 7 | location = var.resource_group_location 8 | dns_prefix = "sandboxk8s" 9 | 10 | default_node_pool { 11 | name = "default" 12 | min_count = var.min_node_count 13 | max_count = var.max_node_count 14 | enable_auto_scaling = true 15 | vm_size = var.vm_size 16 | } 17 | 18 | identity { 19 | type = "SystemAssigned" 20 | } 21 | } 22 | 23 | # grant the AcrPull role to the identity assigned to the kubernetes cluster 24 | resource "azurerm_role_assignment" "aks_to_acr" { 25 | scope = var.acr_id 26 | role_definition_name = "AcrPull" 27 | principal_id = azurerm_kubernetes_cluster.aks_cluster.kubelet_identity[0].object_id 28 | } 29 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/k8s/outputs.tf: -------------------------------------------------------------------------------- 1 | output "host" { 2 | value = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.host 3 | } 4 | 5 | output "client_certificate" { 6 | value = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_certificate 7 | } 8 | 9 | output "client_key" { 10 | value = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_key 11 | sensitive = true 12 | } 13 | 14 | output "cluster_ca_certificate" { 15 | value = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.cluster_ca_certificate 16 | } 17 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/k8s/variables.tf: -------------------------------------------------------------------------------- 1 | variable "min_node_count" { 2 | type = number 3 | description = "The minimum number of nodes in the Azure k8s cluster." 4 | } 5 | 6 | variable "max_node_count" { 7 | type = number 8 | description = "The maximum number of nodes in the Azure k8s cluster." 9 | } 10 | 11 | variable "vm_size" { 12 | type = string 13 | description = "The size of the virtual machine for the Azure k8s cluster." 14 | } 15 | 16 | variable "acr_id" { 17 | type = string 18 | description = "The id of the Azure Container Registry that we attach to AKS" 19 | } 20 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/mysql/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/mysql/main.tf: -------------------------------------------------------------------------------- 1 | /** 2 | Defines MySQL resources and configurations.
3 | */ 4 | 5 | resource "azurerm_mysql_flexible_server" "mysql_server" { 6 | name = var.server_name 7 | resource_group_name = var.resource_group_name 8 | location = var.resource_group_location 9 | administrator_login = var.db_admin_login 10 | administrator_password = var.db_admin_password 11 | delegated_subnet_id = var.subnet_id 12 | sku_name = var.server_sku 13 | private_dns_zone_id = var.dns_zone_id 14 | } 15 | 16 | resource "azurerm_mysql_flexible_database" "mysql" { 17 | name = var.db_name 18 | resource_group_name = var.resource_group_name 19 | server_name = azurerm_mysql_flexible_server.mysql_server.name 20 | collation = "utf8_unicode_ci" // for sorting 21 | charset = "utf8" 22 | } 23 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/mysql/outputs.tf: -------------------------------------------------------------------------------- 1 | output "mysql_server_name" { 2 | value = azurerm_mysql_flexible_server.mysql_server.name 3 | } 4 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/mysql/variables.tf: -------------------------------------------------------------------------------- 1 | variable "subnet_id" { 2 | type = string 3 | description = "The id of the MySQL subnet." 4 | } 5 | 6 | variable "server_name" { 7 | type = string 8 | description = "The name for the MySQL server." 9 | } 10 | 11 | variable "db_admin_login" { 12 | type = string 13 | description = "The username for the MySQL server admin." 14 | } 15 | 16 | variable "db_admin_password" { 17 | type = string 18 | description = "The password for the MySQL server admin." 19 | sensitive = true 20 | } 21 | 22 | variable "db_name" { 23 | type = string 24 | description = "The name for the MySQL database." 25 | } 26 | 27 | variable "server_sku" { 28 | type = string 29 | description = "The SKU for the MySQL flexible server." 30 | default = "GP_Standard_D2ds_v4" 31 | } 32 | 33 | variable "dns_zone_id" { 34 | type = string 35 | description = "The id of the private DNS zone within Azure DNS." 
36 | } 37 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/storage/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/storage/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_storage_account" "storage" { 2 | name = var.storage_account_name 3 | resource_group_name = var.resource_group_name 4 | location = var.resource_group_location 5 | account_tier = "Standard" 6 | account_replication_type = "LRS" 7 | } 8 | 9 | resource "azurerm_storage_container" "storage" { 10 | name = var.container_name 11 | storage_account_name = azurerm_storage_account.storage.name 12 | container_access_type = "blob" 13 | } 14 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/storage/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/infra/recipes/terraform/azure/modules/storage/outputs.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/storage/variables.tf: -------------------------------------------------------------------------------- 1 | variable "storage_account_name" { 2 | type = string 3 | description = "The name of the Azure storage account. Must be unique." 4 | } 5 | 6 | variable "container_name" { 7 | type = string 8 | description = "The name of the container within the storage account." 9 | } 10 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/vm/common_variables.tf: -------------------------------------------------------------------------------- 1 | ../../variables.tf -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/vm/outputs.tf: -------------------------------------------------------------------------------- 1 | output "virtual_network_name" { 2 | value = azurerm_virtual_network.virtual-network.name 3 | } 4 | 5 | output "subnet_id" { 6 | value = azurerm_subnet.subnet.id 7 | } 8 | 9 | output "dns_zone_id" { 10 | value = azurerm_private_dns_zone.default.id 11 | } 12 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/modules/vm/variables.tf: -------------------------------------------------------------------------------- 1 | variable "virtual_network_name" { 2 | type = string 3 | description = "The name of the virtual network." 4 | } 5 | 6 | variable "subnet_name" { 7 | type = string 8 | description = "The name of the Azure subnet." 9 | } 10 | 11 | variable "address_space" { 12 | type = list(string) 13 | description = "The address space for the virtual network." 14 | default = ["10.0.0.0/16"] 15 | } 16 | 17 | variable "address_prefixes" { 18 | type = list(string) 19 | description = "The address prefixes for the virtual machine subnet." 20 | default = ["10.0.1.0/24"] 21 | } 22 | 23 | variable "dns_zone_name" { 24 | type = string 25 | description = "The name of the private DNS zone."
26 | } 27 | 28 | variable "dns_link_name" { 29 | type = string 30 | description = "The name of the DNS zone virtual network link." 31 | } 32 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/variables.tf: -------------------------------------------------------------------------------- 1 | variable "resource_group_name" { 2 | type = string 3 | default = "resources" 4 | description = "Name of the resource group." 5 | } 6 | 7 | variable "resource_group_location" { 8 | type = string 9 | default = "westus" 10 | description = "Location of the resource group." 11 | } 12 | 13 | variable "k8s_cluster_name" { 14 | type = string 15 | default = "openhouse_cluster" 16 | description = "The name of the Azure k8s cluster." 17 | } 18 | -------------------------------------------------------------------------------- /infra/recipes/terraform/azure/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | azurerm = { 4 | source = "hashicorp/azurerm" 5 | version = "< 3.90.0" 6 | } 7 | 8 | docker = { 9 | source = "kreuzwerker/docker" 10 | version = "3.0.2" 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /integrations/java/iceberg-1.2/openhouse-java-itest/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | id 'openhouse.maven-publish' 4 | } 5 | 6 | ext { 7 | icebergVersion = '1.2.0' 8 | } 9 | 10 | dependencies { 11 | testImplementation(project(path: ':integrations:java:iceberg-1.2:openhouse-java-runtime', configuration: 'shadow')) 12 | 13 | testImplementation "com.squareup.okhttp3:okhttp:" + ok_http3_version 14 | testImplementation "com.squareup.okhttp3:mockwebserver:" + ok_http3_version 15 | testImplementation "org.apache.iceberg:iceberg-bundled-guava:" + icebergVersion 16 | } -------------------------------------------------------------------------------- /integrations/java/iceberg-1.2/openhouse-java-runtime/src/main/java/com/linkedin/openhouse/javaclient/audit/OpenHouseReportHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.javaclient.audit; 2 | 3 | /** 4 | * Interface to handle Iceberg audit events. Mandates handling of commitEvent and ScanEvent for 5 | * Iceberg audit events. 6 | */ 7 | public interface OpenHouseReportHandler<MetricsReport> { 8 | void audit(MetricsReport metricsReport); 9 | } 10 | -------------------------------------------------------------------------------- /integrations/java/iceberg-1.2/openhouse-java-runtime/src/main/java/com/linkedin/openhouse/javaclient/exception/WebClientRequestWithMessageException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.javaclient.exception; 2 | 3 | import org.springframework.web.reactive.function.client.WebClientRequestException; 4 | 5 | /** 6 | * An exception thrown in Openhouse clients to indicate an error in a tables API request, acting as 7 | * a wrapper around WebClientRequestException.
8 | */ 9 | public class WebClientRequestWithMessageException extends WebClientWithMessageException { 10 | private final WebClientRequestException requestException; 11 | 12 | public WebClientRequestWithMessageException(WebClientRequestException requestException) { 13 | this.requestException = requestException; 14 | } 15 | 16 | @Override 17 | public String getMessage() { 18 | return requestException.getMessage(); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /integrations/java/iceberg-1.2/openhouse-java-runtime/src/main/java/com/linkedin/openhouse/javaclient/exception/WebClientWithMessageException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.javaclient.exception; 2 | 3 | /** 4 | * An exception thrown in Openhouse clients to indicate an error in a tables API response or 5 | * request, acting as a wrapper around WebClientException. 6 | */ 7 | public abstract class WebClientWithMessageException extends RuntimeException { 8 | public abstract String getMessage(); 9 | } 10 | -------------------------------------------------------------------------------- /integrations/java/iceberg-1.2/openhouse-java-runtime/src/main/resources/com/linkedin/openhouse/relocated/org/springframework/http/codec/CodecConfigurer.properties: -------------------------------------------------------------------------------- 1 | com.linkedin.openhouse.relocated.org.springframework.http.codec.ClientCodecConfigurer=com.linkedin.openhouse.relocated.org.springframework.http.codec.support.DefaultClientCodecConfigurer 2 | com.linkedin.openhouse.relocated.org.springframework.http.codec.ServerCodecConfigurer=com.linkedin.openhouse.relocated.org.springframework.http.codec.support.DefaultServerCodecConfigurer 3 | -------------------------------------------------------------------------------- /integrations/java/iceberg-1.5/openhouse-java-itest/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'openhouse.java-minimal-conventions' 3 | id 'openhouse.maven-publish' 4 | } 5 | 6 | ext { 7 | icebergVersion = '1.5.2' 8 | } 9 | 10 | sourceSets { 11 | test { 12 | java { 13 | srcDirs += project(':integrations:java:iceberg-1.2:openhouse-java-itest').sourceSets.test.java.srcDirs 14 | } 15 | } 16 | } 17 | 18 | dependencies { 19 | testImplementation(project(path: ':integrations:java:iceberg-1.5:openhouse-java-iceberg-1.5-runtime', configuration: 'shadow')) 20 | 21 | testImplementation "com.squareup.okhttp3:okhttp:4.9.3" 22 | testImplementation "com.squareup.okhttp3:mockwebserver:4.9.3" 23 | testImplementation "org.apache.iceberg:iceberg-bundled-guava:" + icebergVersion 24 | } -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/e2e/ddl/UseCatalogTest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.e2e.ddl; 2 | 3 | import static com.linkedin.openhouse.spark.SparkTestBase.*; 4 | 5 | import com.linkedin.openhouse.spark.SparkTestBase; 6 | import org.junit.jupiter.api.Assertions; 7 | import org.junit.jupiter.api.Test; 8 | import org.junit.jupiter.api.extension.ExtendWith; 9 | 10 | @ExtendWith(SparkTestBase.class) 11 | public class UseCatalogTest { 12 | 13 | @Test 14 | public void testUseCatalog() { 15 | Assertions.assertDoesNotThrow(() -> spark.sql("USE 
openhouse").show()); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-itest/src/test/resources/dummy.token: -------------------------------------------------------------------------------- 1 | eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6IkRVTU1ZX0FOT05ZTU9VU19VU0VSZGJjNDk3MTMtMzM5ZC00Y2ZkLTkwMDgtZDY4NzlhZDQwZjE2Iiwic3ViIjoie1wiQ09ERVwiOlwiRFVNTVlfQ09ERVwiLFwiVVNFUi1JRFwiOlwiRFVNTVlfQU5PTllNT1VTX1VTRVJcIn0ifQ.W2WVBrMacFrXS8Xa29k_V_yD0yca2nEet5mSYq27Ayo -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/extensions/OpenhouseSparkSessionExtensions.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.extensions 2 | 3 | import com.linkedin.openhouse.spark.sql.catalyst.parser.extensions.OpenhouseSparkSqlExtensionsParser 4 | import com.linkedin.openhouse.spark.sql.execution.datasources.v2.OpenhouseDataSourceV2Strategy 5 | import org.apache.spark.sql.SparkSessionExtensions 6 | 7 | class OpenhouseSparkSessionExtensions extends (SparkSessionExtensions => Unit) { 8 | override def apply(extensions: SparkSessionExtensions): Unit = { 9 | extensions.injectParser { case (_, parser) => new OpenhouseSparkSqlExtensionsParser(parser) } 10 | extensions.injectPlannerStrategy( spark => OpenhouseDataSourceV2Strategy(spark)) 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/constants/Principal.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.constants 2 | 3 | /** 4 | * This object is used to represent keyword global user group "PUBLIC" which maps to the acl policy representation "*" 5 | */ 6 | object Principal { 7 | private val GLOBAL_USER_GROUP = "PUBLIC" 8 | private val GLOBAL_USER_GROUP_ACL = "*" 9 | def apply(principal: String): String = principal toUpperCase() match { 10 | case GLOBAL_USER_GROUP => GLOBAL_USER_GROUP_ACL 11 | case _ => principal 12 | } 13 | 14 | def unapply(principalAcl: String): Option[String] = principalAcl match { 15 | case GLOBAL_USER_GROUP_ACL => Some(GLOBAL_USER_GROUP) 16 | case _ => Some(principalAcl) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/enums/GrantableResourceTypes.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.enums 2 | 3 | private[sql] object GrantableResourceTypes extends Enumeration { 4 | type GrantableResourceType = Value 5 | val TABLE, DATABASE = Value 6 | } 7 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/GrantRevokeStatement.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import com.linkedin.openhouse.spark.sql.catalyst.enums.GrantableResourceTypes.GrantableResourceType 4 | import 
org.apache.spark.sql.catalyst.plans.logical.Command 5 | 6 | case class GrantRevokeStatement(isGrant: Boolean, resourceType: GrantableResourceType, resourceName: Seq[String], privilege: String, principal: String) extends Command { 7 | override def simpleString(maxFields: Int): String = { 8 | s"GrantRevokeStatement: isGrant ${isGrant}, ${resourceType} ${resourceName} ${privilege} ${principal}" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetColumnPolicyTag.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class SetColumnPolicyTag(tableName: Seq[String], colName: String, policyTags: Seq[String]) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetColumnPolicyTag: ${tableName} ${colName} ${policyTags}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetHistoryPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class SetHistoryPolicy (tableName: Seq[String], granularity: Option[String], maxAge: Int, versions: Int) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetHistoryPolicy: ${tableName} ${if (maxAge > 0) "MAX_AGE=" + maxAge else ""}${granularity.getOrElse("")} ${if (versions > 0) "VERSIONS=" + versions else ""}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetReplicationPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class SetReplicationPolicy(tableName: Seq[String], replicationPolicies: Seq[(String, Option[String])]) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetReplicationPolicy: ${tableName} ${replicationPolicies}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetRetentionPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class SetRetentionPolicy(tableName: Seq[String], granularity: String, count: Int, colName: Option[String], colPattern: Option[String]) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetRetentionPolicy: ${tableName} ${count} ${granularity} ${colName.getOrElse("")} ${colPattern.getOrElse("")}" 8 | } 9 | } 10 | 
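The case classes above are the logical-plan forms of OpenHouse's SQL extensions: each `SET POLICY`, `GRANT`, or `REVOKE` statement is parsed into one of these commands. As a rough usage sketch in the shell style of populate-tables.scala (the table name and principal are hypothetical, and the statement grammar is inferred from these case classes together with the OpenHouse docs, so verify it against the SQL reference):

```scala
// Assumes a Spark shell with the OpenHouse catalog and the
// OpenhouseSparkSessionExtensions shown earlier already configured.

// Parsed into SetRetentionPolicy(tableName, granularity, count, colName, colPattern)
spark.sql("ALTER TABLE openhouse.testdb.tbA SET POLICY (RETENTION=30d)")

// Parsed into GrantRevokeStatement(isGrant = true, TABLE, tableName, privilege, principal)
spark.sql("GRANT SELECT ON TABLE openhouse.testdb.tbA TO user_a")
```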
-------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetSharingPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class SetSharingPolicy(tableName: Seq[String], sharing: String) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetSharingPolicy: ${tableName} ${sharing}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/ShowGrantsStatement.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import com.linkedin.openhouse.spark.sql.catalyst.enums.GrantableResourceTypes.GrantableResourceType 4 | import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} 5 | import org.apache.spark.sql.catalyst.plans.logical.Command 6 | import org.apache.spark.sql.types.StringType 7 | 8 | 9 | case class ShowGrantsStatement(resourceType: GrantableResourceType, resourceName: Seq[String]) extends Command { 10 | 11 | override lazy val output: Seq[Attribute] = Seq( 12 | AttributeReference("privilege", StringType, nullable = false)(), 13 | AttributeReference("principal", StringType, nullable = false)() 14 | ) 15 | override def simpleString(maxFields: Int): String = { 16 | s"ShowGrantsStatement: ${resourceType} ${resourceName}" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.1/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/UnSetReplicationPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.Command 4 | 5 | case class UnSetReplicationPolicy(tableName: Seq[String], replicationPolicies: String) extends Command { 6 | override def simpleString(maxFields: Int): String = { 7 | s"UnSetReplicationPolicy: ${tableName}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-itest/src/test/resources/dummy.token: -------------------------------------------------------------------------------- 1 | eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6IkRVTU1ZX0FOT05ZTU9VU19VU0VSZGJjNDk3MTMtMzM5ZC00Y2ZkLTkwMDgtZDY4NzlhZDQwZjE2Iiwic3ViIjoie1wiQ09ERVwiOlwiRFVNTVlfQ09ERVwiLFwiVVNFUi1JRFwiOlwiRFVNTVlfQU5PTllNT1VTX1VTRVJcIn0ifQ.W2WVBrMacFrXS8Xa29k_V_yD0yca2nEet5mSYq27Ayo -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/GrantRevokeStatement.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import com.linkedin.openhouse.spark.sql.catalyst.enums.GrantableResourceTypes.GrantableResourceType 4 | import 
org.apache.spark.sql.catalyst.plans.logical.LeafCommand 5 | 6 | case class GrantRevokeStatement(isGrant: Boolean, resourceType: GrantableResourceType, resourceName: Seq[String], privilege: String, principal: String) extends LeafCommand { 7 | override def simpleString(maxFields: Int): String = { 8 | s"GrantRevokeStatement: isGrant ${isGrant}, ${resourceType} ${resourceName} ${privilege} ${principal}" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetColumnPolicyTag.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class SetColumnPolicyTag(tableName: Seq[String], colName: String, policyTags: Seq[String]) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetColumnPolicyTag: ${tableName} ${colName} ${policyTags}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetHistoryPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class SetHistoryPolicy(tableName: Seq[String], granularity: Option[String], maxAge: Int, versions: Int) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetHistoryPolicy: ${tableName} ${if (maxAge > 0) "MAX_AGE=" + maxAge else ""}${granularity.getOrElse("")} ${if (versions > 0) "VERSIONS=" + versions else ""}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetReplicationPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class SetReplicationPolicy(tableName: Seq[String], replicationPolicies: Seq[(String, Option[String])]) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetReplicationPolicy: ${tableName} ${replicationPolicies}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetRetentionPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class SetRetentionPolicy(tableName: Seq[String], granularity: String, count: Int, colName: Option[String], colPattern: Option[String]) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetRetentionPolicy: ${tableName} ${count} ${granularity} ${colName.getOrElse("")} ${colPattern.getOrElse("")}" 8 | } 9 | } 10 | 
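The Spark 3.5 classes mirror the 3.1 commands one-for-one; the only change is extending `LeafCommand` rather than `Command`, since newer Catalyst versions distinguish commands without children as leaf nodes. A companion sketch for the remaining policy statements, under the same caveats as before (hypothetical table name; grammar inferred from each command's `simpleString` rendering, so verify against the OpenHouse SQL reference):

```scala
// Parsed into SetHistoryPolicy(tableName, granularity, maxAge, versions)
spark.sql("ALTER TABLE openhouse.testdb.tbA SET POLICY (HISTORY MAX_AGE=7d VERSIONS=5)")

// Parsed into SetSharingPolicy(tableName, sharing)
spark.sql("ALTER TABLE openhouse.testdb.tbA SET POLICY (SHARING=true)")

// Parsed into ShowGrantsStatement(TABLE, tableName); returns (privilege, principal) rows
spark.sql("SHOW GRANTS ON TABLE openhouse.testdb.tbA").show()
```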
-------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/SetSharingPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class SetSharingPolicy(tableName: Seq[String], sharing: String) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"SetSharingPolicy: ${tableName} ${sharing}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/ShowGrantsStatement.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import com.linkedin.openhouse.spark.sql.catalyst.enums.GrantableResourceTypes.GrantableResourceType 4 | import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} 5 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 6 | import org.apache.spark.sql.types.StringType 7 | 8 | 9 | case class ShowGrantsStatement(resourceType: GrantableResourceType, resourceName: Seq[String]) extends LeafCommand { 10 | 11 | override lazy val output: Seq[Attribute] = Seq( 12 | AttributeReference("privilege", StringType, nullable = false)(), 13 | AttributeReference("principal", StringType, nullable = false)() 14 | ) 15 | override def simpleString(maxFields: Int): String = { 16 | s"ShowGrantsStatement: ${resourceType} ${resourceName}" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /integrations/spark/spark-3.5/openhouse-spark-runtime/src/main/scala/com/linkedin/openhouse/spark/sql/catalyst/plans/logical/UnSetReplicationPolicy.scala: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.spark.sql.catalyst.plans.logical 2 | 3 | import org.apache.spark.sql.catalyst.plans.logical.LeafCommand 4 | 5 | case class UnSetReplicationPolicy(tableName: Seq[String], replicationPolicies: String) extends LeafCommand { 6 | override def simpleString(maxFields: Int): String = { 7 | s"UnSetReplicationPolicy: ${tableName}" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/datasource/DataSource.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.datasource; 2 | 3 | import java.util.function.Supplier; 4 | import org.apache.spark.sql.Dataset; 5 | 6 | /** 7 | * Interface for a data source that provides a {@link Dataset}. Used to plug-in table file and query 8 | * statistics to generate data layout strategies. 
9 | */ 10 | public interface DataSource<T> extends Supplier<Dataset<T>> {} 11 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/datasource/FileStat.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.datasource; 2 | 3 | import java.util.List; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Builder; 6 | import lombok.Data; 7 | import lombok.NoArgsConstructor; 8 | import org.apache.iceberg.FileContent; 9 | 10 | /** Represents the statistics of a file. */ 11 | @Data 12 | @Builder 13 | @NoArgsConstructor 14 | @AllArgsConstructor 15 | public final class FileStat { 16 | private FileContent content; 17 | private String path; 18 | private long sizeInBytes; 19 | private long recordCount; 20 | private List<String> partitionValues; 21 | } 22 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/datasource/PartitionStat.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.datasource; 2 | 3 | import java.util.List; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Builder; 6 | import lombok.Data; 7 | import lombok.NoArgsConstructor; 8 | 9 | /** Represents table partition stats. */ 10 | @Data 11 | @Builder 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | public class PartitionStat { 15 | // list of transformed values for a given table partition 16 | // e.g. if table is partitioned by (datepartition: string, state: string) 17 | // the list could be ["2024-01-01", "CA"] 18 | private List<String> values; 19 | private int fileCount; 20 | } 21 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/datasource/SnapshotStat.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.datasource; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Builder; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | 8 | @Data 9 | @Builder 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | public class SnapshotStat { 13 | long committedAt; 14 | long snapshotId; 15 | String operation; 16 | } 17 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/generator/DataLayoutStrategyGenerator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.generator; 2 | 3 | import com.linkedin.openhouse.datalayout.strategy.DataLayoutStrategy; 4 | import java.util.List; 5 | 6 | public interface DataLayoutStrategyGenerator { 7 | List<DataLayoutStrategy> generate(); 8 | 9 | List<DataLayoutStrategy> generateTableLevelStrategies(); 10 | 11 | List<DataLayoutStrategy> generatePartitionLevelStrategies(); 12 | } 13 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/persistence/StrategiesDao.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.persistence; 2 | 3 | import com.linkedin.openhouse.datalayout.strategy.DataLayoutStrategy; 4 | import java.util.List; 5 | 6 | /** DAO interface for persisting and loading data layout optimization
strategies. */ 7 | public interface StrategiesDao { 8 | void save(String fqtn, List<DataLayoutStrategy> strategies); 9 | 10 | void savePartitionScope(String fqtn, List<DataLayoutStrategy> strategies); 11 | 12 | List<DataLayoutStrategy> load(String fqtn); 13 | 14 | List<DataLayoutStrategy> loadPartitionScope(String fqtn); 15 | 16 | void delete(String fqtn); 17 | 18 | void deletePartitionScope(String fqtn); 19 | } 20 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/ranker/DataLayoutCandidateSelector.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.ranker; 2 | 3 | import com.linkedin.openhouse.datalayout.strategy.ScoredDataLayoutStrategy; 4 | import java.util.List; 5 | 6 | public interface DataLayoutCandidateSelector { 7 | 8 | /** 9 | * Pick the candidates to perform data layout optimizations based on the scores. 10 | * 11 | * @param dataLayoutStrategies all data layout strategies with scores computed. 12 | * @return indices of the selected data layout strategies. 13 | */ 14 | List<Integer> select(List<ScoredDataLayoutStrategy> dataLayoutStrategies); 15 | } 16 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/ranker/DataLayoutStrategyScorer.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.ranker; 2 | 3 | import com.linkedin.openhouse.datalayout.strategy.DataLayoutStrategy; 4 | import com.linkedin.openhouse.datalayout.strategy.ScoredDataLayoutStrategy; 5 | import java.util.List; 6 | 7 | /** Interface for scoring data layout strategies. */ 8 | public interface DataLayoutStrategyScorer { 9 | /** 10 | * Compute scores for the data layout strategies based on the input data. 11 | * 12 | * @param dataLayoutStrategies the data layout strategies to score 13 | * @return the data layout strategies with computed scores 14 | */ 15 | List<ScoredDataLayoutStrategy> scoreDataLayoutStrategies( 16 | List<DataLayoutStrategy> dataLayoutStrategies); 17 | } 18 | -------------------------------------------------------------------------------- /libs/datalayout/src/main/java/com/linkedin/openhouse/datalayout/strategy/ScoredDataLayoutStrategy.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.datalayout.strategy; 2 | 3 | import lombok.Builder; 4 | import lombok.Data; 5 | 6 | /** Data layout strategy with a score, and normalized units for traits.
*/ 7 | @Data 8 | @Builder 9 | public class ScoredDataLayoutStrategy { 10 | private final DataLayoutStrategy dataLayoutStrategy; 11 | private final double score; 12 | private final double normalizedComputeCost; 13 | private final double normalizedFileCountReduction; 14 | } 15 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Command $@" 4 | 5 | if [ "$#" -lt 1 ]; then 6 | echo "Script requires at least the executable jar as an argument" 7 | exit 1 8 | fi 9 | 10 | echo "Executing Jar $1 at " 11 | date 12 | 13 | # Using -XX:NativeMemoryTracking=summary for quick idea on java process memory breakdown, one could switch to 14 | # "detail" for further details 15 | 16 | java -Xmx256M -Xms64M -XX:NativeMemoryTracking=summary -jar "$@" 17 | -------------------------------------------------------------------------------- /scripts/git-hooks/pre-commit: -------------------------------------------------------------------------------- 1 | # Referenced from https://gist.github.com/chadmaughan/5889802 2 | # stash any unstaged changes 3 | 4 | STASH_MESSAGE=GitHookUnstagedStash`date +%s` 5 | echo "Stashing uncommitted changes with message ${STASH_MESSAGE}" 6 | git stash -q --keep-index -m $STASH_MESSAGE 7 | 8 | # run the spotless format checker 9 | echo "Running Spotless format check" 10 | ./gradlew spotlessCheck -q --daemon 11 | 12 | # store the last exit code in a variable 13 | RESULT=$? 14 | if [ "$RESULT" -ne 0 ]; then 15 | echo "Run ./gradlew spotlessApply to fix formatting" 16 | fi 17 | 18 | # unstash the stashed changes if they exist 19 | echo "Unstashing changes with message ${STASH_MESSAGE}" 20 | git stash list | grep $STASH_MESSAGE && git stash pop -q 21 | 22 | # return the './gradlew spotlessCheck' exit code 23 | exit $RESULT -------------------------------------------------------------------------------- /scripts/git-hooks/pre-push: -------------------------------------------------------------------------------- 1 | # Referenced from https://gist.github.com/chadmaughan/5889802 2 | # stash any unstaged changes 3 | 4 | STASH_MESSAGE=GitHookUnstagedStash`date +%s` 5 | echo "Stashing uncommitted changes with message ${STASH_MESSAGE}" 6 | git stash -q --keep-index -m $STASH_MESSAGE 7 | 8 | # run the spotless format checker 9 | echo "Running Spotless format check" 10 | ./gradlew spotlessCheck -q --daemon 11 | 12 | # store the last exit code in a variable 13 | RESULT=$? 14 | if [ "$RESULT" -ne 0 ]; then 15 | echo "Run ./gradlew spotlessApply to fix formatting" 16 | fi 17 | 18 | # unstash the stashed changes if they exist 19 | echo "Unstashing changes with message ${STASH_MESSAGE}" 20 | git stash list | grep $STASH_MESSAGE && git stash pop -q 21 | 22 | # return the './gradlew spotlessCheck' exit code 23 | exit $RESULT -------------------------------------------------------------------------------- /scripts/python/README.md: -------------------------------------------------------------------------------- 1 | ## Setup virtual environment 2 | Run `python3 -m venv env` if environment doesn't exist.
3 | ## Activate environment 4 | `source env/bin/activate` 5 | ## Install dependencies 6 | `pip install -r requirements.txt` 7 | 8 | ## Adding new dependencies 9 | `pip install <dependency>` 10 | `pip freeze > requirements.txt` 11 | -------------------------------------------------------------------------------- /scripts/python/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.6.15 2 | charset-normalizer==2.1.1 3 | idna==3.3 4 | requests==2.28.1 5 | urllib3==1.26.12 6 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/JobState.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common; 2 | 3 | /** 4 | * Class to represent a job state. This is decoupled from engine specific state, and there is a 5 | * mapping being done for each engine. 6 | */ 7 | public enum JobState { 8 | QUEUED, 9 | RUNNING, 10 | CANCELLED, 11 | FAILED, 12 | SUCCEEDED; 13 | 14 | public boolean isTerminal() { 15 | return this.equals(SUCCEEDED) || this.equals(FAILED) || this.equals(CANCELLED); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/api/spec/ApiRequest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.api.spec; 2 | 3 | import lombok.Builder; 4 | import lombok.Value; 5 | import org.springframework.http.HttpHeaders; 6 | 7 | /** 8 | * POJO for ApiRequest containing requestBody JSON, and HTTP headers. 9 | * 10 | * @param <T> Class Type of the RequestBody 11 | */ 12 | @Builder 13 | @Value 14 | public class ApiRequest<T> { 15 | 16 | /** Request Body JSON as sent *by* the client as part of Request payload. */ 17 | T requestBody; 18 | 19 | /** HTTP Headers as sent by the client as part of headers. */ 20 | HttpHeaders httpHeaders; 21 | } 22 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/api/spec/ApiResponse.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.api.spec; 2 | 3 | import lombok.Builder; 4 | import lombok.Value; 5 | import org.springframework.http.HttpHeaders; 6 | import org.springframework.http.HttpStatus; 7 | 8 | /** 9 | * POJO for ApiResponse containing HTTP status, responseBody JSON, and HTTP headers. This POJO is 10 | * sufficient for the REST layer to forward the response back to the client without having to 11 | * transform further. 12 | * 13 | * @param <T> Class Type of the ResponseBody 14 | */ 15 | @Builder 16 | @Value 17 | public class ApiResponse<T> { 18 | /** HTTP Status to be returned to the Client. */ 19 | HttpStatus httpStatus; 20 | 21 | /** Response Body JSON as sent *to* the client as part of Response payload */ 22 | T responseBody; 23 | 24 | /** HTTP Headers as sent *to* the client as part of headers. */ 25 | HttpHeaders httpHeaders; 26 | } 27 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/audit/AuditHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.audit; 2 | 3 | /** Handler interface injected in the audit aspect to send an event.
*/ 4 | public interface AuditHandler<BaseAuditEvent> { 5 | /** 6 | * Logic to display or send the event given an audit event object. 7 | * 8 | * @param event the audit event to record 9 | */ 10 | void audit(BaseAuditEvent event); 11 | } 12 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/audit/DummyServiceAuditHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.audit; 2 | 3 | import com.linkedin.openhouse.common.audit.model.ServiceAuditEvent; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.stereotype.Component; 6 | 7 | /** A dummy service audit handler which is used only in unit-tests and local docker-environments. */ 8 | @Slf4j 9 | @Component 10 | public class DummyServiceAuditHandler implements AuditHandler<ServiceAuditEvent> { 11 | @Override 12 | public void audit(ServiceAuditEvent event) { 13 | log.info("Service audit event: \n" + event.toJson()); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/audit/model/ServiceName.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.audit.model; 2 | 3 | /** The service that handles the REST request */ 4 | public enum ServiceName { 5 | TABLES_SERVICE("tables_service"), 6 | JOBS_SERVICE("jobs_service"), 7 | HOUSETABLES_SERVICE("housetables_service"), 8 | UNRECOGNIZED("unrecognized"); 9 | 10 | private final String serviceName; 11 | 12 | ServiceName(String serviceName) { 13 | this.serviceName = serviceName; 14 | } 15 | 16 | public String getServiceName() { 17 | return serviceName; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/config/BaseApplicationConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.config; 2 | 3 | import com.linkedin.openhouse.cluster.configs.ClusterProperties; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | 6 | public class BaseApplicationConfig { 7 | @Autowired protected ClusterProperties clusterProperties; 8 | } 9 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/EntityConcurrentModificationException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | import lombok.Getter; 4 | 5 | /** An exception thrown to surface concurrent modification for the user table.
*/ 6 | @Getter 7 | public class EntityConcurrentModificationException extends RuntimeException { 8 | private String entityId; 9 | 10 | private Throwable cause; 11 | 12 | public EntityConcurrentModificationException(String entityId, Throwable cause) { 13 | this(entityId, "", cause); 14 | } 15 | 16 | public EntityConcurrentModificationException(String entityId, String message, Throwable cause) { 17 | super(message); 18 | this.entityId = entityId; 19 | this.cause = cause; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/JobEngineException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | /** Exception indicating failure in the jobs execution engine, e.g. Livy endpoint */ 4 | public class JobEngineException extends RuntimeException { 5 | public JobEngineException(String message, Throwable cause) { 6 | super(message, cause); 7 | } 8 | 9 | public JobEngineException(String message) { 10 | super(message); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/JobStateConflictException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | /** 4 | * An exception indicating a conflict in Job state, e.g. job cancel request applied to a completed 5 | * job. 6 | */ 7 | public class JobStateConflictException extends RuntimeException { 8 | public JobStateConflictException(String currentState, String newState) { 9 | super(String.format("Cannot change state from %s to %s", currentState, newState)); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/NoSuchJobException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | import java.util.NoSuchElementException; 4 | import lombok.Getter; 5 | 6 | /** Exception indicating Job is not found.
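 *
 * <p>Message sketch: {@code new NoSuchJobException("abc-123")} produces the message "Job abc-123
 * cannot be found" via the {@code $jid} template below.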
*/ 7 | @Getter 8 | public class NoSuchJobException extends NoSuchElementException { 9 | private String jobId; 10 | private final Throwable cause; 11 | private static final String ERROR_MSG_TEMPLATE = "Job $jid cannot be found"; 12 | 13 | public NoSuchJobException(String jobId) { 14 | this(jobId, ERROR_MSG_TEMPLATE.replace("$jid", jobId), null); 15 | } 16 | 17 | public NoSuchJobException(String jobId, Throwable cause) { 18 | this(jobId, ERROR_MSG_TEMPLATE.replace("$jid", jobId), cause); 19 | } 20 | 21 | public NoSuchJobException(String jobId, String errorMsg, Throwable cause) { 22 | super(errorMsg); 23 | this.jobId = jobId; 24 | this.cause = cause; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/OpenHouseCommitStateUnknownException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | import lombok.Getter; 4 | 5 | /** Specifically mapped to {@link org.apache.iceberg.exceptions.CommitStateUnknownException} */ 6 | @Getter 7 | public class OpenHouseCommitStateUnknownException extends RuntimeException { 8 | private String tableId; 9 | 10 | private Throwable cause; 11 | 12 | public OpenHouseCommitStateUnknownException(String tableId, String message, Throwable cause) { 13 | super(message); 14 | this.tableId = tableId; 15 | this.cause = cause; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/RequestValidationFailureException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | import java.util.List; 4 | 5 | /** A preliminary exception type to indicate validation failure. */ 6 | public class RequestValidationFailureException extends RuntimeException { 7 | 8 | public RequestValidationFailureException() {} 9 | 10 | public RequestValidationFailureException(String message, Exception internalException) { 11 | super(message + ", internal exception:" + internalException); 12 | } 13 | 14 | public RequestValidationFailureException(String message) { 15 | super(message); 16 | } 17 | 18 | public RequestValidationFailureException(List<String> reasons) { 19 | super(String.join("; ", reasons)); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/UnprocessableEntityException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | /** An exception to indicate the request is understood by the server but can't be processed.
*/ 4 | public class UnprocessableEntityException extends RuntimeException { 5 | 6 | public UnprocessableEntityException(String message, Throwable throwable) { 7 | super(message, throwable); 8 | } 9 | 10 | public UnprocessableEntityException(String message) { 11 | super(message); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/exception/UnsupportedClientOperationException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.exception; 2 | 3 | /** 4 | * Exception indicating an operation is not supported 5 | * 6 | *
* <p>
message contains the details of the operation which is not supported. This error is mapped to a 7 | * 400 client error. Use this error only to indicate an invalid operation done by the client (for 8 | * example: partition evolution). Avoid using this exception for internal server errors. 9 | */ 10 | public class UnsupportedClientOperationException extends RuntimeException { 11 | 12 | public UnsupportedClientOperationException(Operation operation, String message) { 13 | super(message); 14 | } 15 | 16 | public enum Operation { 17 | PARTITION_EVOLUTION, 18 | ALTER_RESERVED_TBLPROPS, 19 | ALTER_RESERVED_ROLES, 20 | GRANT_ON_UNSHARED_TABLES, 21 | ALTER_TABLE_TYPE, 22 | GRANT_ON_LOCKED_TABLES, 23 | LOCKED_TABLE_OPERATION 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/schema/IcebergSchemaHelper.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.schema; 2 | 3 | import org.apache.iceberg.Schema; 4 | import org.apache.iceberg.SchemaParser; 5 | 6 | public final class IcebergSchemaHelper { 7 | private IcebergSchemaHelper() { 8 | // utility class no-op constructor 9 | } 10 | 11 | public static Schema getSchemaFromSchemaJson(String schemaJson) { 12 | return SchemaParser.fromJson(schemaJson); 13 | } 14 | 15 | public static String getSchemaJsonFromSchema(Schema schema) { 16 | return SchemaParser.toJson(schema); 17 | } 18 | 19 | /** 20 | * @param schema Schema object 21 | * @param columnName Name of the column to be inspected 22 | * @return true if the columnName exists in the provided schema string 23 | */ 24 | public static boolean columnExists(Schema schema, String columnName) { 25 | return schema.findField(columnName) != null; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/security/AuthenticationUtils.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.security; 2 | 3 | import org.springframework.security.core.context.SecurityContext; 4 | import org.springframework.security.core.context.SecurityContextHolder; 5 | import org.springframework.security.core.userdetails.User; 6 | 7 | /** Utils class for authentication purposes.
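 *
 * <p>Usage sketch (assumes Spring Security has populated the context with a {@code User}
 * principal; otherwise the method below falls back to "undefined"):
 * <pre>{@code
 *   String principal = AuthenticationUtils.extractAuthenticatedUserPrincipal();
 * }</pre>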
*/ 8 | public final class AuthenticationUtils { 9 | private AuthenticationUtils() {} 10 | 11 | public static String extractAuthenticatedUserPrincipal() { 12 | SecurityContext securityContext = SecurityContextHolder.getContext(); 13 | String authenticatedUserPrincipal = "undefined"; 14 | if (securityContext != null 15 | && securityContext.getAuthentication() != null 16 | && securityContext.getAuthentication().getPrincipal() != null) { 17 | User user = (User) securityContext.getAuthentication().getPrincipal(); 18 | authenticatedUserPrincipal = user.getUsername(); 19 | } 20 | return authenticatedUserPrincipal; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/stats/model/BaseTableMetadata.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.stats.model; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Getter; 5 | import lombok.NoArgsConstructor; 6 | import lombok.experimental.SuperBuilder; 7 | 8 | /** Data Model for capturing table metadata for stats. */ 9 | @Getter 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | @SuperBuilder(toBuilder = true) 13 | public class BaseTableMetadata { 14 | 15 | private Long recordTimestamp; 16 | 17 | private String clusterName; 18 | 19 | private String databaseName; 20 | 21 | private String tableName; 22 | 23 | private String tableUUID; 24 | 25 | private String tableLocation; 26 | 27 | private String tableCreator; 28 | 29 | private Long tableCreationTimestamp; 30 | 31 | private Long tableLastUpdatedTimestamp; 32 | 33 | private String tableType; 34 | 35 | private Boolean sharingEnabled; 36 | 37 | private RetentionStatsSchema retentionPolicies; 38 | 39 | private HistoryPolicyStatsSchema historyPolicy; 40 | } 41 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/stats/model/HistoryPolicyStatsSchema.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.stats.model; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Getter; 5 | import lombok.NoArgsConstructor; 6 | import lombok.experimental.SuperBuilder; 7 | 8 | @Getter 9 | @NoArgsConstructor 10 | @AllArgsConstructor 11 | @SuperBuilder(toBuilder = true) 12 | public class HistoryPolicyStatsSchema { 13 | 14 | private Integer versions; 15 | 16 | private Integer maxAge; 17 | 18 | private String granularity; 19 | } 20 | -------------------------------------------------------------------------------- /services/common/src/main/java/com/linkedin/openhouse/common/stats/model/PolicyStats.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.stats.model; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | @Getter 7 | @Setter 8 | public class PolicyStats { 9 | Boolean sharingEnabled; 10 | RetentionStatsSchema retentionPolicy; 11 | HistoryPolicyStatsSchema historyPolicy; 12 | 13 | public PolicyStats() { 14 | // Fields start as null and are populated via the Lombok-generated setters. 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /services/common/src/main/resources/dummy.token: -------------------------------------------------------------------------------- 1 |
eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6IkRVTU1ZX0FOT05ZTU9VU19VU0VSZGJjNDk3MTMtMzM5ZC00Y2ZkLTkwMDgtZDY4NzlhZDQwZjE2Iiwic3ViIjoie1wiQ09ERVwiOlwiRFVNTVlfQ09ERVwiLFwiVVNFUi1JRFwiOlwiRFVNTVlfQU5PTllNT1VTX1VTRVJcIn0ifQ.W2WVBrMacFrXS8Xa29k_V_yD0yca2nEet5mSYq27Ayo -------------------------------------------------------------------------------- /services/common/src/test/resources/noisy-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | 4 | "fields": [ 5 | { 6 | "id": 1, 7 | 8 | 9 | 10 | "required": true, 11 | "name": "id", 12 | "type": "string" 13 | }, 14 | 15 | { 16 | "id": 2, 17 | "required": true, 18 | "name": "name", 19 | "type": "string" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /services/common/src/test/resources/one-line-schema.json: -------------------------------------------------------------------------------- 1 | {"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"string"},{"id":2,"name":"name","required":true,"type":"string"}]} -------------------------------------------------------------------------------- /services/common/src/test/resources/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /services/common/src/testFixtures/java/com/linkedin/openhouse/common/test/schema/ResourceIoHelper.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.common.test.schema; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.nio.charset.StandardCharsets; 6 | import org.apache.commons.io.IOUtils; 7 | 8 | public final class ResourceIoHelper { 9 | private ResourceIoHelper() { 10 | // utility constructor noop 11 | } 12 | 13 | public static String getSchemaJsonFromResource(String resourceName) throws IOException { 14 | return getSchemaJsonFromResource(ResourceIoHelper.class, resourceName); 15 | } 16 | 17 | public static String getSchemaJsonFromResource(Class klazz, String resourceName) 18 | throws IOException { 19 | InputStream inputStream = klazz.getClassLoader().getResourceAsStream(resourceName); 20 | return IOUtils.toString(inputStream, StandardCharsets.UTF_8); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/ApiConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api; 2 | 3 | import com.linkedin.openhouse.housetables.api.handler.OpenHouseUserTableHtsApiHandler; 4 | import com.linkedin.openhouse.housetables.api.handler.UserTableHtsApiHandler; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | 8 | @Configuration 9 | public class ApiConfig { 10 | @Bean 11 | public UserTableHtsApiHandler tableHtsApiHandler() { 12 | return new OpenHouseUserTableHtsApiHandler(); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- 
/services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/handler/JobTableHtsApiHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.handler; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.Job; 4 | import com.linkedin.openhouse.housetables.api.spec.model.JobKey; 5 | 6 | /** 7 | * Invocation of generic type {@link HouseTablesApiHandler} using {@link Job} as the entity type. 8 | */ 9 | public interface JobTableHtsApiHandler extends HouseTablesApiHandler<JobKey, Job> {} 10 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/handler/ToggleStatusesApiHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.handler; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.TableToggleStatusKey; 4 | import com.linkedin.openhouse.housetables.api.spec.model.ToggleStatus; 5 | 6 | public interface ToggleStatusesApiHandler 7 | extends HouseTablesApiHandler<TableToggleStatusKey, ToggleStatus> {} 8 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/handler/UserTableHtsApiHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.handler; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.UserTable; 4 | import com.linkedin.openhouse.housetables.api.spec.model.UserTableKey; 5 | 6 | /** 7 | * Invocation of generic type {@link HouseTablesApiHandler} using {@link UserTable} as the entity 8 | * type. 9 | */ 10 | public interface UserTableHtsApiHandler extends HouseTablesApiHandler<UserTableKey, UserTable> {} 11 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/spec/model/JobKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.spec.model; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import io.swagger.v3.oas.annotations.media.Schema; 5 | import javax.validation.constraints.NotEmpty; 6 | import lombok.Builder; 7 | import lombok.Value; 8 | 9 | /** The key type for the House table storing jobs. */ 10 | @Builder 11 | @Value 12 | public class JobKey { 13 | 14 | @Schema( 15 | description = "Unique Resource identifier for a job within a Database.", 16 | example = "24efc962-9962-4522-b0b6-29490d7d8a0e") 17 | @JsonProperty(value = "jobId") 18 | @NotEmpty(message = "jobId cannot be empty") 19 | private String jobId; 20 | } 21 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/spec/model/ToggleStatus.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.spec.model; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import io.swagger.v3.oas.annotations.media.Schema; 5 | import javax.validation.constraints.NotEmpty; 6 | import lombok.Builder; 7 | import lombok.Value; 8 | 9 | /** This layer on top of {@link ToggleStatusEnum} ensures API extensibility.
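 *
 * <p>Because the enum is wrapped in a value object, new fields can be added later without breaking
 * the wire format; given the {@code @JsonProperty} mapping below, an instance serializes roughly
 * as {@code {"status": "ACTIVE"}}.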
*/ 10 | @Builder(toBuilder = true) 11 | @Value 12 | public class ToggleStatus { 13 | @Schema( 14 | description = "Status of an entity with respect to whether a feature has been toggled on", 15 | example = "Active") 16 | @JsonProperty(value = "status") 17 | @NotEmpty(message = "Toggle status cannot be empty") 18 | ToggleStatusEnum status; 19 | } 20 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/spec/model/ToggleStatusEnum.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.spec.model; 2 | 3 | /** Indicate if a feature is active or inactive on an entity (e.g. table) */ 4 | public enum ToggleStatusEnum { 5 | ACTIVE, 6 | INACTIVE 7 | } 8 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/spec/request/CreateUpdateEntityRequestBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.spec.request; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.google.gson.Gson; 5 | import io.swagger.v3.oas.annotations.media.Schema; 6 | import lombok.AccessLevel; 7 | import lombok.AllArgsConstructor; 8 | import lombok.Builder; 9 | import lombok.EqualsAndHashCode; 10 | import lombok.Getter; 11 | import lombok.NoArgsConstructor; 12 | 13 | @Builder 14 | @Getter 15 | @EqualsAndHashCode 16 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 17 | @NoArgsConstructor(access = AccessLevel.PROTECTED) 18 | public class CreateUpdateEntityRequestBody<T> { 19 | @Schema( 20 | description = 21 | "The entity object that clients want to create/update in the target house table.", 22 | required = true) 23 | @JsonProperty(value = "entity") 24 | T entity; 25 | 26 | public String toJson() { 27 | return new Gson().toJson(this); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/api/spec/response/EntityResponseBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.api.spec.response; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.google.gson.Gson; 5 | import io.swagger.v3.oas.annotations.media.Schema; 6 | import lombok.Builder; 7 | import lombok.Getter; 8 | 9 | /** 10 | * A generic class for response body of methods that deal with key'ed entity in different types of 11 | * House tables. 12 | * 13 | * @param <T> Representing the entity type.
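 *
 * <p>For instance (illustrative only), a user-table lookup would be returned as an {@code
 * EntityResponseBody<UserTable>} whose {@code entity} field carries the matched row.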
14 | */ 15 | @Builder 16 | public class EntityResponseBody<T> { 17 | 18 | @Schema(description = "Representing a row in the house table ", required = true) 19 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 20 | @Getter 21 | protected T entity; 22 | 23 | public String toJson() { 24 | return new Gson().toJson(this); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/dto/model/UserTableDto.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.dto.model; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.UserTable; 4 | import lombok.Builder; 5 | import lombok.Value; 6 | 7 | /** 8 | * Needed to avoid coupling the transport and controller layer by sharing the same {@link UserTable} 9 | * object. 10 | */ 11 | @Builder(toBuilder = true) 12 | @Value 13 | public class UserTableDto { 14 | String tableId; 15 | 16 | String databaseId; 17 | 18 | String tableVersion; 19 | 20 | String metadataLocation; 21 | 22 | String storageType; 23 | 24 | Long creationTime; 25 | } 26 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/model/JobRowPrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class JobRowPrimaryKey implements Serializable { 17 | 18 | private String jobId; 19 | } 20 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/model/UserTableRow.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.model; 2 | 3 | import javax.persistence.Entity; 4 | import javax.persistence.Id; 5 | import javax.persistence.IdClass; 6 | import javax.persistence.Version; 7 | import lombok.AccessLevel; 8 | import lombok.AllArgsConstructor; 9 | import lombok.Builder; 10 | import lombok.EqualsAndHashCode; 11 | import lombok.Getter; 12 | import lombok.NoArgsConstructor; 13 | 14 | /** Data Model for persisting a User Table Object in the HouseTable.
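 *
 * <p>Concurrency note (standard JPA behavior): the {@code @Version} column below enables
 * optimistic locking, so concurrent updates to the same (databaseId, tableId) row fail fast
 * instead of silently overwriting each other.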
*/ 15 | @Entity 16 | @IdClass(UserTableRowPrimaryKey.class) 17 | @Builder(toBuilder = true) 18 | @Getter 19 | @EqualsAndHashCode 20 | @NoArgsConstructor(access = AccessLevel.PROTECTED) 21 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 22 | public class UserTableRow { 23 | 24 | @Id String tableId; 25 | 26 | @Id String databaseId; 27 | 28 | @Version Long version; 29 | 30 | String metadataLocation; 31 | 32 | String storageType; 33 | 34 | Long creationTime; 35 | } 36 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/model/UserTableRowPrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class UserTableRowPrimaryKey implements Serializable { 17 | private String tableId; 18 | 19 | private String databaseId; 20 | 21 | @Override 22 | public String toString() { 23 | return this.databaseId + ":" + this.tableId; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/repository/HtsRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.repository; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.UserTable; 4 | import org.springframework.data.repository.PagingAndSortingRepository; 5 | 6 | /** 7 | * Interface for repository backed by Iceberg/JDBC for storing and retrieving {@link UserTable} or 8 | * {@link com.linkedin.openhouse.housetables.api.spec.model.Job} row object. 9 | */ 10 | public interface HtsRepository<T, ID> extends PagingAndSortingRepository<T, ID> {} 11 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/repository/impl/jdbc/JobTableHtsJdbcRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.repository.impl.jdbc; 2 | 3 | import com.linkedin.openhouse.housetables.config.db.jdbc.JdbcProviderConfiguration; 4 | import com.linkedin.openhouse.housetables.model.JobRow; 5 | import com.linkedin.openhouse.housetables.model.JobRowPrimaryKey; 6 | import com.linkedin.openhouse.housetables.repository.HtsRepository; 7 | 8 | /** 9 | * JDBC-backed {@link HtsRepository} for CRUDing {@link JobRow} 10 | * 11 |
* <p>
This class gets configured in {@link 12 | * com.linkedin.openhouse.housetables.config.db.DatabaseConfiguration} with @EnableJpaRepositories. 13 | * The datasource for the Jpa repository is provided in {@link JdbcProviderConfiguration}. 14 | */ 15 | public interface JobTableHtsJdbcRepository extends HtsRepository<JobRow, JobRowPrimaryKey> {} 16 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/repository/impl/jdbc/ToggleStatusHtsJdbcRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.repository.impl.jdbc; 2 | 3 | import com.linkedin.openhouse.housetables.model.TableToggleRule; 4 | import com.linkedin.openhouse.housetables.repository.HtsRepository; 5 | 6 | public interface ToggleStatusHtsJdbcRepository extends HtsRepository<TableToggleRule, Long> { 7 | Iterable<TableToggleRule> findAllByFeature(String feature); 8 | } 9 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/services/TableToggleRuleMatcher.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.services; 2 | 3 | import com.linkedin.openhouse.housetables.model.TableToggleRule; 4 | 5 | public interface TableToggleRuleMatcher { 6 | boolean matches(TableToggleRule rule, String tableId, String databaseId); 7 | } 8 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/services/ToggleStatusesService.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.services; 2 | 3 | import com.linkedin.openhouse.housetables.api.spec.model.ToggleStatus; 4 | 5 | public interface ToggleStatusesService { 6 | /** 7 | * Obtain the status of a {@link com.linkedin.openhouse.housetables.api.spec.model.UserTable}'s 8 | * feature. 9 | * 10 | * @param featureId identifier of the feature 11 | * @param databaseId identifier of the database 12 | * @param tableId identifier of the table 13 | * @return {@link ToggleStatus} of the requested entity.
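   *
   * <p>Sketch with hypothetical arguments: {@code getTableToggleStatus("stop_create",
   * "db_no_create", "anyTable")} consults the persisted toggle rules (such as the wildcard rule
   * seeded in data.sql below) and reports whether the feature is active for that table.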
14 | */ 15 | ToggleStatus getTableToggleStatus(String featureId, String databaseId, String tableId); 16 | } 17 | -------------------------------------------------------------------------------- /services/housetables/src/main/java/com/linkedin/openhouse/housetables/services/WildcardTableToggleRuleMatcher.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.services; 2 | 3 | import com.linkedin.openhouse.housetables.model.TableToggleRule; 4 | import org.springframework.stereotype.Component; 5 | 6 | /** An implementation of {@link TableToggleRuleMatcher} that supports '*' to match any entities */ 7 | @Component 8 | public class WildcardTableToggleRuleMatcher implements TableToggleRuleMatcher { 9 | @Override 10 | public boolean matches(TableToggleRule rule, String tableId, String databaseId) { 11 | boolean tableMatches = 12 | rule.getTablePattern().equals("*") || rule.getTablePattern().equals(tableId); 13 | boolean databaseMatches = 14 | rule.getDatabasePattern().equals("*") || rule.getDatabasePattern().equals(databaseId); 15 | 16 | return tableMatches && databaseMatches; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /services/housetables/src/main/resources/data.sql: -------------------------------------------------------------------------------- 1 | -- Initial value for feature toggle tables 2 | -- When enabling/disabling some feature, please ensure they are checked-in and reviewed through this file 3 | 4 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES ('demo', 'demodb', 'demotable', DEFAULT, DEFAULT); -------------------------------------------------------------------------------- /services/housetables/src/test/java/com/linkedin/openhouse/housetables/mock/TestOpenHouseHtsControllerConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.housetables.mock; 2 | 3 | import com.linkedin.openhouse.housetables.api.handler.UserTableHtsApiHandler; 4 | import org.springframework.context.annotation.Bean; 5 | import org.springframework.context.annotation.Configuration; 6 | import org.springframework.context.annotation.Primary; 7 | 8 | @Configuration 9 | public class TestOpenHouseHtsControllerConfig { 10 | 11 | @Bean 12 | @Primary 13 | public UserTableHtsApiHandler createTestHouseTableApiHandler() { 14 | return new MockUserTableHtsApiHandler(); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /services/housetables/src/test/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.jpa.hibernate.ddl-auto=none 2 | spring.sql.init.mode=always 3 | spring.jpa.defer-datasource-initialization=true -------------------------------------------------------------------------------- /services/housetables/src/test/resources/data.sql: -------------------------------------------------------------------------------- 1 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES ('demo', 'demodb', 'demotable', DEFAULT, 987L); 2 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES ('dummy1', 'db', 'tbl', DEFAULT, 987L); 3 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES 
('dummy1', 'db', 'testtbl1', DEFAULT, 987L); 4 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES ('dummy2', 'db', 'tbl', DEFAULT, 987L); 5 | INSERT IGNORE INTO table_toggle_rule (feature, database_pattern, table_pattern, id, creation_time_ms) VALUES ('stop_create', 'db_no_create', '*', DEFAULT, 987L); 6 | 7 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/api/validator/JobsApiValidator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.api.validator; 2 | 3 | import com.linkedin.openhouse.jobs.api.spec.request.CreateJobRequestBody; 4 | 5 | public interface JobsApiValidator { 6 | /** 7 | * Function to validate Job create request body. 8 | * 9 | * @param createJobRequestBody 10 | * @throws com.linkedin.openhouse.common.exception.RequestValidationFailureException if the 11 | * request is invalid. 12 | */ 13 | void validateCreateJob(CreateJobRequestBody createJobRequestBody); 14 | } 15 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/config/JobsEngineProperties.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.config; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import lombok.AllArgsConstructor; 8 | import lombok.Builder; 9 | import lombok.EqualsAndHashCode; 10 | import lombok.Getter; 11 | import lombok.NoArgsConstructor; 12 | import lombok.Setter; 13 | import lombok.ToString; 14 | 15 | @Getter 16 | @Setter 17 | @AllArgsConstructor 18 | @NoArgsConstructor 19 | @ToString 20 | @Builder(toBuilder = true) 21 | @EqualsAndHashCode 22 | public class JobsEngineProperties { 23 | private String engineType; 24 | private String coordinatorClassName; 25 | private String engineUri; 26 | private String jarPath; 27 | @Builder.Default private List<String> dependencies = new ArrayList<>(); 28 | @Builder.Default private Map<String, String> executionTags = new HashMap<>(); 29 | } 30 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/controller/SwaggerConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.controller; 2 | 3 | import io.swagger.v3.oas.models.OpenAPI; 4 | import io.swagger.v3.oas.models.info.Info; 5 | import io.swagger.v3.oas.models.info.License; 6 | import org.springframework.context.annotation.Bean; 7 | import org.springframework.context.annotation.Configuration; 8 | 9 | @Configuration 10 | public class SwaggerConfig { 11 | @Bean 12 | public OpenAPI houseJobsOpenAPI() { 13 | return new OpenAPI() 14 | .info( 15 | new Info() 16 | .title("OpenHouse APIs") 17 | .description("API description for OpenHouse API") 18 | .termsOfService("http://swagger.io/terms") 19 | .version("v0.0.1") 20 | .license(new License().name("Apache 2.0").url("http://springdoc.org"))); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/model/JobConfConverter.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.model; 2 | 3 | import
com.google.gson.Gson; 4 | import javax.persistence.AttributeConverter; 5 | import javax.persistence.Converter; 6 | 7 | @Converter 8 | public class JobConfConverter implements AttributeConverter<JobConf, String> { 9 | @Override 10 | public String convertToDatabaseColumn(JobConf attribute) { 11 | return (new Gson()).toJson(attribute); 12 | } 13 | 14 | @Override 15 | public JobConf convertToEntityAttribute(String dbData) { 16 | return (new Gson()).fromJson(dbData, JobConf.class); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/model/JobDtoPrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class JobDtoPrimaryKey implements Serializable, Comparable<JobDtoPrimaryKey> { 17 | private String jobId; 18 | 19 | @Override 20 | public int compareTo(JobDtoPrimaryKey o) { 21 | return jobId.compareTo(o.jobId); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/JobsInternalRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository; 2 | 3 | import com.linkedin.openhouse.jobs.model.JobDto; 4 | import com.linkedin.openhouse.jobs.model.JobDtoPrimaryKey; 5 | import org.springframework.data.repository.CrudRepository; 6 | import org.springframework.stereotype.Repository; 7 | 8 | @Repository 9 | public interface JobsInternalRepository extends CrudRepository<JobDto, JobDtoPrimaryKey> {} 10 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/exception/JobNotFoundException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository.exception; 2 | 3 | public class JobNotFoundException extends RuntimeException { 4 | public JobNotFoundException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/exception/JobsInternalRepositoryTimeoutException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository.exception; 2 | 3 | /** Exception thrown when a call to HTS times out.
*/ 4 | public class JobsInternalRepositoryTimeoutException extends RuntimeException { 5 | public JobsInternalRepositoryTimeoutException(String message, Throwable cause) { 6 | super(message, cause); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/exception/JobsInternalRepositoryUnavailableException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository.exception; 2 | 3 | public class JobsInternalRepositoryUnavailableException extends RuntimeException { 4 | public JobsInternalRepositoryUnavailableException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/exception/JobsTableCallerException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository.exception; 2 | 3 | public class JobsTableCallerException extends RuntimeException { 4 | public JobsTableCallerException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/repository/exception/JobsTableConcurrentUpdateException.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.repository.exception; 2 | 3 | public class JobsTableConcurrentUpdateException extends RuntimeException { 4 | public JobsTableConcurrentUpdateException(String message, Throwable cause) { 5 | super(message, cause); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/services/HouseJobHandle.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.services; 2 | 3 | /** 4 | * Interface for House Job handle that provides access to a specific job state and management 5 | * methods. 6 | */ 7 | public interface HouseJobHandle { 8 | /** Issues cancel request to the engine API to terminate the job. */ 9 | void cancel(); 10 | 11 | /** @return job info {@link com.linkedin.openhouse.jobs.services.JobInfo} */ 12 | JobInfo getInfo(); 13 | } 14 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/services/HouseJobsCoordinator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.services; 2 | 3 | import com.linkedin.openhouse.jobs.config.JobLaunchConf; 4 | 5 | /** 6 | * Interface for House Jobs coordinator, responsible for jobs launching, tracking and cancellation 7 | * via {@link HouseJobHandle}. 8 | */ 9 | public interface HouseJobsCoordinator { 10 | /** 11 | * Submits a job via Spark engine API of choice. 12 | * 13 | * @param conf - job launch config {@link JobLaunchConf} 14 | * @return handle object {@link HouseJobHandle} to manage this specific job 15 | */ 16 | HouseJobHandle submit(JobLaunchConf conf); 17 | 18 | /** 19 | * Given an id of the job engine API, creates a job handle. 20 | * 21 | * @param executionId - id of the job in the engine API, e.g. 
sessionId in Livy 22 | * @return handle object {@link HouseJobHandle} to manage this specific job 23 | */ 24 | HouseJobHandle obtainHandle(String executionId); 25 | } 26 | -------------------------------------------------------------------------------- /services/jobs/src/main/java/com/linkedin/openhouse/jobs/services/JobInfo.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.jobs.services; 2 | 3 | import com.linkedin.openhouse.common.JobState; 4 | import java.util.Map; 5 | 6 | /** Interface for access to job information. */ 7 | public interface JobInfo { 8 | /** @return state of the job {@link JobState} */ 9 | JobState getState(); 10 | 11 | /** @return id of the job in the engine API, e.g. sessionId in Livy. */ 12 | String getExecutionId(); 13 | 14 | /** @return id specific to cluster where the app is deployed, e.g. YARN application id. */ 15 | String getAppId(); 16 | 17 | /** @return properties specific to cluster where the app is deployed, e.g. YARN app urls. */ 18 | Map<String, String> getAppInfo(); 19 | } 20 | -------------------------------------------------------------------------------- /services/jobs/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.codec.max-in-memory-size=20MB 2 | springdoc.packages-to-scan=com.linkedin.openhouse.jobs 3 | springdoc.swagger-ui.disable-swagger-default-url=true 4 | springdoc.swagger-ui.filter=true 5 | springdoc.swagger-ui.path=/jobs/api-docs 6 | springdoc.swagger-ui.operationsSorter=method 7 | server.tomcat.basedir=tomcat 8 | server.tomcat.accesslog.enabled=true 9 | server.tomcat.accesslog.rename-on-rotate=false 10 | server.tomcat.accesslog.request-attributes-enabled=false 11 | server.tomcat.accesslog.rotate=true 12 | management.endpoints.web.exposure.include=health, shutdown, prometheus, beans 13 | management.endpoint.health.enabled=true 14 | management.endpoint.shutdown.enabled=true 15 | management.endpoint.prometheus.enabled=true 16 | management.endpoint.beans.enabled=true 17 | management.metrics.web.server.request.autotime.percentiles=0.5,0.75,0.9,0.95,0.99 18 | management.metrics.distribution.percentiles-histogram.http.server.requests=true -------------------------------------------------------------------------------- /services/jobs/src/test/http/http-client.env.json: -------------------------------------------------------------------------------- 1 | { 2 | "local-server": { 3 | "base-url": "http://localhost:8080/jobs" 4 | }, 5 | "local-oh-hadoop-spark": { 6 | "base-url": "http://localhost:8002/jobs" 7 | } 8 | } -------------------------------------------------------------------------------- /services/jobs/src/test/http/orphan_files_deletion_job.http: -------------------------------------------------------------------------------- 1 | POST {{base-url}} 2 | Content-Type: application/json 3 | 4 | { 5 | "jobName": "ORPHAN_FILE_DELETION_db_tb", 6 | "clusterId": "LocalHadoopCluster", 7 | "jobConf": { 8 | "jobType": "ORPHAN_FILES_DELETION", 9 | "args": ["--tableName", "db.tb"] 10 | } 11 | } 12 | > {% 13 | client.global.set("jobId", response.body.jobId); 14 | %} 15 | 16 | ### 17 | GET {{base-url}}/{{jobId}} 18 | Accept: application/json 19 | 20 | ### 21 | PUT {{base-url}}/{{jobId}}/cancel 22 | Accept: application/json 23 | -------------------------------------------------------------------------------- /services/jobs/src/test/http/retention_job.http:
-------------------------------------------------------------------------------- 1 | POST {{base-url}} 2 | Content-Type: application/json 3 | 4 | { 5 | "jobName": "RETENTION_db_tb", 6 | "clusterId": "LocalHadoopCluster", 7 | "jobConf": { 8 | "jobType": "RETENTION", 9 | "proxyUser": "DUMMY_ANONYMOUS_USER", 10 | "args": ["--columnName", "ts", "--tableName", "db.tb", "--granularity", "day", "--count", 3] 11 | } 12 | } 13 | > {% 14 | client.global.set("jobId", response.body.jobId); 15 | %} 16 | 17 | ### 18 | GET {{base-url}}/{{jobId}} 19 | Accept: application/json 20 | 21 | ### 22 | PUT {{base-url}}/{{jobId}}/cancel 23 | Accept: application/json 24 | -------------------------------------------------------------------------------- /services/jobs/src/test/http/snapshot_expiration_job.http: -------------------------------------------------------------------------------- 1 | POST {{base-url}} 2 | Content-Type: application/json 3 | 4 | { 5 | "jobName": "SNAPSHOT_EXPIRATION_db_tb", 6 | "clusterId": "LocalHadoopCluster", 7 | "jobConf": { 8 | "jobType": "SNAPSHOTS_EXPIRATION", 9 | "proxyUser": "DUMMY_ANONYMOUS_USER", 10 | "args": [ 11 | "--tableName", "db.tb", 12 | "--granularity", "day", 13 | "--maxAge", 1 14 | ] 15 | } 16 | } 17 | > {% 18 | client.global.set("jobId", response.body.jobId); 19 | %} 20 | 21 | ### 22 | GET {{base-url}}/{{jobId}} 23 | Accept: application/json 24 | 25 | ### 26 | PUT {{base-url}}/{{jobId}}/cancel 27 | Accept: application/json 28 | -------------------------------------------------------------------------------- /services/jobs/src/test/http/sql_test_job.http: -------------------------------------------------------------------------------- 1 | POST {{base-url}} 2 | Content-Type: application/json 3 | 4 | { 5 | "jobName": "sql_test_job", 6 | "clusterId": "LocalHadoopCluster", 7 | "jobConf": { 8 | "jobType": "SQL_TEST", 9 | "proxyUser": "DUMMY_ANONYMOUS_USER" 10 | } 11 | } 12 | > {% 13 | client.global.set("jobId", response.body.jobId); 14 | %} 15 | 16 | ### 17 | GET {{base-url}}/{{jobId}} 18 | Accept: application/json 19 | 20 | ### 21 | PUT {{base-url}}/{{jobId}}/cancel 22 | Accept: application/json 23 | 24 | 25 | -------------------------------------------------------------------------------- /services/jobs/src/test/http/staged_files_deletion_job.http: -------------------------------------------------------------------------------- 1 | POST {{base-url}} 2 | Content-Type: application/json 3 | 4 | { 5 | "jobName": "STAGED_FILES_DELETION_db_tb", 6 | "clusterId": "LocalHadoopCluster", 7 | "jobConf": { 8 | "jobType": "STAGED_FILES_DELETION", 9 | "proxyUser": "openhouse", 10 | "args": [ 11 | "--tableName", "db.tb", 12 | "--granularity", "day", 13 | "--count", 2 14 | ] 15 | } 16 | } 17 | > {% 18 | client.global.set("jobId", response.body.jobId); 19 | %} 20 | 21 | ### 22 | GET {{base-url}}/{{jobId}} 23 | Accept: application/json 24 | 25 | ### 26 | PUT {{base-url}}/{{jobId}}/cancel 27 | Accept: application/json 28 | -------------------------------------------------------------------------------- /services/jobs/src/test/resources/test-local-cluster.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "local-cluster" 3 | storage: 4 | root-path: "/tmp/unittest" 5 | housetables: 6 | base-uri: "http://localhost:8080" 7 | security: 8 | tables: 9 | authorizations: 10 | opa: 11 | base-uri: "http://localhost:8181" -------------------------------------------------------------------------------- 
/services/tables/src/main/java/com/linkedin/openhouse/tables/api/ApiConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api; 2 | 3 | import com.linkedin.openhouse.tables.api.handler.TablesApiHandler; 4 | import com.linkedin.openhouse.tables.api.handler.impl.OpenHouseTablesApiHandler; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | 8 | /** Class that holds all the Beans related to a controller. */ 9 | @Configuration 10 | public class ApiConfig { 11 | @Bean 12 | public TablesApiHandler tablesApiHandler() { 13 | return new OpenHouseTablesApiHandler(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/handler/IcebergSnapshotsApiHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.handler; 2 | 3 | import com.linkedin.openhouse.common.api.spec.ApiResponse; 4 | import com.linkedin.openhouse.tables.api.spec.v0.request.IcebergSnapshotsRequestBody; 5 | import com.linkedin.openhouse.tables.api.spec.v0.response.GetTableResponseBody; 6 | 7 | /** 8 | * Interface layer between REST and Iceberg Snapshots backend. The implementation is injected into 9 | * the Service Controller. 10 | */ 11 | public interface IcebergSnapshotsApiHandler { 12 | 13 | ApiResponse<GetTableResponseBody> putIcebergSnapshots( 14 | String databaseId, 15 | String tableId, 16 | IcebergSnapshotsRequestBody icebergSnapshotRequestBody, 17 | String tableCreator); 18 | } 19 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v0/request/components/PolicyTag.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v0.request.components; 2 | 3 | import io.swagger.v3.oas.annotations.media.Schema; 4 | import java.util.Set; 5 | import lombok.AccessLevel; 6 | import lombok.AllArgsConstructor; 7 | import lombok.Builder; 8 | import lombok.EqualsAndHashCode; 9 | import lombok.Getter; 10 | import lombok.NoArgsConstructor; 11 | 12 | @Builder 13 | @EqualsAndHashCode 14 | @Getter 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | @NoArgsConstructor(access = AccessLevel.PROTECTED) 17 | public class PolicyTag { 18 | @Schema(description = "Policy tags", example = "PII, HC") 19 | Set<Tag> tags; 20 | 21 | public enum Tag { 22 | PII, 23 | HC 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v0/response/GetAclPoliciesResponseBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v0.response; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.google.gson.Gson; 5 | import com.linkedin.openhouse.tables.api.spec.v0.response.components.AclPolicy; 6 | import io.swagger.v3.oas.annotations.media.Schema; 7 | import java.util.List; 8 | import lombok.Builder; 9 | import lombok.Value; 10 | 11 | @Builder 12 | @Value 13 | public class GetAclPoliciesResponseBody { 14 | @Schema(description = "List of acl policies associated with table/database") 15 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 16 | private List<AclPolicy>
results; 17 | 18 | public String toJson() { 19 | return new Gson().toJson(this); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v0/response/GetAllDatabasesResponseBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v0.response; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.google.gson.Gson; 5 | import io.swagger.v3.oas.annotations.media.Schema; 6 | import java.util.List; 7 | import lombok.Builder; 8 | import lombok.Value; 9 | 10 | @Builder 11 | @Value 12 | public class GetAllDatabasesResponseBody { 13 | @Schema(description = "List of Database objects", example = "") 14 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 15 | private List<GetDatabaseResponseBody> results; 16 | 17 | public String toJson() { 18 | return new Gson().toJson(this); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v0/response/GetAllTablesResponseBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v0.response; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.google.gson.Gson; 5 | import io.swagger.v3.oas.annotations.media.Schema; 6 | import java.util.List; 7 | import lombok.Builder; 8 | import lombok.Value; 9 | 10 | @Builder 11 | @Value 12 | public class GetAllTablesResponseBody { 13 | 14 | @Schema(description = "List of Table objects in a database", example = "") 15 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 16 | private List<GetTableResponseBody> results; 17 | 18 | /** 19 | * TODO: spec out the pagination part, something like _metadata: { "offset": 20, "limit": 10, 20 | * "total": 3465, } links: { "next": "", "self": "", "prev": "" } 21 | */ 22 | public String toJson() { 23 | return new Gson().toJson(this); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v0/response/GetDatabaseResponseBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v0.response; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import io.swagger.v3.oas.annotations.media.Schema; 5 | import lombok.Builder; 6 | import lombok.Value; 7 | 8 | @Builder(toBuilder = true) 9 | @Value 10 | public class GetDatabaseResponseBody { 11 | @Schema(description = "Unique Resource identifier for the Database", example = "my_database") 12 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 13 | private String databaseId; 14 | 15 | @Schema( 16 | description = "Unique Resource identifier for the Cluster containing the Database", 17 | example = "my_cluster") 18 | @JsonProperty(access = JsonProperty.Access.READ_ONLY) 19 | private String clusterId; 20 | } 21 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/spec/v1/request/CreateUpdateTableRequestBody.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.spec.v1.request; 2 | 3 | /** WARNING: PLACEHOLDER CLASS FOR FUTURE REFERENCE, DO NOT USE.
*/ 4 | public class CreateUpdateTableRequestBody {} 5 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/validator/DatabasesApiValidator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.validator; 2 | 3 | import com.linkedin.openhouse.tables.api.spec.v0.request.UpdateAclPoliciesRequestBody; 4 | 5 | public interface DatabasesApiValidator { 6 | /** 7 | * Function to validate a request to update aclPolicies on a Database Resource with a given 8 | * databaseId 9 | * 10 | * @param databaseId 11 | * @param updateAclPoliciesRequestBody 12 | * @throws com.linkedin.openhouse.common.exception.RequestValidationFailureException if request is 13 | * invalid 14 | */ 15 | void validateUpdateAclPolicies( 16 | String databaseId, UpdateAclPoliciesRequestBody updateAclPoliciesRequestBody); 17 | 18 | /** 19 | * Function to validate a request to get aclPolicies on a Database Resource with a given 20 | * databaseId 21 | * 22 | * @param databaseId 23 | */ 24 | void validateGetAclPolicies(String databaseId); 25 | } 26 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/validator/IcebergSnapshotsApiValidator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.validator; 2 | 3 | import com.linkedin.openhouse.tables.api.spec.v0.request.IcebergSnapshotsRequestBody; 4 | 5 | public interface IcebergSnapshotsApiValidator { 6 | /** 7 | * Validate request body for snapshots PUT. 8 | * 9 | * @param clusterId Id of target cluster 10 | * @param databaseId Id of target database 11 | * @param tableId Id of target table 12 | * @param icebergSnapshotsRequestBody Request body for putting Iceberg snapshots 13 | */ 14 | void validatePutSnapshots( 15 | String clusterId, 16 | String databaseId, 17 | String tableId, 18 | IcebergSnapshotsRequestBody icebergSnapshotsRequestBody); 19 | } 20 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/api/validator/impl/PolicySpecValidator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.api.validator.impl; 2 | 3 | import com.linkedin.openhouse.common.api.spec.TableUri; 4 | import com.linkedin.openhouse.tables.api.spec.v0.request.CreateUpdateTableRequestBody; 5 | 6 | public abstract class PolicySpecValidator { 7 | 8 | protected String failureMessage = ""; 9 | 10 | protected String errorField = ""; 11 | 12 | public abstract boolean validate( 13 | CreateUpdateTableRequestBody createUpdateTableRequestBody, TableUri tableUri); 14 | 15 | public String getField() { 16 | return errorField; 17 | } 18 | 19 | public String getMessage() { 20 | return failureMessage; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/audit/DummyTableAuditHandler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.audit; 2 | 3 | import com.linkedin.openhouse.common.audit.AuditHandler; 4 | import com.linkedin.openhouse.tables.audit.model.TableAuditEvent; 5 | import lombok.extern.slf4j.Slf4j; 6 | import 
org.springframework.stereotype.Component; 7 | 8 | /** A dummy table audit handler which is only used in unit-tests and local docker-environments. */ 9 | @Slf4j 10 | @Component 11 | public class DummyTableAuditHandler implements AuditHandler<TableAuditEvent> { 12 | @Override 13 | public void audit(TableAuditEvent event) { 14 | log.info("Table audit event: \n" + event.toJson()); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/audit/model/OperationStatus.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.audit.model; 2 | 3 | /** The status for a specific table operation. */ 4 | public enum OperationStatus { 5 | FAILED, 6 | SUCCESS 7 | } 8 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/audit/model/OperationType.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.audit.model; 2 | 3 | /** The type of a specific table operation. */ 4 | public enum OperationType { 5 | CREATE, 6 | READ, 7 | UPDATE, 8 | COMMIT, 9 | DELETE, 10 | GRANT, 11 | REVOKE, 12 | STAGED_CREATE, 13 | STAGED_COMMIT 14 | } 15 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/common/DefaultColumnPattern.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.common; 2 | 3 | /** 4 | * ENUM for Date pattern associated with ColumnName in {@link 5 | * com.linkedin.openhouse.tables.api.spec.v0.request.components.RetentionColumnPattern} 6 | */ 7 | public enum DefaultColumnPattern { 8 | // default date pattern for datasets 9 | DAY("yyyy-MM-dd"), 10 | HOUR("yyyy-MM-dd-HH"); 11 | 12 | private final String pattern; 13 | 14 | DefaultColumnPattern(String pattern) { 15 | this.pattern = pattern; 16 | } 17 | 18 | public String getPattern() { 19 | return pattern; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/common/ReplicationInterval.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.common; 2 | 3 | import com.linkedin.openhouse.tables.api.spec.v0.request.components.ReplicationConfig; 4 | 5 | /** ENUM for default replication interval associated with Interval in {@link ReplicationConfig} */ 6 | public enum ReplicationInterval { 7 | // default interval to run replication jobs if no interval is provided by the user 8 | DEFAULT("1D"); 9 | 10 | private final String interval; 11 | 12 | ReplicationInterval(String interval) { 13 | this.interval = interval; 14 | } 15 | 16 | public String getInterval() { 17 | return interval; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/common/TableType.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.common; 2 | 3 | /** 4 | * Enum for table type: a primary table is a normal table, a replica table is a read-only copy of a 5 | * primary table 6 | */ 7 | public enum TableType { 8 | PRIMARY_TABLE, 9 | REPLICA_TABLE 10 | } 11 |
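Aside: the DefaultColumnPattern values defined a few files above are plain java.time patterns. A minimal, hypothetical sketch (not a repository file) of how a retention flow might render a timestamp with them:

// Hypothetical sketch: formatting a timestamp with the DAY and HOUR patterns.
import com.linkedin.openhouse.tables.common.DefaultColumnPattern;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

public class ColumnPatternSketch {
  public static void main(String[] args) {
    LocalDateTime ts = LocalDateTime.of(2024, 1, 31, 23, 0);
    DateTimeFormatter day = DateTimeFormatter.ofPattern(DefaultColumnPattern.DAY.getPattern());
    DateTimeFormatter hour = DateTimeFormatter.ofPattern(DefaultColumnPattern.HOUR.getPattern());
    System.out.println(day.format(ts));  // 2024-01-31
    System.out.println(hour.format(ts)); // 2024-01-31-23
  }
}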
-------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/config/TablesMvcConstants.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.config; 2 | 3 | public final class TablesMvcConstants { 4 | public static final String HTTP_HEADER_CLIENT_NAME = "X-Client-Name"; 5 | 6 | public static final String CLIENT_NAME_DEFAULT_VALUE = "unspecified"; 7 | public static final String METRIC_KEY_CLIENT_NAME = "client_name"; 8 | 9 | private TablesMvcConstants() { 10 | // Noop 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/config/TblPropsToggleRegistry.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.config; 2 | 3 | import java.util.Optional; 4 | 5 | public interface TblPropsToggleRegistry { 6 | 7 | Optional<String> obtainFeatureByKey(String key); 8 | } 9 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/config/TblPropsToggleRegistryBaseImpl.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.config; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Optional; 6 | import javax.annotation.PostConstruct; 7 | import org.springframework.stereotype.Component; 8 | 9 | /** A central registry for all table properties that are managed through Feature Toggle */ 10 | @Component 11 | public class TblPropsToggleRegistryBaseImpl implements TblPropsToggleRegistry { 12 | 13 | public static final String ENABLE_TBLTYPE = "enable_tabletype"; 14 | // TODO: Using these vocabularies as MySQL validation 15 | protected final Map<String, String> featureKeys = new HashMap<>(); 16 | 17 | @PostConstruct 18 | public void initializeKeys() { 19 | // placeholders: demo purpose 20 | featureKeys.put("openhouse.tableType", ENABLE_TBLTYPE); 21 | } 22 | 23 | public Optional<String> obtainFeatureByKey(String key) { 24 | return Optional.ofNullable(featureKeys.get(key)); 25 | } 26 | } 27 | --------------------------------------------------------------------------------
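Aside: a minimal, hypothetical sketch (not a repository file) of how the registry above resolves a feature-guarded table property to its feature key. In the service the component is Spring-managed, so initializeKeys() runs automatically via @PostConstruct; here it is called directly:

import com.linkedin.openhouse.tables.config.TblPropsToggleRegistryBaseImpl;
import java.util.Optional;

public class ToggleRegistrySketch {
  public static void main(String[] args) {
    TblPropsToggleRegistryBaseImpl registry = new TblPropsToggleRegistryBaseImpl();
    registry.initializeKeys(); // normally invoked by Spring via @PostConstruct
    Optional<String> feature = registry.obtainFeatureByKey("openhouse.tableType");
    System.out.println(feature.orElse("not feature-guarded")); // prints enable_tabletype
  }
}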
/services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/TablesMapperHelper.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.dto.mapper; 2 | 3 | import java.util.List; 4 | import java.util.stream.Collectors; 5 | import java.util.stream.StreamSupport; 6 | import org.apache.iceberg.SnapshotParser; 7 | import org.apache.iceberg.Table; 8 | 9 | /** 10 | * Utility class to simplify java code references in {@link TablesMapper} with {@link 11 | * org.mapstruct.ap.shaded.freemarker.core.Expression}. 12 | */ 13 | public final class TablesMapperHelper { 14 | private TablesMapperHelper() { 15 | // noop for utilities class constructor 16 | } 17 | 18 | static List<String> mapSnapshots(Table table) { 19 | return StreamSupport.stream(table.snapshots().spliterator(), false) 20 | .map(SnapshotParser::toJson) 21 | .collect(Collectors.toList()); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/attribute/PoliciesSpecConverter.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.dto.mapper.attribute; 2 | 3 | import com.google.gson.Gson; 4 | import com.linkedin.openhouse.tables.api.spec.v0.request.components.Policies; 5 | import javax.persistence.AttributeConverter; 6 | import javax.persistence.Converter; 7 | 8 | @Converter 9 | public class PoliciesSpecConverter implements AttributeConverter<Policies, String> { 10 | public String convertToDatabaseColumn(Policies attribute) { 11 | return (new Gson()).toJson(attribute); 12 | } 13 | 14 | @Override 15 | public Policies convertToEntityAttribute(String dbData) { 16 | return (new Gson()).fromJson(dbData, Policies.class); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/iceberg/TableTypeMapper.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.dto.mapper.iceberg; 2 | 3 | import static com.linkedin.openhouse.internal.catalog.mapper.HouseTableSerdeUtils.*; 4 | 5 | import com.linkedin.openhouse.tables.common.TableType; 6 | import java.util.Map; 7 | import org.apache.iceberg.Table; 8 | import org.mapstruct.Mapper; 9 | import org.mapstruct.Named; 10 | 11 | /** 12 | * Mapper to convert tableType from {@link org.apache.iceberg.Table} properties to TableType ENUM 13 | */ 14 | @Mapper(componentModel = "spring") 15 | public class TableTypeMapper { 16 | 17 | @Named("toTableType") 18 | public TableType toTableType(Table table) { 19 | Map<String, String> properties = table.properties(); 20 | if (properties.containsKey(getCanonicalFieldName("tableType"))) { 21 | return TableType.valueOf(properties.get(getCanonicalFieldName("tableType"))); 22 | } else { 23 | return null; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/model/DatabaseDto.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.model; 2 | 3 | import javax.persistence.Entity; 4 | import javax.persistence.Id; 5 | import javax.persistence.IdClass; 6 | import lombok.AccessLevel; 7 | import lombok.AllArgsConstructor; 8 | import lombok.Builder; 9 | import lombok.EqualsAndHashCode; 10 | import lombok.Getter; 11 | import lombok.NoArgsConstructor; 12 | 13 | @Entity 14 | @IdClass(DatabaseDtoPrimaryKey.class) 15 | @Builder 16 | @Getter 17 | @EqualsAndHashCode 18 | @NoArgsConstructor(access = AccessLevel.PROTECTED) 19 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 20 | public class DatabaseDto { 21 | @Id private String databaseId; 22 | 23 | private String clusterId; 24 | } 25 | --------------------------------------------------------------------------------
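Aside: because the PoliciesSpecConverter a few files above is a plain Gson round-trip, it can be exercised without a JPA container. A hedged sketch (it assumes Policies deserializes from an empty JSON object, which the converter itself already relies on):

import com.linkedin.openhouse.tables.api.spec.v0.request.components.Policies;
import com.linkedin.openhouse.tables.dto.mapper.attribute.PoliciesSpecConverter;

public class PoliciesConverterSketch {
  public static void main(String[] args) {
    PoliciesSpecConverter converter = new PoliciesSpecConverter();
    // Column value -> entity attribute -> column value round-trip.
    Policies policies = converter.convertToEntityAttribute("{}");
    String column = converter.convertToDatabaseColumn(policies);
    System.out.println(column); // Gson serialization of the (empty) Policies object
  }
}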
/services/tables/src/main/java/com/linkedin/openhouse/tables/model/DatabaseDtoPrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class DatabaseDtoPrimaryKey implements Serializable { 17 | private String databaseId; 18 | } 19 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDtoPrimaryKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class TableDtoPrimaryKey implements Serializable { 17 | private String tableId; 18 | 19 | private String databaseId; 20 | } 21 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/repository/OpenHouseInternalRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.repository; 2 | 3 | import com.linkedin.openhouse.tables.model.TableDto; 4 | import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; 5 | import java.util.List; 6 | import org.springframework.data.repository.CrudRepository; 7 | import org.springframework.stereotype.Repository; 8 | 9 | /** 10 | * Base interface for the repository backed by OpenHouseInternalCatalog for storing and retrieving 11 | * {@link TableDto} objects. 12 | */ 13 | @Repository 14 | public interface OpenHouseInternalRepository extends CrudRepository<TableDto, TableDtoPrimaryKey> { 15 | List<TableDtoPrimaryKey> findAllIds(); 16 | 17 | List<TableDto> searchTables(String databaseId); 18 | } 19 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/repository/SchemaValidator.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.repository; 2 | 3 | import com.linkedin.openhouse.common.exception.InvalidSchemaEvolutionException; 4 | import org.apache.iceberg.Schema; 5 | 6 | /** 7 | * Interface to define schema evolution rules. One should provide an implementation of this interface 8 | * to customize schema evolution rules, e.g. enforcing no column-dropping.
9 | */ 10 | public interface SchemaValidator { 11 | void validateWriteSchema(Schema oldSchema, Schema newSchema, String tableUri) 12 | throws InvalidSchemaEvolutionException, IllegalArgumentException; 13 | } 14 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/PreservedPropsToggleEnabler.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.repository.impl; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | /** 9 | * Annotation for the method with a specific signature (see {@link 10 | * com.linkedin.openhouse.tables.toggle.FeatureToggleAspect}) which controls if a table property is 11 | * considered preserved (read-only for users). 12 | * 13 | *
<p>
Usage of this annotation should be very specific to allow default-preserved keys to be 14 | * non-preserved. Do not use this annotation, e.g., on a denyList-like semantic like 15 | * "disablePoliciesForTable". 16 | * 17 | *
<p>
This annotation should be considered syntactic sugar to avoid contaminating the main body of 18 | * the service implementation. 19 | */ 20 | @Retention(RetentionPolicy.RUNTIME) 21 | @Target(ElementType.METHOD) 22 | public @interface PreservedPropsToggleEnabler {} 23 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/Timed.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.repository.impl; 2 | 3 | import java.lang.annotation.ElementType; 4 | import java.lang.annotation.Retention; 5 | import java.lang.annotation.RetentionPolicy; 6 | import java.lang.annotation.Target; 7 | 8 | @Retention(RetentionPolicy.RUNTIME) 9 | @Target(ElementType.METHOD) 10 | public @interface Timed { 11 | String metricKey(); 12 | } 13 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsService.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.services; 2 | 3 | import com.linkedin.openhouse.tables.api.spec.v0.request.IcebergSnapshotsRequestBody; 4 | import com.linkedin.openhouse.tables.model.TableDto; 5 | import org.springframework.data.util.Pair; 6 | 7 | /** Service layer for loading Iceberg {@link org.apache.iceberg.Snapshot} provided by client. */ 8 | public interface IcebergSnapshotsService { 9 | 10 | /** @return pair of a {@link TableDto} object and a flag indicating whether the table was created. */ 11 | Pair<TableDto, Boolean> putIcebergSnapshots( 12 | String databaseId, 13 | String tableId, 14 | IcebergSnapshotsRequestBody icebergSnapshotRequestBody, 15 | String tableCreator); 16 | } 17 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/toggle/TableFeatureToggle.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.toggle; 2 | 3 | /** Interface to check if a feature is toggled-on for a table */ 4 | public interface TableFeatureToggle { 5 | /** 6 | * Determine if a given feature is activated for the table. 7 | * 8 | * @param databaseId databaseId 9 | * @param tableId tableId 10 | * @param featureId featureId 11 | * @return True if the feature is activated for the table.
12 | */ 13 | boolean isFeatureActivated(String databaseId, String tableId, String featureId); 14 | } 15 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/toggle/ToggleStatusMapper.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.toggle; 2 | 3 | import com.linkedin.openhouse.housetables.client.model.ToggleStatus; 4 | import com.linkedin.openhouse.tables.toggle.model.TableToggleStatus; 5 | import com.linkedin.openhouse.tables.toggle.model.ToggleStatusKey; 6 | import org.mapstruct.Mapper; 7 | import org.mapstruct.Mapping; 8 | 9 | @Mapper(componentModel = "spring") 10 | public interface ToggleStatusMapper { 11 | 12 | @Mapping(source = "toggleStatus.status", target = "toggleStatusEnum") 13 | TableToggleStatus toTableToggleStatus(ToggleStatusKey key, ToggleStatus toggleStatus); 14 | } 15 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/toggle/model/TableToggleStatus.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.toggle.model; 2 | 3 | import com.linkedin.openhouse.housetables.client.model.ToggleStatus; 4 | import javax.persistence.Entity; 5 | import javax.persistence.Id; 6 | import javax.persistence.IdClass; 7 | import lombok.AccessLevel; 8 | import lombok.AllArgsConstructor; 9 | import lombok.Builder; 10 | import lombok.EqualsAndHashCode; 11 | import lombok.Getter; 12 | import lombok.NoArgsConstructor; 13 | 14 | /** Data Model for persisting Feature-Toggle Rule in the HTS-Repository. */ 15 | @Entity 16 | @IdClass(ToggleStatusKey.class) 17 | @Builder(toBuilder = true) 18 | @Getter 19 | @EqualsAndHashCode 20 | @NoArgsConstructor(access = AccessLevel.PROTECTED) 21 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 22 | public class TableToggleStatus { 23 | 24 | @Id private String featureId; 25 | 26 | @Id private String tableId; 27 | 28 | @Id private String databaseId; 29 | 30 | private ToggleStatus.StatusEnum toggleStatusEnum; 31 | } 32 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/toggle/model/ToggleStatusKey.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.toggle.model; 2 | 3 | import java.io.Serializable; 4 | import lombok.AccessLevel; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Builder; 7 | import lombok.EqualsAndHashCode; 8 | import lombok.Getter; 9 | import lombok.NoArgsConstructor; 10 | 11 | @Builder 12 | @EqualsAndHashCode 13 | @Getter 14 | @NoArgsConstructor 15 | @AllArgsConstructor(access = AccessLevel.PROTECTED) 16 | public class ToggleStatusKey implements Serializable { 17 | private String featureId; 18 | 19 | private String tableId; 20 | 21 | private String databaseId; 22 | } 23 | -------------------------------------------------------------------------------- /services/tables/src/main/java/com/linkedin/openhouse/tables/toggle/repository/ToggleStatusesRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.toggle.repository; 2 | 3 | import com.linkedin.openhouse.tables.toggle.model.TableToggleStatus; 4 | import com.linkedin.openhouse.tables.toggle.model.ToggleStatusKey; 
5 | import org.springframework.data.repository.CrudRepository; 6 | import org.springframework.stereotype.Repository; 7 | 8 | @Repository 9 | public interface ToggleStatusesRepository 10 | extends CrudRepository<TableToggleStatus, ToggleStatusKey> {} 11 | -------------------------------------------------------------------------------- /services/tables/src/main/resources/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linkedin/openhouse/7e9fa9b3e37d5ada57a70bf46dcd975def8ac47b/services/tables/src/main/resources/static/favicon.ico -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/ToggleH2StatusesRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.e2e.h2; 2 | 3 | import com.linkedin.openhouse.tables.toggle.repository.ToggleStatusesRepository; 4 | import org.springframework.context.annotation.Primary; 5 | import org.springframework.stereotype.Repository; 6 | 7 | /** 8 | * The {@link org.springframework.context.annotation.Bean} injected into /tables e2e tests when 9 | * communication to the implementation of {@link ToggleStatusesRepository} is not needed. With the 10 | * {@link Primary} annotation, this repository will be the default injection. 11 | */ 12 | @Repository 13 | @Primary 14 | public interface ToggleH2StatusesRepository extends ToggleStatusesRepository {} 15 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/authorization/TestPrivileges.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.authorization; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | 5 | import com.linkedin.openhouse.tables.authorization.Privileges; 6 | import org.junit.jupiter.api.Test; 7 | 8 | public class TestPrivileges { 9 | @Test 10 | public void validateValidPrivileges() { 11 | assertTrue(Privileges.Privilege.isSupportedPrivilege("CREATE_TABLE")); 12 | assertFalse(Privileges.Privilege.isSupportedPrivilege("SELECT_TABLE")); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/controller/MockUnauthenticatedUser.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.controller; 2 | 3 | import java.lang.annotation.Documented; 4 | import java.lang.annotation.ElementType; 5 | import java.lang.annotation.Inherited; 6 | import java.lang.annotation.Retention; 7 | import java.lang.annotation.RetentionPolicy; 8 | import java.lang.annotation.Target; 9 | import org.springframework.security.test.context.support.WithSecurityContext; 10 | 11 | @Target({ElementType.METHOD, ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Inherited 14 | @Documented 15 | @WithSecurityContext(factory = MockUnauthenticatedSecurityContextFactory.class) 16 | public @interface MockUnauthenticatedUser { 17 | boolean isAuthenticated() default false; 18 | } 19 | --------------------------------------------------------------------------------
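Aside: a hedged sketch of how a controller test might apply the MockUnauthenticatedUser annotation above. The endpoint path, expected status, and test class are illustrative assumptions, not taken from the repository's actual tests:

// Hypothetical test sketch (not a repository file).
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import com.linkedin.openhouse.tables.mock.controller.MockUnauthenticatedUser;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.web.servlet.MockMvc;

@SpringBootTest
@AutoConfigureMockMvc
public class UnauthenticatedRequestSketch {

  @Autowired private MockMvc mvc;

  @Test
  @MockUnauthenticatedUser // installs the unauthenticated security context defined above
  public void unauthenticatedRequestIsRejected() throws Exception {
    // "/v1/databases" is a hypothetical endpoint, used here only for illustration.
    mvc.perform(get("/v1/databases")).andExpect(status().isUnauthorized());
  }
}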
/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/AuthorizationPropertiesInitializer.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | import org.springframework.context.ApplicationContextInitializer; 4 | import org.springframework.context.ConfigurableApplicationContext; 5 | import org.springframework.test.context.support.TestPropertySourceUtils; 6 | 7 | /** Enable authorization for some Controller unit-tests. */ 8 | public class AuthorizationPropertiesInitializer 9 | implements ApplicationContextInitializer<ConfigurableApplicationContext> { 10 | @Override 11 | public void initialize(ConfigurableApplicationContext applicationContext) { 12 | TestPropertySourceUtils.addInlinedPropertiesToEnvironment( 13 | applicationContext, "cluster.security.tables.authorization.enabled=true"); 14 | TestPropertySourceUtils.addInlinedPropertiesToEnvironment( 15 | applicationContext, 16 | "cluster.security.token.interceptor.classname=com.linkedin.openhouse.common.security.DummyTokenInterceptor"); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/AuthorizationPropertiesTest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | import com.linkedin.openhouse.cluster.configs.ClusterProperties; 4 | import org.junit.jupiter.api.Assertions; 5 | import org.junit.jupiter.api.Test; 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.boot.test.context.SpringBootTest; 8 | import org.springframework.test.context.ContextConfiguration; 9 | 10 | @SpringBootTest 11 | @ContextConfiguration(initializers = AuthorizationPropertiesInitializer.class) 12 | public class AuthorizationPropertiesTest { 13 | 14 | @Autowired private ClusterProperties clusterProperties; 15 | 16 | @Test 17 | public void testAuthPropertyPresent() { 18 | Assertions.assertEquals( 19 | "com.linkedin.openhouse.common.security.DummyTokenInterceptor", 20 | clusterProperties.getClusterSecurityTokenInterceptorClassname()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/CustomClusterPropertiesInitializer.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import org.springframework.context.ApplicationContextInitializer; 6 | import org.springframework.context.ConfigurableApplicationContext; 7 | import org.springframework.util.ResourceUtils; 8 | 9 | public class CustomClusterPropertiesInitializer 10 | implements ApplicationContextInitializer<ConfigurableApplicationContext> { 11 | @Override 12 | public void initialize(ConfigurableApplicationContext applicationContext) { 13 | File file = null; 14 | try { 15 | file = 16 | ResourceUtils.getFile( 17 | CustomClusterPropertiesTest.class.getResource("/cluster-test-properties.yaml")); 18 | } catch (FileNotFoundException e) { 19 | e.printStackTrace(); 20 | } 21 | System.setProperty("OPENHOUSE_CLUSTER_CONFIG_PATH", file.getAbsolutePath()); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/DefaultClusterPropertiesInitializer.java:
-------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | import org.springframework.context.ApplicationContextInitializer; 4 | import org.springframework.context.ConfigurableApplicationContext; 5 | 6 | public class DefaultClusterPropertiesInitializer 7 | implements ApplicationContextInitializer<ConfigurableApplicationContext> { 8 | 9 | @Override 10 | public void initialize(ConfigurableApplicationContext applicationContext) { 11 | System.setProperty("OPENHOUSE_CLUSTER_CONFIG_PATH", ""); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/DefaultClusterPropertiesTest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | import com.linkedin.openhouse.cluster.configs.ClusterProperties; 4 | import org.junit.jupiter.api.AfterAll; 5 | import org.junit.jupiter.api.Assertions; 6 | import org.junit.jupiter.api.Test; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.boot.test.context.SpringBootTest; 9 | import org.springframework.test.context.ContextConfiguration; 10 | 11 | @SpringBootTest 12 | @ContextConfiguration(initializers = DefaultClusterPropertiesInitializer.class) 13 | public class DefaultClusterPropertiesTest { 14 | 15 | @Autowired private ClusterProperties clusterProperties; 16 | 17 | @Test 18 | public void testClusterProperties() { 19 | Assertions.assertEquals("local-cluster", clusterProperties.getClusterName()); 20 | } 21 | 22 | @AfterAll 23 | static void unsetSysProp() { 24 | System.clearProperty("OPENHOUSE_CLUSTER_CONFIG_PATH"); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/mock/properties/InvalidHandlerInterceptorResource.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.mock.properties; 2 | 3 | public class InvalidHandlerInterceptorResource {} 4 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/repository/impl/SettableInternalRepositoryForTest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.repository.impl; 2 | 3 | import org.apache.iceberg.catalog.Catalog; 4 | 5 | public class SettableInternalRepositoryForTest extends OpenHouseInternalRepositoryImpl { 6 | 7 | public void setCatalog(Catalog catalog) { 8 | this.catalog = catalog; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableCatalogForTest.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.settable; 2 | 3 | import com.linkedin.openhouse.internal.catalog.OpenHouseInternalCatalog; 4 | import com.linkedin.openhouse.internal.catalog.OpenHouseInternalTableOperations; 5 | import org.apache.iceberg.TableOperations; 6 | import org.apache.iceberg.catalog.TableIdentifier; 7 | import org.springframework.stereotype.Component; 8 | 9 | @Component 10 | public class SettableCatalogForTest extends OpenHouseInternalCatalog { 11 | private
OpenHouseInternalTableOperations ops; 12 | 13 | public void setOperation(OpenHouseInternalTableOperations ops) { 14 | this.ops = ops; 15 | } 16 | 17 | @Override 18 | protected TableOperations newTableOps(TableIdentifier tableIdentifier) { 19 | return ops; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /services/tables/src/test/java/com/linkedin/openhouse/tables/settable/SettableTestConfig.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tables.settable; 2 | 3 | import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository; 4 | import com.linkedin.openhouse.tables.repository.impl.SettableInternalRepositoryForTest; 5 | import org.apache.iceberg.catalog.Catalog; 6 | import org.springframework.boot.test.context.TestConfiguration; 7 | import org.springframework.context.annotation.Bean; 8 | import org.springframework.context.annotation.Primary; 9 | 10 | @TestConfiguration 11 | public class SettableTestConfig { 12 | @Primary 13 | @Bean("SettableCatalog") 14 | public Catalog provideTestCatalog() { 15 | return new SettableCatalogForTest(); 16 | } 17 | 18 | @Primary 19 | @Bean("SettableInternalRepo") 20 | public OpenHouseInternalRepository provideTestInternalRepo() { 21 | return new SettableInternalRepositoryForTest(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /services/tables/src/test/resources/cluster-test-properties.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | name: "TestCluster" 3 | storage: 4 | root-path: "/tmp/unittest" 5 | storages: 6 | default-type: "hdfs" 7 | types: 8 | hdfs: 9 | rootpath: "/tmp/unittest" 10 | endpoint: "file:///" 11 | parameters: 12 | key1: value1 13 | local: 14 | rootpath: "/tmp/unittest" 15 | endpoint: "file:///" 16 | storage-selector: 17 | name: "RegexStorageSelector" 18 | parameters: 19 | regex: local_db\.[a-zA-Z0-9_]+$ 20 | storage-type: local 21 | housetables: 22 | base-uri: "http://localhost:8080" 23 | tables: 24 | allowed-client-name-values: trino,spark 25 | security: 26 | tables: 27 | authorizations: 28 | opa: 29 | base-uri: "http://localhost:8181" -------------------------------------------------------------------------------- /services/tables/src/test/resources/dummy_healthy_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name":"timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | "required": true, 25 | "name": "stats", 26 | "type": "double" 27 | }, 28 | { 29 | "id": 5, 30 | "required": true, 31 | "name": "count", 32 | "type": "int" 33 | }, 34 | { 35 | "id": 6, 36 | "required": true, 37 | "name": "timeLong", 38 | "type": "long" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/dummy_snapshot_serialized.json: -------------------------------------------------------------------------------- 1 | { 2 | "snapshot-id" : 6509092076245313362, 3 | "timestamp-ms" : 1661463827467, 4 | "summary" : { 5 | "operation" : "append", 6 | "added-data-files" : "1", 7 | "added-records" : "1", 8 | 
"added-files-size" : "10", 9 | "changed-partition-count" : "1", 10 | "total-records" : "1", 11 | "total-files-size" : "10", 12 | "total-data-files" : "1", 13 | "total-delete-files" : "0", 14 | "total-position-deletes" : "0", 15 | "total-equality-deletes" : "0" 16 | }, 17 | "manifest-list" : "/var/folders/cr/k9w0lym10tqcw0r_lcn7kln4000mp0/T/unittest4119925226031407025/d1/ebd6e2f0-4c92-4a23-b0a7-9f36d1e407f9/metadata/snap-6509092076245313362-1-5e1367d2-a1a7-46ac-bfe9-c92169e92e75.avro", 18 | "schema-id" : 0 19 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/dummy_unhealthy_cluster_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name":"timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | "required": true, 25 | "name": "stats", 26 | "type": "double" 27 | }, 28 | { 29 | "id": 5, 30 | "required": true, 31 | "name": "country", 32 | "type": "string" 33 | }, 34 | { 35 | "id": 6, 36 | "required": true, 37 | "name":"city", 38 | "type": "string" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/evolved_dummy_healthy_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name": "timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | "required": true, 25 | "name": "stats", 26 | "type": "double" 27 | }, 28 | { 29 | "id": 5, 30 | "required": true, 31 | "name": "count", 32 | "type": "int" 33 | }, 34 | { 35 | "id": 6, 36 | "required": true, 37 | "name": "timeLong", 38 | "type": "long" 39 | }, 40 | { 41 | "id": 7, 42 | "required": false, 43 | "name": "favorite_number", 44 | "type": "int", 45 | "initial-default": null 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/evolved_dummy_healthy_schema_reorder.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "name", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "id", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name": "timestampCol", 20 | "type": "timestamp" 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/field_update/base.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name": "timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | 
"required": true, 25 | "name": "stats", 26 | "type": "int" 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/field_update/base_int2long.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name": "timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | "required": true, 25 | "name": "stats", 26 | "type": "long" 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /services/tables/src/test/resources/invalid_type_promote.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "required": true, 7 | "name": "id", 8 | "type": "string" 9 | }, 10 | { 11 | "id": 2, 12 | "required": true, 13 | "name": "name", 14 | "type": "string" 15 | }, 16 | { 17 | "id": 3, 18 | "required": true, 19 | "name": "timestampCol", 20 | "type": "timestamp" 21 | }, 22 | { 23 | "id": 4, 24 | "required": true, 25 | "name": "stats", 26 | "type": "long" 27 | }, 28 | { 29 | "id": 5, 30 | "required": true, 31 | "name": "count", 32 | "type": "int" 33 | } 34 | ] 35 | } -------------------------------------------------------------------------------- /tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/ToggleH2StatusesRepository.java: -------------------------------------------------------------------------------- 1 | package com.linkedin.openhouse.tablestest; 2 | 3 | import com.linkedin.openhouse.tables.toggle.repository.ToggleStatusesRepository; 4 | import org.springframework.context.annotation.Primary; 5 | import org.springframework.stereotype.Repository; 6 | 7 | /** Repository based on in-memory storage, required to ensure test suites work as expected. */ 8 | @Repository 9 | @Primary 10 | public interface ToggleH2StatusesRepository extends ToggleStatusesRepository {} 11 | -------------------------------------------------------------------------------- /tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/resources/dummy.token: -------------------------------------------------------------------------------- 1 | eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6IkRVTU1ZX0FOT05ZTU9VU19VU0VSZGJjNDk3MTMtMzM5ZC00Y2ZkLTkwMDgtZDY4NzlhZDQwZjE2Iiwic3ViIjoie1wiQ09ERVwiOlwiRFVNTVlfQ09ERVwiLFwiVVNFUi1JRFwiOlwiRFVNTVlfQU5PTllNT1VTX1VTRVJcIn0ifQ.W2WVBrMacFrXS8Xa29k_V_yD0yca2nEet5mSYq27Ayo --------------------------------------------------------------------------------
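Aside: dummy.token above is an ordinary three-part JWT consumed by DummyTokenInterceptor in tests. A minimal sketch (pure JDK, no signature verification, which is fine for a dummy credential) that prints its payload; the token string itself is elided here and should be pasted from the file above:

// Hypothetical sketch: decoding the payload segment of the dummy JWT.
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class DummyTokenSketch {
  public static void main(String[] args) {
    String token = "..."; // paste the dummy.token contents here
    String payload = token.split("\\.")[1]; // JWT layout: header.payload.signature
    byte[] decoded = Base64.getUrlDecoder().decode(payload);
    // Expected shape (illustrative): {"iat":...,"jti":"DUMMY_ANONYMOUS_USER...","sub":"..."}
    System.out.println(new String(decoded, StandardCharsets.UTF_8));
  }
}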