├── .gitignore ├── .repo └── pom.template.xml ├── README.md ├── binlog2delta ├── .DS_Store ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ ├── .DS_Store │ └── main │ ├── .DS_Store │ ├── java │ ├── .DS_Store │ └── tech │ │ └── mlsql │ │ └── plugins │ │ └── binlog2delta │ │ └── JavaDoc.java │ └── resources │ ├── main.mlsql │ └── plugin.json ├── connect-persist ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── db.sql ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ ├── et │ └── ConnectPersistCommand.scala │ └── mllib │ └── ConnectPersistApp.scala ├── delta-enhancer ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugin │ └── et │ └── DeltaCommand.scala ├── desktop-publish.sh ├── dev ├── change-scala-version.sh ├── change-version-to-2.11.sh └── change-version-to-2.12.sh ├── ds-hbase-2x ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── org │ └── apache │ │ └── spark │ │ └── sql │ │ └── execution │ │ └── datasources │ │ └── hbase2x │ │ ├── DefaultSource.scala │ │ ├── HBaseConfBuilder.scala │ │ ├── HBaseType.scala │ │ ├── JavaDoc.java │ │ └── SparkHBaseConf.scala │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── MLSQLHBase2x.scala ├── echo-controller ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── mllib │ └── echocontroller │ └── StreamApp.scala ├── install-all.sh ├── install.sh ├── last-command ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── LastCommand.scala ├── mlsql-analysis-toolkit ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── analysis │ ├── AnalysisApp.scala │ ├── ApproxQuantile.scala │ └── DFTool.scala ├── mlsql-assert ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── assert │ ├── app │ └── MLSQLAssert.scala │ └── ets │ ├── Assert.scala │ └── MLSQLThrow.scala ├── mlsql-bigdl ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── com │ └── intel │ │ └── analytics │ │ └── bigdl │ │ └── visualization │ │ ├── LogTrainSummary.scala │ │ └── WowFileWriter.scala │ └── tech │ └── mlsql │ └── plugins │ └── bigdl │ ├── BigDLApp.scala │ ├── BigDLFunctions.scala │ ├── OptimizeParamExtractor.scala │ ├── SQLBigDLClassifyExt.scala │ ├── SQLImageLoaderExt.scala │ ├── SQLLeNet5Ext.scala │ ├── SQLMnistLoaderExt.scala │ ├── WowClassNLLCriterion.scala │ └── WowLoggerFilter.scala ├── mlsql-canal ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── scala │ └── tech │ └── mlsql │ └── plugins │ └── canal │ ├── CanalApp.scala │ ├── ets │ └── BinlogToDelta.scala │ ├── mysql │ ├── JdbcTypeParser.scala │ ├── MysqlType.java │ └── statement │ │ └── DDLStatementParser.scala │ ├── sink │ ├── BinlogConstants.scala │ ├── BinlogWritter.scala │ ├── DeltaSink.scala │ └── Sink.scala │ └── util │ └── JacksonUtil.scala ├── mlsql-cli ├── 
.repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugin │ └── cli │ └── app │ ├── CliCommands.java │ ├── CliException.java │ ├── CliExceptionUtils.java │ ├── MLSQLCli.java │ └── MLSQLCmd.java ├── mlsql-cube ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin └── pom.xml ├── mlsql-ds ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── app │ ├── MLSQLDs.scala │ └── MLSQLXml.scala ├── mlsql-excel ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── com │ └── crealytics │ │ └── spark │ │ └── excel │ │ ├── DataColumn.scala │ │ ├── DataLocator.scala │ │ ├── DefaultSource.scala │ │ ├── DefaultSource15.scala │ │ ├── ExcelFileSaver.scala │ │ ├── ExcelRelation.scala │ │ ├── InferSchema.scala │ │ ├── PlainNumberFormat.scala │ │ ├── Utils.scala │ │ ├── WorkbookReader.scala │ │ └── package.scala │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── MLSQLExcel.scala ├── mlsql-ext-ets ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ext │ └── ets │ └── app │ └── MLSQLETApp.scala ├── mlsql-ke ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ke │ ├── app │ └── MLSQLKE.scala │ └── ets │ ├── KEAPISchedule.scala │ ├── KEAutoModel.scala │ └── KEBuildSegment.scala ├── mlsql-language-server ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── build.sh ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── langserver │ ├── AutoSuggestWrapper.scala │ ├── FileTracker.java │ ├── LSContext.java │ ├── MLSQLDocumentService.java │ ├── MLSQLLanguageServer.java │ ├── MLSQLWorkspaceService.java │ ├── commons │ └── client │ │ ├── Message.java │ │ └── TraceRecord.java │ └── launchers │ └── stdio │ ├── Launcher.java │ └── MLSQLDesktopApp.scala ├── mlsql-mllib ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── mllib │ ├── app │ └── MLSQLMllib.scala │ └── ets │ ├── AutoMLExt.scala │ ├── ClassificationEvaluator.scala │ ├── ColumnsExt.scala │ ├── PluginBaseETAuth.scala │ ├── RegressionEvaluator.scala │ ├── SampleDatasetExt.scala │ └── TakeRandomSampleExt.scala ├── mlsql-shell ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── shell │ ├── app │ └── MLSQLShell.scala │ └── ets │ ├── CopyFromLocal.scala │ └── ShellExecute.scala ├── pom.xml ├── run-script ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── RunScript.scala ├── save-then-load ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── SaveThenLoad.scala ├── stream-persist ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── db.sql ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsq │ └── 
streambootstrapatstartup │ ├── StreamApp.scala │ └── StreamPersistCommand.scala └── table-repartition ├── .repo ├── desc.template.plugin └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src └── main └── java └── tech └── mlsql └── plugins └── et └── TableRepartition.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.iml 3 | target 4 | .DS_Store 5 | /**/build 6 | 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlsql-plugins 2 | 3 | This project is a collection of plugins for MLSQL. 4 | Please check each module in the project for more details. 5 | 6 | ## Build Shade Jar 7 | 8 | Requirements: 9 | 10 | 1. Python >= 3.6 11 | 2. Maven >= 3.0 12 | 13 | You can install [mlsql_plugin_tool](https://github.com/allwefantasy/mlsql_plugin_tool) to build the modules in this project. 14 | 15 | Install command: 16 | 17 | ``` 18 | pip install mlsql_plugin_tool 19 | ``` 20 | 21 | Build shade jar command: 22 | 23 | ``` 24 | mlsql_plugin_tool build --module_name xxxxx --spark spark243 25 | ``` 26 | 27 | 1. spark: two options are available, spark243, spark311 28 | 2. module_name: e.g. mlsql-excel, ds-hbase-2x 29 | 30 | Once the build succeeds, the system will show a message like the following: 31 | 32 | ``` 33 | 34 | ====Build success!===== 35 | File location 0: 36 | /Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/ds-hbase-2x/target/ds-hbase-2x-2.4_2.11-0.1.0-SNAPSHOT.jar 37 | 38 | ``` 39 | 40 | Then you can install this plugin (jar file) in [MLSQL Engine](https://docs.mlsql.tech/mlsql-stack/plugin/offline_install.html) 41 | 42 | ## Plugins that Support Both Spark 2.4.3 and 3.1.1 43 | 44 | 1. binlog2delta 45 | 2. connect-persist 46 | 3. ds-hbase-2x 47 | 4. mlsql-bigdl 48 | 5. mlsql-excel 49 | 6. stream-persist 50 | 7.
mlsql-mllib -------------------------------------------------------------------------------- /binlog2delta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | mainClass=- 2 | version=0.1.0-SNAPSHOT 3 | author=allwefantasy 4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta 6 | scala_version=2.11 7 | mlsqlPluginType=script 8 | desc=wow 9 | -------------------------------------------------------------------------------- /binlog2delta/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | binlog2delta_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /binlog2delta/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin script add - binlog2delta; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set checkpointLocation="/tmp/cpl-binlog-m"; 11 | include plugin.`binlog2delta`; 12 | ``` 13 | 14 | Here are parameter you can set before include the plugin: 15 | 16 | ```sql 17 | set streamName="binlog"; 18 | 19 | set host="127.0.0.1"; 20 | set port="3306"; 21 | set userName="root"; 22 | set password="mlsql"; 23 | set bingLogNamePrefix="mysql-bin"; 24 | set binlogIndex="1"; 25 | set binlogFileOffset="4"; 26 | set databaseNamePattern="mlsql_console"; 27 | set tableNamePattern="script_file"; 28 | 29 | set deltaTableHome="/tmp/binlog2delta"; 30 | set idCols="id"; 31 | set duration="10"; 32 | set checkpointLocation="/tmp/ck-binlog2delta"; 33 | 34 | ``` 35 | 36 | ## Check the content in plugin 37 | 38 | ```sql 39 | !plugin script show binlog2delta/plugin.json; 40 | ``` 41 | 42 | or 43 | 44 | ```sql 45 | !plugin script show binlog2delta/main.json; 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /binlog2delta/desc.plugin: -------------------------------------------------------------------------------- 1 | mainClass=- 2 | version=0.1.0-SNAPSHOT 3 | author=allwefantasy 4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta 6 | scala_version=2.11 7 | mlsqlPluginType=script 8 | desc=wow -------------------------------------------------------------------------------- /binlog2delta/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | binlog2delta_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /binlog2delta/src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/.DS_Store 
-------------------------------------------------------------------------------- /binlog2delta/src/main/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/main/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/src/main/java/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/main/java/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/src/main/java/tech/mlsql/plugins/binlog2delta/JavaDoc.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.binlog2delta; 2 | 3 | /** 4 | * 2019-09-16 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class JavaDoc { 7 | } 8 | -------------------------------------------------------------------------------- /binlog2delta/src/main/resources/main.mlsql: -------------------------------------------------------------------------------- 1 | 2 | set streamName="binlog" where type="defaultParam"; 3 | 4 | set host="127.0.0.1" where type="defaultParam"; 5 | set port="3306" where type="defaultParam"; 6 | set userName="root" where type="defaultParam"; 7 | set password="mlsql" where type="defaultParam"; 8 | set bingLogNamePrefix="mysql-bin" where type="defaultParam"; 9 | set binlogIndex="1" where type="defaultParam"; 10 | set binlogFileOffset="4" where type="defaultParam"; 11 | set databaseNamePattern="mlsql_console" where type="defaultParam"; 12 | set tableNamePattern="script_file" where type="defaultParam"; 13 | 14 | set deltaTableHome="/tmp/binlog2delta" where type="defaultParam"; 15 | set idCols="id" where type="defaultParam"; 16 | set duration="10" where type="defaultParam"; 17 | set checkpointLocation="/tmp/ck-binlog2delta" where type="defaultParam"; 18 | 19 | set dbPrefix="mysql" where type="defaultParam"; 20 | 21 | 22 | load binlog.`` where 23 | host="${host}" 24 | and port="${port}" 25 | and userName="${userName}" 26 | and password="${password}" 27 | and bingLogNamePrefix="${bingLogNamePrefix}" 28 | and binlogIndex="${binlogIndex}" 29 | and binlogFileOffset="${binlogFileOffset}" 30 | and databaseNamePattern="${databaseNamePattern}" 31 | and tableNamePattern="${tableNamePattern}" 32 | as binlogTable; 33 | 34 | save append binlogTable 35 | as rate.`${dbPrefix}_{db}.{table}` 36 | options mode="Append" 37 | and idCols="${idCols}" 38 | and syncType="binlog" 39 | and duration="${duration}" 40 | and checkpointLocation="${checkpointLocation}"; -------------------------------------------------------------------------------- /connect-persist/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=connect-persist-app-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist 9 | mlsqlPluginType=app 10 | desc=wow 11 | -------------------------------------------------------------------------------- 
/connect-persist/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | connect-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /connect-persist/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - 'connect-persist-app-2.4'; 5 | ``` 6 | 7 | > Notice: 8 | > If you use MySQL as the MLSQL meta store, you should import the db.sql file into 9 | > your meta database. 10 | 11 | ## Usage 12 | 13 | Use this ET plugin to persist connect statements. 14 | 15 | ```sql 16 | !connectPersist; 17 | ``` 18 | 19 | Then, once the MLSQL Engine is restarted, the connect info will be 20 | restored at startup. 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /connect-persist/db.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `w_connect_table` ( 2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 3 | `format` varchar(256) DEFAULT NULL, 4 | `db` varchar(256) DEFAULT NULL, 5 | `options` text, 6 | PRIMARY KEY (`id`) 7 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /connect-persist/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=connect-persist-app-3.0 2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist 9 | mlsqlPluginType=app 10 | desc=wow -------------------------------------------------------------------------------- /connect-persist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | connect-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /connect-persist/src/main/java/tech/mlsql/plugins/et/ConnectPersistCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import java.util.concurrent.ConcurrentHashMap 4 | 5 | import org.apache.spark.sql.expressions.UserDefinedFunction 6 | import org.apache.spark.sql.{DataFrame, SparkSession} 7 | import streaming.dsl.{ConnectMeta, DBMappingKey} 8 | import streaming.dsl.auth.TableAuthResult 9 | import streaming.dsl.mmlib._ 10 | import streaming.dsl.mmlib.algs.Functions 11 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 12 | import tech.mlsql.common.utils.classloader.ClassLoaderTool 13 | import tech.mlsql.common.utils.lang.sc.ScalaReflect 14 | import tech.mlsql.dsl.auth.ETAuth 15 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 16 | import tech.mlsql.store.DBStore 17 | import tech.mlsql.version.VersionCompatibility 18 | 19 | import scala.collection.JavaConverters._ 20 | 21 | /** 22 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com) 23 | */ 24 | class ConnectPersistCommand(override val uid:
String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 25 | def this() = this(BaseParams.randomUID()) 26 | 27 | 28 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 29 | val session = df.sparkSession 30 | val dbMapping = ConnectMeta.toMap 31 | val items = dbMapping.toList.map(f => ConnectMetaItem(f._1.format, f._1.db, f._2)) 32 | import session.implicits._ 33 | val newdf = session.createDataset[ConnectMetaItem](items).toDF() 34 | DBStore.store.saveTable(session, newdf, ConnectPersistMeta.connectTableName, Option("format,db"), false) 35 | newdf 36 | } 37 | 38 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 39 | List() 40 | } 41 | 42 | override def supportedVersions: Seq[String] = { 43 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 44 | } 45 | 46 | 47 | override def doc: Doc = Doc(MarkDownDoc, 48 | s""" 49 | | 50 | |``` 51 | |${codeExample.code} 52 | |``` 53 | """.stripMargin) 54 | 55 | 56 | override def codeExample: Code = Code(SQLCode, 57 | """ 58 | |example 59 | """.stripMargin) 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 66 | 67 | } 68 | 69 | object ConnectPersistMeta { 70 | def connectTableName = "__mlsql__.connect_table" 71 | } 72 | 73 | case class ConnectMetaItem(format: String, db: String, options: Map[String, String]) 74 | -------------------------------------------------------------------------------- /connect-persist/src/main/java/tech/mlsql/plugins/mllib/ConnectPersistApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib 2 | 3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime} 4 | import _root_.streaming.dsl._ 5 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} 6 | import tech.mlsql.common.utils.log.Logging 7 | import tech.mlsql.datalake.DataLake 8 | import tech.mlsql.dsl.CommandCollection 9 | import tech.mlsql.ets.register.ETRegister 10 | import tech.mlsql.plugins.et.{ConnectMetaItem, ConnectPersistCommand, ConnectPersistMeta} 11 | import tech.mlsql.store.DBStore 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class ConnectPersistApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 18 | override def run(args: Seq[String]): Unit = { 19 | val root = runtime.sparkSession 20 | import root.implicits._ 21 | 22 | ETRegister.register("ConnectPersistCommand", classOf[ConnectPersistCommand].getName) 23 | CommandCollection.refreshCommandMapping(Map("connectPersist" -> "ConnectPersistCommand")) 24 | 25 | val streams = DBStore.store.tryReadTable(root, ConnectPersistMeta.connectTableName, () => root.createDataset[ConnectMetaItem](Seq()).toDF()) 26 | streams.as[ConnectMetaItem].collect().foreach { item => 27 | logInfo(s"load connect statement format: ${item.format} db:${item.db}") 28 | ConnectMeta.options(DBMappingKey(item.format, item.db), item.options) 29 | } 30 | } 31 | 32 | def runtime = { 33 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime] 34 | } 35 | 36 | override def 
supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 37 | } 38 | 39 | -------------------------------------------------------------------------------- /delta-enhancer/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | delta-enhancer-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | tech.mlsql 16 | delta-plus_${scala.binary.version} 17 | ${delta-plus.version} 18 | ${scope} 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /delta-enhancer/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin et add tech.mlsql.plugin.et.DeltaCommand delta-enhancer 5 | named deltaEnhancer; 6 | ``` 7 | 8 | ## Usage 9 | 10 | ```sql 11 | !deltaEnhancer pruneDeletes __mlsql__.plugins 10000; 12 | ``` 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /delta-enhancer/desc.plugin: -------------------------------------------------------------------------------- 1 | tech.mlsql.plugin.et.DeltaCommand 2 | -------------------------------------------------------------------------------- /delta-enhancer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | delta-enhancer-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | tech.mlsql 16 | delta-plus_${scala.binary.version} 17 | ${delta-plus.version} 18 | ${scope} 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /delta-enhancer/src/main/java/tech/mlsql/plugin/et/DeltaCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.et 2 | 3 | import io.delta.tables.DeltaTable 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib.SQLAlg 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.path.PathFun 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.datalake.DataLake 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 2019-09-11 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class DeltaCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 18 | def this() = this(BaseParams.randomUID()) 19 | 20 | 21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 22 | val spark = df.sparkSession 23 | 24 | def resolveRealPath(dataPath: String) = { 25 | val dataLake = new DataLake(spark) 26 | if (dataLake.isEnable) { 27 | dataLake.identifyToPath(dataPath) 28 | } else { 29 | PathFun(path).add(dataPath).toPath 30 | } 31 | } 32 | 33 | 34 | val command = JSONTool.parseJson[List[String]](params("parameters")) 35 | command match { 36 | case Seq("pruneDeletes", dataPath, howManyHoures, _*) => 37 | val deltaLog = DeltaTable.forPath(spark, resolveRealPath(dataPath)) 38 | deltaLog.vacuum(howManyHoures.toInt) 39 | } 40 | 41 | } 42 | 43 | 44 | override def supportedVersions: Seq[String] = { 45 | 
Seq("1.5.0-SNAPSHOT", "1.5.0") 46 | } 47 | 48 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 49 | 50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 51 | 52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /desktop-publish.sh: -------------------------------------------------------------------------------- 1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins 2 | TARGET=/Users/allwefantasy/projects/mlsql-desktop 3 | #conda activate mlsql-plugin-tool 4 | 5 | mods=${1:-mlsql-language-server mlsql-excel mlsql-assert mlsql-shell} 6 | 7 | for mod in ${mods} 8 | do 9 | echo "build= $mod" 10 | mlsql_plugin_tool build --module_name ${mod} --spark spark311 11 | 12 | for os in linux mac win 13 | do 14 | cp ${SOURCE}/$mod/build/${mod}-3.0_2.12-0.1.0-SNAPSHOT.jar ${TARGET}/${os}/plugin 15 | done 16 | done 17 | 18 | #mlsql-language-server mlsql-excel mlsql-assert mlsql-shell 19 | -------------------------------------------------------------------------------- /dev/change-scala-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | set -e 21 | 22 | VALID_VERSIONS=( 2.11 2.12 ) 23 | 24 | usage() { 25 | echo "Usage: $(basename $0) [-h|--help] 26 | where : 27 | -h| --help Display this help text 28 | valid version values : ${VALID_VERSIONS[*]} 29 | " 1>&2 30 | exit 1 31 | } 32 | 33 | if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then 34 | usage 35 | fi 36 | 37 | TO_VERSION=$1 38 | 39 | check_scala_version() { 40 | for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done 41 | echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2 42 | exit 1 43 | } 44 | 45 | check_scala_version "$TO_VERSION" 46 | 47 | if [ $TO_VERSION = "2.12" ]; then 48 | FROM_VERSION="2.11" 49 | else 50 | FROM_VERSION="2.12" 51 | fi 52 | 53 | sed_i() { 54 | sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2" 55 | } 56 | 57 | export -f sed_i 58 | 59 | BASEDIR=$(dirname $0)/.. 
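# Rewrite the Scala binary suffix on each module's artifactId (e.g. foo_2.11 -> foo_2.12) in every pom.xml outside target/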
60 | find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \ 61 | -exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \; 62 | 63 | # Also update in parent POM 64 | # Match any scala binary version to ensure idempotency 65 | sed_i '1,/[0-9]*\.[0-9]*[0-9]*\.[0-9]*'$TO_VERSION' 13 | HBaseRecord(i, "extra") 14 | } 15 | val tableName = "t1" 16 | val familyName = "c1" 17 | 18 | 19 | import spark.implicits._ 20 | sc.parallelize(data).toDF.write 21 | .options(Map( 22 | "outputTableName" -> cat, 23 | "family" -> family 24 | ) ++ options) 25 | .format("org.apache.spark.sql.execution.datasources.hbase2x") 26 | .save() 27 | 28 | val df = spark.read.format("org.apache.spark.sql.execution.datasources.hbase2x").options( 29 | Map( 30 | "inputTableName" -> tableName, 31 | "family" -> familyName, 32 | "field.type.col1" -> "BooleanType", 33 | "field.type.col2" -> "DoubleType", 34 | "field.type.col3" -> "FloatType", 35 | "field.type.col4" -> "IntegerType", 36 | "field.type.col5" -> "LongType", 37 | "field.type.col6" -> "ShortType", 38 | "field.type.col7" -> "StringType", 39 | "field.type.col8" -> "ByteType" 40 | ) 41 | ).load() 42 | ``` 43 | 44 | MLSQL: 45 | 46 | ```sql 47 | set rawText=''' 48 | {"id":9,"content":"Spark好的语言1","label":0.0} 49 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0} 50 | {"id":12,"content":"MLSQL是一个好的语言7","label":0.0} 51 | '''; 52 | 53 | load jsonStr.`rawText` as orginal_text_corpus; 54 | 55 | select cast(id as String) as rowkey,content,label from orginal_text_corpus as orginal_text_corpus1; 56 | 57 | connect hbase2x where `zk`="127.0.0.1:2181" 58 | and `family`="cf" as hbase1; 59 | 60 | save overwrite orginal_text_corpus1 61 | as hbase2x.`hbase1:mlsql_example`; 62 | 63 | load hbase2x.`hbase1:mlsql_example` where field.type.label="DoubleType" 64 | as mlsql_example ; 65 | 66 | select * from mlsql_example as show_data; 67 | ``` 68 | 69 | You should configure parameters like `zookeeper.znode.parent`,`hbase.rootdir` according by 70 | your HBase configuration. 
71 | 72 | Parameters: 73 | 74 | | Property Name | Meaning | 75 | |---|---| 76 | | tsSuffix |to overwrite hbase value's timestamp| 77 | |namespace|hbase namespace| 78 | | family |hbase family,family="" means load all existing families| 79 | | field.type.ck | specify type for ck(field name),now supports:LongType、FloatType、DoubleType、IntegerType、BooleanType、BinaryType、TimestampType、DateType,default: StringType。| 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /ds-hbase-2x/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=ds-hbase-2x-3.0 2 | mainClass=tech.mlsql.plugins.ds.MLSQLHBase2x 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/ds-hbase-2x 9 | mlsqlPluginType=ds 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseConfBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | import org.apache.hadoop.hbase.HBaseConfiguration 4 | import org.apache.spark.sql.SparkSession 5 | import org.json4s.DefaultFormats 6 | import scala.collection.JavaConversions._ 7 | 8 | /** 9 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | object HBaseConfBuilder { 12 | def build(spark: SparkSession, parameters: Map[String, String]) = { 13 | val testConf = spark.sqlContext.sparkContext.conf.getBoolean(SparkHBaseConf.testConf, false) 14 | if (testConf) SparkHBaseConf.conf 15 | else { 16 | implicit val formats = DefaultFormats 17 | 18 | // task is already broadcast; since hConf is per HBaseRelation (currently), broadcast'ing 19 | // it again does not help - it actually hurts. When we add support for 20 | // caching hConf across HBaseRelation, we can revisit broadcast'ing it (with a caching 21 | // mechanism in place) 22 | val hc = HBaseConfiguration.create() 23 | 24 | if (parameters.containsKey("zk") || parameters.containsKey("hbase.zookeeper.quorum")) { 25 | hc.set("hbase.zookeeper.quorum", parameters.getOrElse("zk", parameters.getOrElse("hbase.zookeeper.quorum", "127.0.0.1:2181"))) 26 | } 27 | 28 | if (parameters.containsKey("znode")) { 29 | hc.set("zookeeper.znode.parent", parameters.get("znode").get) 30 | } 31 | 32 | if (parameters.containsKey("rootdir")) { 33 | hc.set("hbase.rootdir", parameters.get("rootdir").get) 34 | } 35 | 36 | /** 37 | * when people confgiure the wrong zk address, by default the HBase client will 38 | * try infinitely. We should control this group parameters to limit the try times. 
39 | */ 40 | hc.set("hbase.client.pause", parameters.getOrElse("hbase.client.pause", "1000")) 41 | hc.set("zookeeper.recovery.retry", parameters.getOrElse("zookeeper.recovery.retry", "60")) 42 | hc.set("hbase.client.retries.number", parameters.getOrElse("hbase.client.retries.number", "60")) 43 | 44 | 45 | parameters.filter { f => 46 | f._1.startsWith("hbase.") || f._1.startsWith("zookeeper.") || f._1.startsWith("phoenix.") 47 | }.foreach { f => 48 | hc.set(f._1, f._2) 49 | } 50 | 51 | hc 52 | } 53 | 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseType.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | /** 4 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | package object hbase2x { 7 | type HBaseType = Array[Byte] 8 | } 9 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/JavaDoc.java: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x; 2 | 3 | /** 4 | * 2019-07-06 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class JavaDoc { 7 | } 8 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/SparkHBaseConf.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | 5 | /** 6 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 7 | */ 8 | object SparkHBaseConf { 9 | val testConf = "spark.hbase.connector.test" 10 | val credentialsManagerEnabled = "spark.hbase.connector.security.credentials.enabled" 11 | val expireTimeFraction = "spark.hbase.connector.security.credentials.expireTimeFraction" 12 | val refreshTimeFraction = "spark.hbase.connector.security.credentials.refreshTimeFraction" 13 | val refreshDurationMins = "spark.hbase.connector.security.credentials.refreshDurationMins" 14 | val principal = "spark.hbase.connector.security.credentials" 15 | val keytab = "spark.hbase.connector.security.keytab" 16 | 17 | var conf: Configuration = _ 18 | var BulkGetSize = "spark.hbase.connector.bulkGetSize" 19 | var defaultBulkGetSize = 100 20 | var CachingSize = "spark.hbase.connector.cacheSize" 21 | var defaultCachingSize = 100 22 | // in milliseconds 23 | val connectionCloseDelay = 10 * 60 * 1000 24 | } 25 | -------------------------------------------------------------------------------- /echo-controller/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | echo-controller-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /echo-controller/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin app add - echo-controller-2.4; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ``` 10 | select 
crawler_http("http://127.0.0.1:9003/run/script","POST",map("owner","wow","sql","select 1 as a as output;","executeMode","echo")) as c as output; 11 | ``` 12 | 13 | The server will respond with `select 1 as a as output;` instead of executing the SQL. -------------------------------------------------------------------------------- /echo-controller/desc.plugin: -------------------------------------------------------------------------------- 1 | tech.mlsql.plugins.app.echocontroller.StreamApp 2 | -------------------------------------------------------------------------------- /echo-controller/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | echo-controller-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /echo-controller/src/main/java/tech/mlsql/plugins/mllib/echocontroller/StreamApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.echocontroller 2 | 3 | import tech.mlsql.app.CustomController 4 | import tech.mlsql.common.utils.serder.json.JSONTool 5 | import tech.mlsql.runtime.AppRuntimeStore 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 7/11/2019 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility { 12 | override def run(args: Seq[String]): Unit = { 13 | AppRuntimeStore.store.registerController("echo", classOf[EchoController].getName) 14 | } 15 | 16 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 17 | } 18 | 19 | class EchoController extends CustomController { 20 | override def run(params: Map[String, String]): String = { 21 | JSONTool.toJsonStr(List(params("sql"))) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /install-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ALL_MODUELS="mlsql-shell mlsql-assert mlsql-mllib mlsql-excel connect-persist last-command run-script save-then-load stream-persist table-repartition" 4 | 5 | MODUELS=${1} 6 | 7 | if [[ "${MODUELS}" == "" ]];then 8 | MODUELS=${ALL_MODUELS} 9 | fi 10 | 11 | for spark_version in spark243 spark311 12 | do 13 | for module in ${MODUELS} 14 | do 15 | ./install.sh ${module} ${spark_version} 16 | done 17 | done 18 | 19 | 20 | # ./install.sh ds-hbase-2x 21 | # ./install.sh mlsql-bigdl -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | PROJECT=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins 2 | 3 | MOUDLE_NAME=$1 4 | VERSION="0.1.0-SNAPSHOT" 5 | V=${2:-3.0} 6 | MIDDLE="2.4_2.11" 7 | 8 | SPARK="spark311" 9 | 10 | if [[ "${V}" == "2.4" ]] 11 | then 12 | SPARK=spark243 13 | fi 14 | 15 | if [[ "${SPARK}" == "spark311" ]] 16 | then 17 | MIDDLE="3.0_2.12" 18 | fi 19 | 20 | echo ${MOUDLE_NAME} 21 | echo ${SPARK} 22 | echo ${MIDDLE} 23 | 24 | mlsql_plugin_tool build --module_name ${MOUDLE_NAME} --spark ${SPARK} 25 | mlsql_plugin_tool upload \ 26 | --module_name ${MOUDLE_NAME} \ 27 | --user ${STORE_USER} \ 28 | --password ${STORE_PASSWORD} \ 29 | --jar_path
${PROJECT}/${MOUDLE_NAME}/build/${MOUDLE_NAME}-${MIDDLE}-${VERSION}.jar 30 | -------------------------------------------------------------------------------- /last-command/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | last-command-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin et add - last-command-2.4 named lastCommand; 5 | ``` 6 | 7 | ## Help 8 | 9 | 10 | ```sql 11 | !show et LastCommand; 12 | ``` 13 | 14 | ## Usage 15 | 16 | ```sql 17 | !hdfs -ls /tmp/; 18 | !lastCommand named hdfsTmpTable; 19 | select * from hdfsTmpTable as output; 20 | ``` 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /last-command/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=last-command-2.4 2 | mainClass=tech.mlsql.plugins.et.LastCommand 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/last-command 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=last command 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | last-command-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/src/main/java/tech/mlsql/plugins/et/LastCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib._ 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.serder.json.JSONTool 10 | import tech.mlsql.version.VersionCompatibility 11 | 12 | 13 | class LastCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 14 | def this() = this(BaseParams.randomUID()) 15 | 16 | // 17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 18 | 19 | val context = ScriptSQLExec.context() 20 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 21 | 22 | //!last named table1; 23 | context.execListener.getLastSelectTable() match { 24 | case Some(tableName) => 25 | command match { 26 | case Array("named", newTableName) => 27 | val newDf = context.execListener.sparkSession.table(tableName) 28 | newDf.createOrReplaceTempView(newTableName) 29 | newDf 30 | } 31 | case None => throw new RuntimeException("no table found in previous command") 32 | } 33 | } 34 | 35 | 36 | override def supportedVersions: Seq[String] = { 37 | 
Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 38 | } 39 | 40 | 41 | override def doc: Doc = Doc(MarkDownDoc, 42 | s""" 43 | |When you want to get the result of a command and use it 44 | | in the next command (SQL), you can use the !last command. 45 | | 46 | |For example: 47 | | 48 | |``` 49 | |${codeExample.code} 50 | |``` 51 | """.stripMargin) 52 | 53 | 54 | override def codeExample: Code = Code(SQLCode, 55 | """ 56 | |!hdfs /tmp; 57 | |!last named hdfsTmpTable; 58 | |select * from hdfsTmpTable; 59 | """.stripMargin) 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 66 | } 67 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - "mlsql-analysis-toolkit-2.4"; 5 | ``` 6 | 7 | 8 | ## Usage 9 | 10 | To compute the median (the 0.5 quantile) of a field in a table: 11 | 12 | ```sql 13 | !approxQuantile time_temp birthday "0.5" valued time_quantile; 14 | select ${time_quantile} as quantile as output; 15 | ``` 16 | 17 | 18 | To create a table with an id column of a fixed size: 19 | 20 | ```sql 21 | !dataframe build range 100000 named table1; 22 | ``` 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-analysis-toolkit-2.4 2 | mainClass=tech.mlsql.plugins.analysis.AnalysisApp 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-analysis-toolkit 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=app 10 | desc=wow 11 | 12 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/AnalysisApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import tech.mlsql.dsl.CommandCollection 4 | import tech.mlsql.ets.register.ETRegister 5 | import tech.mlsql.version.VersionCompatibility 6 | 7 | /** 8 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com) 9 | */ 10 | class AnalysisApp extends tech.mlsql.app.App
with VersionCompatibility { 11 | override def run(args: Seq[String]): Unit = { 12 | ETRegister.register("ApproxQuantile", classOf[ApproxQuantile].getName) 13 | CommandCollection.refreshCommandMapping(Map("approxQuantile" -> "ApproxQuantile")) 14 | 15 | ETRegister.register("DFTool", classOf[DFTool].getName) 16 | CommandCollection.refreshCommandMapping(Map("dataframe" -> "DFTool")) 17 | } 18 | 19 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 20 | } -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/ApproxQuantile.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib.SQLAlg 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.serder.json.JSONTool 10 | import tech.mlsql.version.VersionCompatibility 11 | 12 | /** 13 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com) 14 | */ 15 | class ApproxQuantile(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 16 | def this() = this(BaseParams.randomUID()) 17 | 18 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 19 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 20 | 21 | def compute(table: String, field: String, quantile: String, error: String) = { 22 | df.sparkSession.table(table).stat.approxQuantile(field, Array(quantile.toDouble), error.toDouble) 23 | } 24 | 25 | var tableName: String = null 26 | 27 | val res = command match { 28 | case Array(table, field, quantile) => 29 | compute(table, field, quantile, "0").head 30 | 31 | case Array(table, field, quantile, "valued", value) => 32 | val f = compute(table, field, quantile, "0").head 33 | ScriptSQLExec.context().execListener.addEnv(value, f.toString) 34 | f 35 | case Array(table, field, quantile, "named", value) => 36 | tableName = value 37 | compute(table, field, quantile, "0").head 38 | 39 | case Array(table, field, quantile, error) => 40 | compute(table, field, quantile, error).head 41 | 42 | case Array(table, field, quantile, error, "valued", value) => 43 | val f = compute(table, field, quantile, error).head 44 | ScriptSQLExec.context().execListener.addEnv(value, f.toString) 45 | f 46 | case Array(table, field, quantile, error, "named", value) => 47 | tableName = value 48 | compute(table, field, quantile, error).head 49 | } 50 | 51 | import df.sparkSession.implicits._ 52 | val newdf = df.sparkSession.createDataset[Double](Seq(res)).toDF("value") 53 | if (tableName != null) { 54 | newdf.createOrReplaceTempView(tableName) 55 | } 56 | newdf 57 | 58 | } 59 | 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
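  // Command forms accepted by the pattern match in `train` above:
  //   !approxQuantile <table> <field> <quantile> [<relativeError>];
  //   append `valued <name>` to store the result in a script env variable, or `named <name>` to register the single-row result as a temp view.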
66 | 67 | override def supportedVersions: Seq[String] = { 68 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/DFTool.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.mmlib.SQLAlg 6 | import streaming.dsl.mmlib.algs.Functions 7 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 8 | import tech.mlsql.common.utils.serder.json.JSONTool 9 | import tech.mlsql.version.VersionCompatibility 10 | 11 | /** 12 | * 1/5/2020 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class DFTool(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 15 | def this() = this(BaseParams.randomUID()) 16 | 17 | /** 18 | * !dataframe build range 100 named table1; 19 | */ 20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 21 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 22 | 23 | val newdf = command match { 24 | case Array("build", "range", end, "named", table) => 25 | val temp = df.sparkSession.range(end.toLong).toDF() 26 | temp.createOrReplaceTempView(table) 27 | temp 28 | } 29 | newdf 30 | } 31 | 32 | 33 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 34 | 35 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 36 | 37 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
38 | 39 | override def supportedVersions: Seq[String] = { 40 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /mlsql-assert/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-assert-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-assert/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-assert-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-assert/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-shell 2 | 3 | This plugin provide assert in table. 4 | 5 | 6 | ## Install from store 7 | 8 | Execute following command in web console: 9 | 10 | ``` 11 | !plugin app add - "mlsql-assert-2.4"; 12 | ``` 13 | 14 | 15 | ## Install Manually 16 | 17 | Firstly, build shade jar in your terminal: 18 | 19 | ```shell 20 | pip install mlsql_plugin_tool 21 | mlsql_plugin_tool build --module_name mlsql-assert --spark spark243 22 | ``` 23 | 24 | then change start script of MLSQL Engine, 25 | 26 | Add Jar: 27 | 28 | ``` 29 | --jars YOUR_JAR_PATH 30 | ``` 31 | 32 | Register Class: 33 | 34 | ``` 35 | -streaming.plugin.clzznames tech.mlsql.plugins.shell.app.MLSQLShell 36 | ``` 37 | 38 | If there are more than one class, use comma to seperate them. 
For example: 39 | 40 | ``` 41 | -streaming.plugin.clzznames classA,classB,classC 42 | ``` 43 | 44 | ## Usage 45 | 46 | ```sql 47 | 48 | -- !plugin app remove "mlsql-assert-2.4"; 49 | -- !plugin app add - "mlsql-assert-2.4"; 50 | -- create test data 51 | set jsonStr=''' 52 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}, 53 | {"features":[5.1,3.5,1.4,0.2],"label":1.0} 54 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 55 | {"features":[4.4,2.9,1.4,0.2],"label":0.0} 56 | {"features":[5.1,3.5,1.4,0.2],"label":1.0} 57 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 58 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 59 | {"features":[4.7,3.2,1.3,0.2],"label":1.0} 60 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 61 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 62 | '''; 63 | load jsonStr.`jsonStr` as data; 64 | select vec_dense(features) as features ,label as label from data 65 | as data1; 66 | 67 | -- use RandomForest 68 | train data1 as RandomForest.`/tmp/model` where 69 | 70 | -- once set true,every time you run this script, MLSQL will generate new directory for you model 71 | keepVersion="true" 72 | 73 | -- specicy the test dataset which will be used to feed evaluator to generate some metrics e.g. F1, Accurate 74 | and evaluateTable="data1" 75 | 76 | -- specify group 0 parameters 77 | and `fitParam.0.labelCol`="features" 78 | and `fitParam.0.featuresCol`="label" 79 | and `fitParam.0.maxDepth`="2" 80 | 81 | -- specify group 1 parameters 82 | and `fitParam.1.featuresCol`="features" 83 | and `fitParam.1.labelCol`="label" 84 | and `fitParam.1.maxDepth`="10" 85 | as model_result; 86 | 87 | select name,value from model_result where name="status" as result; 88 | -- make sure status of all models are success. 89 | !assert result ''':value=="success"''' "all model status should be success"; 90 | 91 | ``` 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /mlsql-assert/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-assert-3.0 2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-assert/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-assert-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-assert/src/main/java/tech/mlsql/plugins/assert/app/MLSQLAssert.scala: -------------------------------------------------------------------------------- 1 | 
package tech.mlsql.plugins.assert.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.assert.ets.{Assert, MLSQLThrow} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 4/6/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLAssert extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("Assert", classOf[Assert].getName) 15 | ETRegister.register("Throw", classOf[MLSQLThrow].getName) 16 | CommandCollection.refreshCommandMapping(Map("assert" -> 17 | """ 18 | |run command as Assert.`` where parameters='''{:all}''' 19 | |""".stripMargin)) 20 | CommandCollection.refreshCommandMapping(Map("throw" -> 21 | """ 22 | |run command as Throw.`` where msg='''{0}''' 23 | |""".stripMargin)) 24 | } 25 | 26 | 27 | override def supportedVersions: Seq[String] = { 28 | MLSQLAssert.versions 29 | } 30 | } 31 | 32 | object MLSQLAssert { 33 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 34 | } -------------------------------------------------------------------------------- /mlsql-assert/src/main/java/tech/mlsql/plugins/assert/ets/MLSQLThrow.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.assert.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.mlsql.session.MLSQLException 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import streaming.dsl.mmlib._ 10 | import tech.mlsql.dsl.auth.ETAuth 11 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 12 | import tech.mlsql.plugins.assert.app.MLSQLAssert 13 | import tech.mlsql.version.VersionCompatibility 14 | 15 | /** 16 | * 4/9/2021 WilliamZhu(allwefantasy@gmail.com) 17 | */ 18 | class MLSQLThrow(override val uid: String) extends SQLAlg 19 | with VersionCompatibility with Functions with WowParams with ETAuth { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | throw new RuntimeException(params("msg")) 24 | } 25 | 26 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 27 | 28 | override def skipPathPrefix: Boolean = true 29 | 30 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = throw new MLSQLException(s"${getClass.getName} not support register ") 31 | 32 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = throw new MLSQLException(s"${getClass.getName} not support register ") 33 | 34 | override def supportedVersions: Seq[String] = MLSQLAssert.versions 35 | 36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 37 | List() 38 | } 39 | 40 | override def modelType: ModelType = ProcessType 41 | 42 | override def doc: Doc = Doc(HtmlDoc, 43 | """ 44 | | 45 | | This ET is used to stop the execute of the script. 
46 | """.stripMargin) 47 | 48 | 49 | override def codeExample: Code = Code(SQLCode, 50 | """ 51 | | 52 | |!throw "exception msg"; 53 | |run command as Throw.`` where msg=""; 54 | """.stripMargin) 55 | } 56 | -------------------------------------------------------------------------------- /mlsql-bigdl/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-bigdl-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl 9 | mlsqlPluginType=app 10 | desc=bigdl 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-bigdl/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin app add - "mlsql-bigdl-2.4"; 5 | ``` 6 | 7 | ## Usage 8 | 9 | Check this [Doc](http://docs.mlsql.tech/zh/dl/load_image.html) 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /mlsql-bigdl/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-bigdl-3.0 2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl 9 | mlsqlPluginType=app 10 | desc=bigdl 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/LogTrainSummary.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.intel.analytics.bigdl.visualization 20 | 21 | import org.apache.spark.internal.Logging 22 | import streaming.log.WowLog 23 | 24 | 25 | class LogTrainSummary(logDir: String, 26 | appName: String) extends TrainSummary(logDir, appName) with Logging with WowLog { 27 | 28 | override def addScalar(tag: String, value: Float, step: Long): LogTrainSummary.this.type = { 29 | // tag match { 30 | // case "Throughput" => 31 | // logInfo(format(s"global step: ${step} Throughput is ${value} records/second. 
")) 32 | // case "Loss" => 33 | // logInfo(format(s"global step: ${step} Loss is ${value}")) 34 | // case _ => 35 | // logInfo(format(s"global step: ${step} ${tag} is ${value}")) 36 | // } 37 | 38 | super.addScalar(tag, value, step) 39 | } 40 | } 41 | 42 | class LogValidateSummary(logDir: String, 43 | appName: String) extends ValidationSummary(logDir, appName) with Logging with WowLog { 44 | override def addScalar(tag: String, value: Float, step: Long): LogValidateSummary.this.type = { 45 | //logInfo(format(s"global step: ${step} ${tag} is ${value}")) 46 | super.addScalar(tag, value, step) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/WowFileWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.intel.analytics.bigdl.visualization 20 | 21 | import com.intel.analytics.bigdl.visualization.tensorboard.FileWriter 22 | 23 | 24 | class WowFileWriter(folder: String) extends FileWriter(folder) { 25 | 26 | } 27 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/BigDLApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.bigdl 2 | 3 | import tech.mlsql.ets.register.ETRegister 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * 5/4/2020 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | class BigDLApp extends tech.mlsql.app.App with VersionCompatibility { 10 | override def run(args: Seq[String]): Unit = { 11 | ETRegister.register("ImageLoaderExt", classOf[SQLImageLoaderExt].getName) 12 | ETRegister.register("MnistLoaderExt", classOf[SQLMnistLoaderExt].getName) 13 | ETRegister.register("BigDLClassifyExt", classOf[SQLBigDLClassifyExt].getName) 14 | ETRegister.register("LeNet5Ext", classOf[SQLLeNet5Ext].getName) 15 | } 16 | 17 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 18 | } 19 | 20 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/WowClassNLLCriterion.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.bigdl 2 | 3 | import com.intel.analytics.bigdl.nn.ClassNLLCriterion 4 | import com.intel.analytics.bigdl.tensor.Tensor 5 | import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric 6 | import streaming.dsl.mmlib.algs.bigdl.ClassWeightParamExtractor 7 | 8 | 9 | object WowClassNLLCriterion { 10 | 
def apply( 11 | paramsExtractor: ClassWeightParamExtractor 12 | )(implicit ev: TensorNumeric[Float]): ClassNLLCriterion[Float] = { 13 | val weights = paramsExtractor.weights.map(f => Tensor(f, Array(f.size))).getOrElse(null) 14 | new ClassNLLCriterion[Float](weights, 15 | paramsExtractor.sizeAverage.getOrElse(true), 16 | paramsExtractor.logProbAsInput.getOrElse(true), 17 | paramsExtractor.paddingValue.getOrElse(-1) 18 | ) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-canal/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-canal-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.canal.CanalApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=zml1206 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal 9 | mlsqlPluginType=app 10 | desc=mlsql-canal 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-canal/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-canal-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | shade 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-shade-plugin 25 | 3.2.0 26 | 27 | 28 | 29 | *:* 30 | 31 | META-INF/*.SF 32 | META-INF/*.DSA 33 | META-INF/*.RSA 34 | 35 | 36 | 37 | false 38 | 39 | 40 | org.apache.poi 41 | shadeio.poi 42 | 43 | 44 | com.norbitltd.spoiwo 45 | shadeio.spoiwo 46 | 47 | 48 | com.github.pjfanning 49 | shadeio.pjfanning 50 | 51 | 52 | org.apache.commons.compress 53 | shadeio.commons.compress 54 | 55 | 56 | 57 | 58 | 59 | 60 | package 61 | 62 | shade 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /mlsql-canal/README.md: -------------------------------------------------------------------------------- 1 | mlsql-canal 2 | 3 | Used in streaming, parse canal binlog, store it to delta lake, support ddl. 4 | Only support spark 3.X. 
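Each Kafka record the plugin consumes is expected to be a Canal flat message in JSON form; the fields mirror the `BinlogRecord` case class used by the sink. A minimal sketch with illustrative values only:

```json
{
  "data": [{"id": "1", "name": "foo"}],
  "database": "canal_test",
  "es": 1623400000000,
  "id": 5,
  "isDdl": false,
  "mysqlType": {"id": "int(11)", "name": "varchar(64)"},
  "old": [{"name": "bar"}],
  "pkNames": ["id"],
  "sql": "",
  "sqlType": {"id": 4, "name": 12},
  "table": "test",
  "ts": 1623400000123,
  "type": "UPDATE"
}
```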
5 | 6 | ## Install 7 | 8 | ``` 9 | !plugin ds add - "mlsql-canal-3.0"; 10 | ``` 11 | 12 | or install as app: 13 | 14 | ``` 15 | !plugin app add "tech.mlsql.plugins.canal.CanalApp" "mlsql-canal-3.0"; 16 | ``` 17 | 18 | 19 | ## Usage 20 | 21 | ```sql 22 | set streamName="binlog_to_delta"; 23 | 24 | load kafka.`binlog-canal_test` 25 | options `kafka.bootstrap.servers` = "***" 26 | and `maxOffsetsPerTrigger`="600000" 27 | as kafka_record; 28 | 29 | select cast(value as string) as value from kafka_record 30 | as kafka_value; 31 | 32 | save append kafka_value 33 | as custom.`` 34 | options mode = "Append" 35 | and duration = "20" 36 | and sourceTable = "kafka_value" 37 | and checkpointLocation = "checkpoint/binlog_to_delta" 38 | and code = ''' 39 | run kafka_value 40 | as BinlogToDelta.`` 41 | options dbTable = "canal_test.test"; 42 | '''; 43 | ``` -------------------------------------------------------------------------------- /mlsql-canal/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-canal-3.0 2 | mainClass=tech.mlsql.plugins.canal.CanalApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=zml1206 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal 9 | mlsqlPluginType=app 10 | desc=mlsql-canal 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-canal/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-canal-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | shade 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-shade-plugin 25 | 3.2.0 26 | 27 | 28 | 29 | *:* 30 | 31 | META-INF/*.SF 32 | META-INF/*.DSA 33 | META-INF/*.RSA 34 | 35 | 36 | 37 | false 38 | 39 | 40 | org.apache.poi 41 | shadeio.poi 42 | 43 | 44 | com.norbitltd.spoiwo 45 | shadeio.spoiwo 46 | 47 | 48 | com.github.pjfanning 49 | shadeio.pjfanning 50 | 51 | 52 | org.apache.commons.compress 53 | shadeio.commons.compress 54 | 55 | 56 | 57 | 58 | 59 | 60 | package 61 | 62 | shade 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/CanalApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal 2 | 3 | import tech.mlsql.ets.register.ETRegister 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * Created by zhuml on 2021/6/11. 
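 * Registers the BinlogToDelta ET at startup so streaming scripts (see the README usage above) can
 * sink Canal binlog records into Delta Lake.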
8 | */ 9 | class CanalApp extends tech.mlsql.app.App with VersionCompatibility { 10 | 11 | override def run(args: Seq[String]): Unit = { 12 | ETRegister.register("BinlogToDelta", "tech.mlsql.plugins.canal.ets.BinlogToDelta") 13 | } 14 | 15 | override def supportedVersions: Seq[String] = Seq("1.6.0-SNAPSHOT") 16 | 17 | } 18 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/ets/BinlogToDelta.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.mlsql.session.MLSQLException 6 | import org.apache.spark.sql.{DataFrame, SparkSession} 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.param.WowParams 9 | import tech.mlsql.common.utils.log.Logging 10 | import tech.mlsql.plugins.canal.sink.{BinlogWritter, DeltaSink} 11 | 12 | /** 13 | * Created by zhuml on 2021/6/11. 14 | */ 15 | class BinlogToDelta(override val uid: String) extends SQLAlg with WowParams with Logging { 16 | 17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 18 | 19 | val spark = df.sparkSession 20 | 21 | params.get(dbTable.name) 22 | .map(m => set(dbTable, m)).getOrElse { 23 | throw new MLSQLException(s"${dbTable.name} is required") 24 | } 25 | params.get(maxTs.name) 26 | .map(m => set(maxTs, m)).getOrElse { 27 | set(maxTs, "0") 28 | } 29 | 30 | val sink = new DeltaSink(spark, $(dbTable)) 31 | new BinlogWritter(sink, df, $(maxTs).toLong).write 32 | 33 | spark.emptyDataFrame 34 | } 35 | 36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = { 37 | throw new RuntimeException(s"${ 38 | getClass.getName 39 | } not support load function.") 40 | } 41 | 42 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, 43 | String]): UserDefinedFunction = { 44 | throw new RuntimeException(s"${ 45 | getClass.getName 46 | } not support predict function.") 47 | } 48 | 49 | override def explainParams(sparkSession: SparkSession): DataFrame = { 50 | _explainParams(sparkSession) 51 | } 52 | 53 | final val dbTable: Param[String] = new Param[String](this, "dbTable", "db.table") 54 | final val maxTs: Param[String] = new Param[String](this, "maxTs", "delta table max ts") 55 | 56 | override def doc: Doc = Doc(MarkDownDoc, 57 | """ 58 | |BinlogToDelta CDC数据变更捕获解析同步delta模块 59 | | 60 | |```sql 61 | |run table as BinlogToDelta.`` 62 | |options daTable="a.b" 63 | | as t; 64 | |``` 65 | | 66 | """.stripMargin) 67 | 68 | override def modelType: ModelType = ProcessType 69 | 70 | def this() = this(WowParams.randomUID()) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/mysql/JdbcTypeParser.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.mysql 2 | 3 | import com.alibaba.druid.sql.ast.{SQLDataType, SQLDataTypeImpl} 4 | import org.apache.spark.sql.types.{DataType, DecimalType, StructField, StructType} 5 | 6 | /** 7 | * Created by zhuml on 2021/6/11. 
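 * Translates the MySQL column type strings carried in a Canal message (e.g. "decimal(10,2)",
 * "int unsigned") into Spark Catalyst DataTypes, reusing Spark's internal
 * JdbcUtils.getCatalystType through reflection.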
8 | */ 9 | object JdbcTypeParser { 10 | 11 | val UNSIGNED = """.*(unsigned)""".r 12 | 13 | // 判断是否为有符号数 14 | def isSigned(typeName: String) = { 15 | typeName.trim match { 16 | case UNSIGNED(unsigned) => false 17 | case _ => true 18 | } 19 | } 20 | 21 | val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r 22 | val FIXED_NUMERIC = """numeric\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r 23 | val FIXED_SCALE = """\w*\(\s*(\d+)\s*\)""".r 24 | 25 | 26 | // decimal/numeric 数据类型 具有precision固定精度(最大位数)和scale小数位数(点右侧的位数)的十进制数。 27 | def parsePrecisionScale(name: String) = { 28 | name match { 29 | case "decimal" | "numeric" => Array(DecimalType.SYSTEM_DEFAULT.precision, DecimalType.SYSTEM_DEFAULT.scale) 30 | case FIXED_DECIMAL(precision, scale) => Array(precision.toInt, scale.toInt) 31 | case FIXED_NUMERIC(precision, scale) => Array(precision.toInt, scale.toInt) 32 | case FIXED_SCALE(scale) => Array(scale.toInt, 0) 33 | case _ => Array(0, 0) 34 | } 35 | } 36 | 37 | def getMysqlStructType(sqlTypeMap: Map[String, Int], mysqlTypeMap: Map[String, String]): StructType = { 38 | 39 | val fields = mysqlTypeMap.map(k => { 40 | val sqlType = sqlTypeMap(k._1) 41 | val Array(precision, scale) = parsePrecisionScale(k._2) 42 | val signed = isSigned(k._2) 43 | val columnType = getCatalystTypePrivate(sqlType, precision, scale, signed).asInstanceOf[DataType] 44 | StructField(k._1, columnType) 45 | }).toArray 46 | new StructType(fields) 47 | } 48 | 49 | def getSqlTypeCode(name: String): Integer = { 50 | val _type = """\w*""".r.findFirstIn(name).getOrElse("").toUpperCase 51 | MysqlType.valueOf(_type).getVendorTypeNumber 52 | } 53 | 54 | def sqlTypeToDataType(sqlDataType: SQLDataType): DataType = { 55 | val name = sqlDataType.getName 56 | val Array(precision, scale) = parsePrecisionScale(name) 57 | val sqlType = getSqlTypeCode(name) 58 | getCatalystTypePrivate(sqlType, precision, scale, !sqlDataType.asInstanceOf[SQLDataTypeImpl].isUnsigned).asInstanceOf[DataType] 59 | } 60 | 61 | // JDBC type to Catalyst type 62 | lazy val getCatalystTypePrivate = { 63 | import scala.reflect.runtime.{universe => ru} 64 | val classMirror = ru.runtimeMirror(getClass.getClassLoader) 65 | val JdbcUtils = classMirror.staticModule("org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils") 66 | val methods = classMirror.reflectModule(JdbcUtils) 67 | val instanceMirror = classMirror.reflect(methods.instance) 68 | val method = methods.symbol.typeSignature.member(ru.TermName("getCatalystType")).asMethod 69 | 70 | instanceMirror.reflectMethod(method) 71 | } 72 | 73 | 74 | } 75 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogConstants.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | /** 4 | * Created by zhuml on 2021/6/11. 
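 * Internal column names (___ts___, ___delete___) added by the sink, plus the BinlogRecord case
 * class that mirrors a Canal JSON message.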
5 | */ 6 | object BinlogConstants { 7 | 8 | val TS_FIELD = "___ts___" 9 | val DELETE_FIELD = "___delete___" 10 | } 11 | 12 | case class BinlogRecord(data: Array[Map[String, String]], 13 | database: String, 14 | es: String, 15 | id: Long, 16 | isDdl: Boolean, 17 | mysqlType: Map[String, String], 18 | old: Array[Map[String, String]], 19 | pkNames: Array[String], 20 | sql: String, 21 | sqlType: Map[String, Int], 22 | table: String, 23 | ts: Long, 24 | `type`: String) 25 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogWritter.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import org.apache.spark.sql.DataFrame 4 | import tech.mlsql.plugins.canal.mysql.statement.DDLStatementParser 5 | import tech.mlsql.plugins.canal.util.JacksonUtil 6 | 7 | /** 8 | * Created by zhuml on 2021/6/11. 9 | */ 10 | class BinlogWritter(@transient sink: Sink, df: DataFrame, maxTs: Long) extends Serializable { 11 | 12 | val spark = df.sparkSession 13 | 14 | def write = { 15 | sink.addTsIfNotExsit 16 | val filterDF = filter() 17 | //segment merge by ddl 18 | val ddls = filterDF.filter(r => r.isDdl && Array("ALTER", "TRUNCATE").contains(r.`type`)).collect() 19 | val dmlDS = filterDF.filter(r => !r.isDdl && Array("INSERT", "UPDATE", "DELETE").contains(r.`type`.toUpperCase)) 20 | var tsMin = 0L 21 | var tsMax = 0L 22 | ddls.foreach(ddl => { 23 | val ddlParser = new DDLStatementParser(sink.tableLoad, ddl.sql) 24 | ddlParser.parseDF() 25 | if (ddlParser.isUpdate) { 26 | tsMax = ddl.ts 27 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin && r.ts < tsMax)) 28 | sink.updateSchema(ddlParser.df) 29 | tsMin = tsMax 30 | } 31 | }) 32 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin)) 33 | } 34 | 35 | def filter() = { 36 | import spark.implicits._ 37 | val table = sink.table 38 | df.map(r => JacksonUtil.fromJson(r.getString(0), classOf[BinlogRecord])) 39 | .filter(r => r.ts >= maxTs 40 | && s"${r.database}.${r.table}".equals(table)) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/DeltaSink.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import io.delta.tables.DeltaTable 4 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 5 | import tech.mlsql.common.utils.path.PathFun 6 | import tech.mlsql.datalake.DataLake 7 | 8 | /** 9 | * Created by zhuml on 2021/6/11. 
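 * Delta Lake implementation of Sink: resolves the table path (via DataLake when enabled),
 * rewrites the schema on DDL changes, and merges deduplicated binlog rows into the Delta table,
 * deleting the rows whose ___delete___ flag is true.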
10 | */ 11 | class DeltaSink(spark: SparkSession, dbTable: String) extends Sink(dbTable: String) { 12 | 13 | val dataLake = new DataLake(spark) 14 | 15 | val finalPath = if (dataLake.isEnable) { 16 | dataLake.identifyToPath(dbTable) 17 | } else { 18 | PathFun(dbTable).add(dbTable).toPath 19 | } 20 | 21 | override def tableLoad() = spark.read.format("delta").load(finalPath) 22 | 23 | override def updateSchema(df: DataFrame): Unit = { 24 | df.write 25 | .format("delta") 26 | .mode("overwrite") 27 | .option("overwriteSchema", "true") 28 | .save(finalPath) 29 | } 30 | 31 | override def mergeData(ds: Dataset[BinlogRecord]): Unit = { 32 | val records = ds.take(1) 33 | if (records.length > 0) { 34 | val record = records(0) 35 | val schema = tableLoad.drop(BinlogConstants.TS_FIELD).schema 36 | val changesDF = duplicate(ds, schema) 37 | mergeToDelta(changesDF, record.pkNames, BinlogConstants.DELETE_FIELD) 38 | } 39 | } 40 | 41 | def mergeToDelta(df: DataFrame, pkNames: Array[String], deleteField: String): Unit = { 42 | val deltaTable = DeltaTable.forPath(spark, finalPath) 43 | val condition = pkNames.map(pk => s"s.${pk} = t.${pk}").mkString(" and ") 44 | deltaTable.as("t") 45 | .merge( 46 | df.as("s"), condition) 47 | .whenMatched(s"s.${deleteField} = true") 48 | .delete() 49 | .whenMatched().updateAll() 50 | .whenNotMatched(s"s.${deleteField} = false").insertAll() 51 | .execute() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/Sink.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import org.apache.spark.sql.functions._ 4 | import org.apache.spark.sql.types._ 5 | import org.apache.spark.sql.{DataFrame, Dataset, functions => F} 6 | import tech.mlsql.plugins.canal.util.JacksonUtil 7 | 8 | /** 9 | * Created by zhuml on 2021/6/11. 
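 * Base class for binlog sinks: ensures the ___ts___ column exists on the target table and
 * collapses Canal records per primary key (keeping only the latest change) before handing them
 * to the concrete sink for merging.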
10 | */ 11 | abstract class Sink(val table: String) { 12 | 13 | def tableLoad: DataFrame 14 | 15 | def updateSchema(df: DataFrame) 16 | 17 | def mergeData(ds: Dataset[BinlogRecord]) 18 | 19 | def addTsIfNotExsit = { 20 | { 21 | val df = tableLoad 22 | if (!df.schema.fieldNames.contains(BinlogConstants.TS_FIELD)) { 23 | updateSchema(df.withColumn(BinlogConstants.TS_FIELD, typedLit[Long](0))) 24 | } 25 | } 26 | } 27 | 28 | // duplicate binlog and parser data 29 | def duplicate(df: Dataset[BinlogRecord], 30 | schema: StructType): DataFrame = { 31 | import df.sparkSession.implicits._ 32 | val schemaMap = schema.fields.map(s => s.name -> s.dataType).toMap 33 | 34 | val f = F.udf((dataJson: String) => { 35 | val dataMap = JacksonUtil.fromJson(dataJson, classOf[Map[String, String]]) 36 | .map(data => { 37 | if (data._2 != null) { 38 | schemaMap.get(data._1) match { 39 | case Some(IntegerType) => (data._1, data._2.toInt) 40 | case Some(LongType) => (data._1, data._2.toLong) 41 | case Some(DoubleType) => (data._1, data._2.toDouble) 42 | case Some(FloatType) => (data._1, data._2.toFloat) 43 | case _ => data 44 | } 45 | } else { 46 | data 47 | } 48 | }) 49 | JacksonUtil.toJson(dataMap) 50 | }) 51 | 52 | df.flatMap(r => { 53 | r.data.map(data => { 54 | (r.pkNames.map(data.get(_)), (r.ts, r.`type`, JacksonUtil.toJson(data))) 55 | }) 56 | }).groupBy("_1").agg(max("_2").as("latest")) 57 | .withColumn(("data"), f(F.col("latest._3"))) 58 | .select(from_json($"data", schema).as("data"), $"latest._1".as(BinlogConstants.TS_FIELD), $"latest._2".as(BinlogConstants.DELETE_FIELD)) 59 | .selectExpr("data.*", s"${BinlogConstants.TS_FIELD}", s"if(${BinlogConstants.DELETE_FIELD}='DELETE',true,false) as ${BinlogConstants.DELETE_FIELD}") 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/util/JacksonUtil.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.util 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 5 | 6 | import scala.util.control.NonFatal 7 | 8 | object JacksonUtil { 9 | 10 | private val _mapper = new ObjectMapper() 11 | _mapper.registerModule(DefaultScalaModule) 12 | 13 | def toJson[T](obj: T): String = { 14 | _mapper.writeValueAsString(obj) 15 | } 16 | 17 | def fromJson[T](json: String, `class`: Class[T]): T = { 18 | try { 19 | _mapper.readValue(json, `class`) 20 | } catch { 21 | case NonFatal(e) => 22 | null.asInstanceOf[T] 23 | } 24 | } 25 | 26 | def prettyPrint[T](obj: T): String = { 27 | _mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /mlsql-cli/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cli-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli 9 | mlsqlPluginType=app 10 | desc=mlsql-cli 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-cli/desc.plugin: 
-------------------------------------------------------------------------------- 1 | moduleName=mlsql-cli-3.0 2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli 9 | mlsqlPluginType=app 10 | desc=mlsql-cli 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-cli/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cli-3.0_2.12 13 | 14 | 15 | 16 | shade 17 | 18 | 19 | 20 | org.apache.maven.plugins 21 | maven-shade-plugin 22 | 3.2.0 23 | 24 | 25 | 26 | *:* 27 | 28 | META-INF/*.SF 29 | META-INF/*.DSA 30 | META-INF/*.RSA 31 | 32 | 33 | 34 | false 35 | 36 | 37 | org.apache.poi 38 | shadeio.poi 39 | 40 | 41 | com.norbitltd.spoiwo 42 | shadeio.spoiwo 43 | 44 | 45 | com.github.pjfanning 46 | shadeio.pjfanning 47 | 48 | 49 | org.apache.commons.compress 50 | shadeio.commons.compress 51 | 52 | 53 | 54 | 55 | 56 | 57 | package 58 | 59 | shade 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | info.picocli 72 | picocli 73 | 4.0.1 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliCommands.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | /** 4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class CliCommands { 7 | public static final String DEFAULT = "default"; 8 | public static final String HELP = "help"; 9 | public static final String VERSION = "version"; 10 | public static final String RUN = "run"; 11 | public static final String HOME = "home"; 12 | } 13 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliException.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | public class CliException extends RuntimeException { 10 | private List detailedMessages = new ArrayList<>(); 11 | 12 | public List getDetailedMessages() { 13 | return detailedMessages; 14 | } 15 | 16 | void addMessage(String message) { 17 | detailedMessages.add(message); 18 | } 19 | 20 | public List getMessages() { 21 | return detailedMessages; 22 | } 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliExceptionUtils.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | /** 4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class CliExceptionUtils { 7 | public static CliException createUsageExceptionWithHelp(String errorMsg) { 8 | CliException launcherException = new CliException(); 9 | launcherException.addMessage("mlsql: " + errorMsg); 10 | launcherException.addMessage("Run 'mlsql help' for usage."); 11 | return launcherException; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- 
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCli.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import picocli.CommandLine; 4 | import tech.mlsql.core.version.MLSQLVersion; 5 | import tech.mlsql.core.version.VersionInfo; 6 | 7 | import java.io.PrintStream; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | /** 12 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | public class MLSQLCli { 15 | 16 | private static PrintStream errStream = System.err; 17 | private static PrintStream outStream = System.out; 18 | 19 | public static void main(String[] args) { 20 | 21 | } 22 | 23 | @CommandLine.Command(description = "Default Command.", name = "default") 24 | private static class DefaultCmd implements MLSQLCmd { 25 | 26 | @CommandLine.Option(names = {"--help", "-h", "?"}, hidden = true, description = "for more information") 27 | private boolean helpFlag; 28 | 29 | @CommandLine.Option(names = {"--version", "-v"}, hidden = true) 30 | private boolean versionFlag; 31 | 32 | @CommandLine.Parameters(arity = "0..1") 33 | private List argList = new ArrayList<>(); 34 | 35 | @Override 36 | public void execute() { 37 | if (versionFlag) { 38 | printVersionInfo(); 39 | return; 40 | } 41 | 42 | if (!argList.isEmpty()) { 43 | printUsageInfo(argList.get(0)); 44 | return; 45 | } 46 | 47 | printUsageInfo(CliCommands.HELP); 48 | } 49 | 50 | @Override 51 | public String getName() { 52 | return "default"; 53 | } 54 | 55 | @Override 56 | public void printLongDesc(StringBuilder out) { 57 | 58 | } 59 | 60 | @Override 61 | public void printUsage(StringBuilder out) { 62 | 63 | } 64 | 65 | @Override 66 | public void setParentCmdParser(CommandLine parentCmdParser) { 67 | } 68 | } 69 | 70 | private static void printUsageInfo(String commandName) { 71 | String usageInfo = MLSQLCmd.getCommandUsageInfo(commandName); 72 | errStream.println(usageInfo); 73 | } 74 | 75 | private static void printVersionInfo() { 76 | VersionInfo verison = MLSQLVersion.version(); 77 | String output = "MLSQL: " + verison.version() + "; Spark Core: None"; 78 | outStream.print(output); 79 | } 80 | } 81 | 82 | 83 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCmd.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import picocli.CommandLine; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.InputStreamReader; 9 | import java.nio.charset.StandardCharsets; 10 | 11 | public interface MLSQLCmd { 12 | 13 | 14 | void execute(); 15 | 16 | 17 | String getName(); 18 | 19 | 20 | void printLongDesc(StringBuilder out); 21 | 22 | 23 | void printUsage(StringBuilder out); 24 | 25 | 26 | void setParentCmdParser(CommandLine parentCmdParser); 27 | 28 | 29 | static String getCommandUsageInfo(String commandName) { 30 | if (commandName == null) { 31 | throw CliExceptionUtils.createUsageExceptionWithHelp("invalid command"); 32 | } 33 | 34 | String fileName = "cli-help/mlsql-" + commandName + ".help"; 35 | try { 36 | return readFileAsString(fileName); 37 | } catch (IOException e) { 38 | throw CliExceptionUtils.createUsageExceptionWithHelp("usage info not available for command: " + commandName); 39 | } 40 | } 41 | 42 | static String readFileAsString(String path) throws IOException { 43 | InputStream is = 
ClassLoader.getSystemResourceAsStream(path); 44 | InputStreamReader inputStreamREader = null; 45 | BufferedReader br = null; 46 | StringBuilder sb = new StringBuilder(); 47 | try { 48 | inputStreamREader = new InputStreamReader(is, StandardCharsets.UTF_8); 49 | br = new BufferedReader(inputStreamREader); 50 | String content = br.readLine(); 51 | if (content == null) { 52 | return sb.toString(); 53 | } 54 | 55 | sb.append(content); 56 | 57 | while ((content = br.readLine()) != null) { 58 | sb.append('\n').append(content); 59 | } 60 | } finally { 61 | if (inputStreamREader != null) { 62 | try { 63 | inputStreamREader.close(); 64 | } catch (IOException ignore) { 65 | } 66 | } 67 | if (br != null) { 68 | try { 69 | br.close(); 70 | } catch (IOException ignore) { 71 | } 72 | } 73 | } 74 | return sb.toString(); 75 | } 76 | } -------------------------------------------------------------------------------- /mlsql-cube/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cube-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-cube/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cube-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-cube/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cube-3.0 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-cube/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cube-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | 
org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-ds/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ds-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds 9 | mlsqlPluginType=app 10 | desc=ds 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ds/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ds-3.0 2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds 9 | mlsqlPluginType=app 10 | desc=ds 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ds/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-ds-3.0_2.12 13 | 14 | 15 | com.databricks 16 | spark-xml_2.12 17 | 0.13.0 18 | 19 | 20 | 21 | 22 | 23 | 24 | shade 25 | 26 | 27 | 28 | org.apache.maven.plugins 29 | maven-shade-plugin 30 | 3.2.0 31 | 32 | 33 | 34 | *:* 35 | 36 | META-INF/*.SF 37 | META-INF/*.DSA 38 | META-INF/*.RSA 39 | 40 | 41 | 42 | false 43 | 44 | 45 | org.apache.poi 46 | shadeio.poi 47 | 48 | 49 | com.norbitltd.spoiwo 50 | shadeio.spoiwo 51 | 52 | 53 | com.github.pjfanning 54 | shadeio.pjfanning 55 | 56 | 57 | org.apache.commons.compress 58 | shadeio.commons.compress 59 | 60 | 61 | 62 | 63 | 64 | 65 | package 66 | 67 | shade 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLDs.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ds.app 2 | 3 | import streaming.core.datasource.MLSQLRegistry 4 | import tech.mlsql.common.utils.classloader.ClassLoaderTool 5 | import tech.mlsql.common.utils.log.Logging 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class MLSQLDs extends tech.mlsql.app.App with VersionCompatibility with Logging { 12 | override def run(args: Seq[String]): Unit = { 13 | registerDS(classOf[MLSQLXml].getName) 14 | } 15 | 16 | 17 | def registerDS(name: String) = { 18 | val dataSource = ClassLoaderTool.classForName(name).newInstance() 19 | if (dataSource.isInstanceOf[MLSQLRegistry]) { 20 | dataSource.asInstanceOf[MLSQLRegistry].register() 21 | } 22 | } 23 | 24 | override def supportedVersions: Seq[String] = { 25 | MLSQLDs.versions 26 | } 27 | } 28 | 29 | object MLSQLDs { 30 | val versions = Seq(">=2.1.0") 31 | } 
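Once this app plugin is installed, the `xml` data source registered above is available from MLSQL scripts. A minimal sketch, assuming a file uploaded to `/tmp/upload`; the path and the `rowTag` value are illustrative, and any other options are passed through to com.databricks.spark.xml:

```sql
load xml.`/tmp/upload/books.xml` where rowTag="book" as books;
select * from books as output;
```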
-------------------------------------------------------------------------------- /mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLXml.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ds.app 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import streaming.core.datasource._ 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 29/9/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLXml(override val uid: String) 13 | extends MLSQLBaseFileSource 14 | with WowParams with VersionCompatibility { 15 | def this() = this(BaseParams.randomUID()) 16 | 17 | override def sourceInfo(config: DataAuthConfig): SourceInfo = { 18 | val context = ScriptSQLExec.contextGetOrForTest() 19 | val owner = config.config.get("owner").getOrElse(context.owner) 20 | SourceInfo(shortFormat, "", resourceRealPath(context.execListener, Option(owner), config.path)) 21 | } 22 | 23 | override def explainParams(spark: SparkSession) = { 24 | _explainParams(spark) 25 | } 26 | 27 | override def register(): Unit = { 28 | DataSourceRegistry.register(MLSQLDataSourceKey(fullFormat, MLSQLSparkDataSourceType), this) 29 | DataSourceRegistry.register(MLSQLDataSourceKey(shortFormat, MLSQLSparkDataSourceType), this) 30 | } 31 | 32 | override def fullFormat: String = "com.databricks.spark.xml" 33 | 34 | override def shortFormat: String = "xml" 35 | 36 | override def supportedVersions: Seq[String] = { 37 | MLSQLDs.versions 38 | } 39 | } -------------------------------------------------------------------------------- /mlsql-excel/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-excel-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel 9 | mlsqlPluginType=ds 10 | desc=excel 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-excel/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin ds add - "mlsql-excel-2.4"; 5 | ``` 6 | 7 | or install as app: 8 | 9 | ``` 10 | !plugin app add "tech.mlsql.plugins.ds.MLSQLApp" "mlsql-excel-2.4"; 11 | ``` 12 | 13 | 14 | ## Usage 15 | 16 | ```sql 17 | load excel.`/tmp/upload/example_en.xlsx` 18 | where useHeader="true" and 19 | maxRowsInMemory="100" 20 | and dataAddress="A1:C8" 21 | as data; 22 | 23 | select * from data as output; 24 | ``` 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /mlsql-excel/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-excel-3.0 2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel 9 | mlsqlPluginType=ds 10 | desc=excel 11 | 12 | 13 | 14 | 15 | 16 | 
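The excel data source in this module can also write workbooks (see ExcelFileSaver below). A minimal sketch of saving a table, assuming the `data` table from the README example above; the output path is illustrative and `header` is the option the bundled DefaultSource requires:

```sql
save overwrite data as excel.`/tmp/output/example.xlsx` where header="true";
```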
-------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import org.apache.hadoop.fs.Path 4 | import org.apache.spark.sql.sources._ 5 | import org.apache.spark.sql.types.StructType 6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} 7 | 8 | class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider { 9 | 10 | /** Creates a new relation for retrieving data from an Excel file 11 | */ 12 | override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): ExcelRelation = 13 | createRelation(sqlContext, parameters, null) 14 | 15 | /** Creates a new relation for retrieving data from an Excel file 16 | */ 17 | override def createRelation( 18 | sqlContext: SQLContext, 19 | parameters: Map[String, String], 20 | schema: StructType 21 | ): ExcelRelation = { 22 | val wbReader = WorkbookReader(parameters, sqlContext.sparkContext.hadoopConfiguration) 23 | val dataLocator = DataLocator(parameters) 24 | ExcelRelation( 25 | header = checkParameter(parameters, "header").toBoolean, 26 | treatEmptyValuesAsNulls = parameters.get("treatEmptyValuesAsNulls").fold(false)(_.toBoolean), 27 | usePlainNumberFormat = parameters.get("usePlainNumberFormat").fold(false)(_.toBoolean), 28 | userSchema = Option(schema), 29 | inferSheetSchema = parameters.get("inferSchema").fold(false)(_.toBoolean), 30 | addColorColumns = parameters.get("addColorColumns").fold(false)(_.toBoolean), 31 | timestampFormat = parameters.get("timestampFormat"), 32 | excerptSize = parameters.get("excerptSize").fold(10)(_.toInt), 33 | dataLocator = dataLocator, 34 | workbookReader = wbReader 35 | )(sqlContext) 36 | } 37 | 38 | override def createRelation( 39 | sqlContext: SQLContext, 40 | mode: SaveMode, 41 | parameters: Map[String, String], 42 | data: DataFrame 43 | ): BaseRelation = { 44 | val path = checkParameter(parameters, "path") 45 | val header = checkParameter(parameters, "header").toBoolean 46 | val filesystemPath = new Path(path) 47 | val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) 48 | new ExcelFileSaver( 49 | fs, 50 | filesystemPath, 51 | data, 52 | saveMode = mode, 53 | header = header, 54 | dataLocator = DataLocator(parameters) 55 | ).save() 56 | 57 | createRelation(sqlContext, parameters, data.schema) 58 | } 59 | 60 | // Forces a Parameter to exist, otherwise an exception is thrown. 
61 | private def checkParameter(map: Map[String, String], param: String): String = { 62 | if (!map.contains(param)) { 63 | throw new IllegalArgumentException(s"Parameter ${'"'}$param${'"'} is missing in options.") 64 | } else { 65 | map.apply(param) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource15.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import org.apache.spark.sql.sources.DataSourceRegister 4 | 5 | class DefaultSource15 extends DefaultSource with DataSourceRegister { 6 | override def shortName(): String = "excel" 7 | } 8 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/ExcelFileSaver.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import com.norbitltd.spoiwo.model._ 4 | import com.norbitltd.spoiwo.natures.streaming.xlsx.Model2XlsxConversions._ 5 | import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path} 6 | import org.apache.poi.xssf.usermodel.XSSFWorkbook 7 | import org.apache.spark.sql.{DataFrame, SaveMode} 8 | import java.io.BufferedOutputStream 9 | 10 | import org.apache.poi.xssf.streaming.SXSSFWorkbook 11 | 12 | import scala.collection.JavaConverters._ 13 | 14 | object ExcelFileSaver { 15 | final val DEFAULT_SHEET_NAME = "Sheet1" 16 | final val DEFAULT_DATE_FORMAT = "yy-m-d h:mm" 17 | final val DEFAULT_TIMESTAMP_FORMAT = "yyyy-mm-dd hh:mm:ss.000" 18 | } 19 | 20 | class ExcelFileSaver( 21 | fs: FileSystem, 22 | location: Path, 23 | dataFrame: DataFrame, 24 | saveMode: SaveMode, 25 | dataLocator: DataLocator, 26 | header: Boolean = true 27 | ) { 28 | def save(): Unit = { 29 | def sheet(workbook: SXSSFWorkbook) = { 30 | val headerRow = if (header) Some(dataFrame.schema.fields.map(_.name).toSeq) else None 31 | val dataRows = dataFrame 32 | .toLocalIterator() 33 | .asScala 34 | .map(_.toSeq) 35 | dataLocator.toSheet(headerRow, dataRows, workbook) 36 | } 37 | val fileAlreadyExists = fs.exists(location) 38 | def writeToWorkbook(workbook: SXSSFWorkbook): Unit = { 39 | Workbook(sheet(workbook)).writeToExisting(workbook) 40 | autoClose(new BufferedOutputStream(fs.create(location)))(workbook.write) 41 | } 42 | (fileAlreadyExists, saveMode) match { 43 | case (false, _) | (_, SaveMode.Overwrite) => 44 | if (fileAlreadyExists) { 45 | fs.delete(location, true) 46 | } 47 | writeToWorkbook(new SXSSFWorkbook()) 48 | case (true, SaveMode.ErrorIfExists) => 49 | sys.error(s"path $location already exists.") 50 | case (true, SaveMode.Ignore) => () 51 | case (true, SaveMode.Append) => 52 | val inputStream: FSDataInputStream = fs.open(location) 53 | val workbook = new SXSSFWorkbook(new XSSFWorkbook(inputStream)) 54 | inputStream.close() 55 | writeToWorkbook(workbook) 56 | } 57 | } 58 | 59 | def autoClose[A <: AutoCloseable, B](closeable: A)(fun: (A) => B): B = { 60 | try { 61 | fun(closeable) 62 | } finally { 63 | closeable.close() 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/PlainNumberFormat.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import java.math.BigDecimal 4 | import java.text.FieldPosition 5 | 
import java.text.Format 6 | import java.text.ParsePosition 7 | 8 | /** A format that formats a double as a plain string without rounding and scientific notation. 9 | * All other operations are unsupported. 10 | * @see [[org.apache.poi.ss.usermodel.ExcelGeneralNumberFormat]] and SSNFormat from 11 | * [[org.apache.poi.ss.usermodel.DataFormatter]] from Apache POI. 12 | */ 13 | object PlainNumberFormat extends Format { 14 | 15 | override def format(number: AnyRef, toAppendTo: StringBuffer, pos: FieldPosition): StringBuffer = 16 | toAppendTo.append(new BigDecimal(number.toString).toPlainString) 17 | 18 | override def parseObject(source: String, pos: ParsePosition): AnyRef = 19 | throw new UnsupportedOperationException() 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/Utils.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | import scala.util.{Success, Try} 3 | 4 | object Utils { 5 | implicit class RichTry[T](t: Try[T]) { 6 | def toEither: Either[Throwable, T] = t.transform(s => Success(Right(s)), f => Success(Left(f))).get 7 | } 8 | 9 | case class MapIncluding[K](keys: Seq[K], optionally: Seq[K] = Seq()) { 10 | def unapply[V](m: Map[K, V]): Option[(Seq[V], Seq[Option[V]])] = 11 | if (keys.forall(m.contains)) { 12 | Some((keys.map(m), optionally.map(m.get))) 13 | } else { 14 | None 15 | } 16 | } 17 | sealed trait MapRequirements[K] { 18 | type ResultType[V] 19 | def unapplySeq[V](m: Map[K, V]): Option[ResultType[V]] 20 | } 21 | case class RequiredKeys[K](keys: K*) extends MapRequirements[K] { 22 | type ResultType[V] = Seq[V] 23 | def unapplySeq[V](m: Map[K, V]): Option[Seq[V]] = 24 | if (keys.forall(m.contains)) { 25 | Some(keys.map(m)) 26 | } else { 27 | None 28 | } 29 | } 30 | case class OptionalKeys[K](keys: K*) extends MapRequirements[K] { 31 | type ResultType[V] = Seq[Option[V]] 32 | def unapplySeq[V](m: Map[K, V]): Option[Seq[Option[V]]] = Some(keys.map(m.get)) 33 | } 34 | case class MapWith[K]( 35 | requiredKeys: RequiredKeys[K] = RequiredKeys[K](), 36 | optionalKeys: OptionalKeys[K] = OptionalKeys[K]() 37 | ) { 38 | def unapply[V](m: Map[K, V]): Option[(requiredKeys.ResultType[V], optionalKeys.ResultType[V])] = 39 | for { 40 | req <- requiredKeys.unapplySeq(m) 41 | opt <- optionalKeys.unapplySeq(m) 42 | } yield (req, opt) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/WorkbookReader.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import java.io.InputStream 4 | 5 | import com.crealytics.spark.excel.Utils.MapIncluding 6 | import com.github.pjfanning.xlsx.StreamingReader 7 | import org.apache.hadoop.conf.Configuration 8 | import org.apache.hadoop.fs.{FileSystem, Path} 9 | import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory} 10 | 11 | trait WorkbookReader { 12 | protected def openWorkbook(): Workbook 13 | def withWorkbook[T](f: Workbook => T): T = { 14 | val workbook = openWorkbook() 15 | val res = f(workbook) 16 | workbook.close() 17 | res 18 | } 19 | def sheetNames: Seq[String] = { 20 | withWorkbook(workbook => 21 | for (sheetIx <- (0 until workbook.getNumberOfSheets())) yield { 22 | workbook.getSheetAt(sheetIx).getSheetName() 23 | } 24 | ) 25 | } 26 | } 27 | 28 | object WorkbookReader { 29 | val 
WithLocationMaxRowsInMemoryAndPassword = 30 | MapIncluding(Seq("path"), optionally = Seq("maxRowsInMemory", "workbookPassword")) 31 | 32 | def apply(parameters: Map[String, String], hadoopConfiguration: Configuration): WorkbookReader = { 33 | def readFromHadoop(location: String) = { 34 | val path = new Path(location) 35 | FileSystem.get(path.toUri, hadoopConfiguration).open(path) 36 | } 37 | parameters match { 38 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(Some(maxRowsInMemory), passwordOption)) => 39 | new StreamingWorkbookReader(readFromHadoop(location), passwordOption, maxRowsInMemory.toInt) 40 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(None, passwordOption)) => 41 | new DefaultWorkbookReader(readFromHadoop(location), passwordOption) 42 | } 43 | } 44 | } 45 | class DefaultWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String]) 46 | extends WorkbookReader { 47 | protected def openWorkbook(): Workbook = 48 | workbookPassword 49 | .fold(WorkbookFactory.create(inputStreamProvider))(password => 50 | WorkbookFactory.create(inputStreamProvider, password) 51 | ) 52 | } 53 | 54 | class StreamingWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String], maxRowsInMem: Int) 55 | extends WorkbookReader { 56 | override protected def openWorkbook(): Workbook = { 57 | val builder = StreamingReader 58 | .builder() 59 | .rowCacheSize(maxRowsInMem) 60 | .bufferSize(4096) 61 | workbookPassword 62 | .fold(builder)(password => builder.password(password)) 63 | .open(inputStreamProvider) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /mlsql-ext-ets/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ext-ets-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets 9 | mlsqlPluginType=app 10 | desc=mlsql-ext-ets 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ext-ets/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ext-ets-3.0 2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets 9 | mlsqlPluginType=app 10 | desc=mlsql-ext-ets 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ext-ets/src/main/java/tech/mlsql/plugins/ext/ets/app/MLSQLETApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ext.ets.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | class MLSQLETApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 10 | override def run(args: Seq[String]): Unit = { 11 | 12 | } 13 | 14 | 15 | override def supportedVersions: Seq[String] = { 16 | MLSQLETApp.versions 17 | } 18 | } 19 | 20 | object MLSQLETApp { 21 
| val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 22 | } -------------------------------------------------------------------------------- /mlsql-ke/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ke-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke 9 | mlsqlPluginType=app 10 | desc=mlsql-ke 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ke/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ke-3.0 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke 9 | mlsqlPluginType=app 10 | desc=mlsql-ke 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ke/src/main/java/tech/mlsql/plugins/ke/app/MLSQLKE.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ke.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.ets.register.ETRegister 5 | import tech.mlsql.plugins.ke.ets.{KEAPISchedule, KEAutoModel, KEBuildSegment} 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class MLSQLKE extends tech.mlsql.app.App with VersionCompatibility with Logging { 12 | override def run(args: Seq[String]): Unit = { 13 | ETRegister.register("KeApi", classOf[KEAPISchedule].getName) 14 | ETRegister.register("KeAutoModel", classOf[KEAutoModel].getName) 15 | ETRegister.register("KeBuildSegment", classOf[KEBuildSegment].getName) 16 | } 17 | 18 | 19 | override def supportedVersions: Seq[String] = { 20 | MLSQLKE.versions 21 | } 22 | } 23 | 24 | object MLSQLKE { 25 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 26 | } 27 | -------------------------------------------------------------------------------- /mlsql-ke/src/main/java/tech/mlsql/plugins/ke/ets/KEBuildSegment.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ke.ets 2 | 3 | import com.alibaba.fastjson.{JSON, JSONObject} 4 | import org.apache.spark.ml.util.Identifiable 5 | import org.apache.spark.sql.DataFrame 6 | import streaming.dsl.{ConnectMeta, DBMappingKey} 7 | import streaming.dsl.mmlib.algs.param.WowParams 8 | import tech.mlsql.common.utils.log.Logging 9 | 10 | class KEBuildSegment(override val uid: String) extends KEAPISchedule with WowParams with Logging { 11 | 12 | def this() = this(Identifiable.randomUID("tech.mlsql.plugins.ets.BuildSegment")) 13 | 14 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 15 | val jsonObj = new JSONObject 16 | val split = path.split("\\.") 17 | val connectName = split(0) 18 | jsonObj.put("project", split(1)) 19 | if (params.contains("start")) { 20 | jsonObj.put("start", params("start")) 21 | } 22 | if (params.contains("end")) { 23 | jsonObj.put("end", params("end")) 24 | } 25 | if 
(params.contains("sub_partition_values")) { 26 | jsonObj.put("sub_partition_values", JSON.parseArray(params("sub_partition_values"))) 27 | } 28 | if (params.contains("build_all_indexes")) { 29 | jsonObj.put("build_all_indexes", params("build_all_indexes").toBoolean) 30 | } 31 | if (params.contains("build_all_sub_partitions")) { 32 | jsonObj.put("build_all_sub_partitions", params("build_all_sub_partitions").toBoolean) 33 | } 34 | if (params.contains("priority")) { 35 | jsonObj.put("priority", params("priority").toInt) 36 | } 37 | var url = new String 38 | ConnectMeta.presentThenCall(DBMappingKey("ke", connectName), options => { 39 | url = "http://" + options("host") + ":" + options("port") + "/kylin/api/models/" + params("model") + "/segments" 40 | }) 41 | sendPostAPI(df, params, jsonObj, url, connectName) 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /mlsql-language-server/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-language-server-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server 9 | mlsqlPluginType=app 10 | desc=mlsql-language-server 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-language-server/build.sh: -------------------------------------------------------------------------------- 1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/mlsql-language-server/build/ 2 | TARGET=/Users/allwefantasy/projects/mlsql/src/mlsql-lang/mlsql-app_2.4-2.1.0-SNAPSHOT/plugin 3 | #conda activate mlsql-plugin-tool 4 | mlsql_plugin_tool build --module_name mlsql-language-server --spark spark243 5 | scp ${SOURCE}/mlsql-language-server-2.4_2.11-0.1.0-SNAPSHOT.jar ${TARGET}/ 6 | -------------------------------------------------------------------------------- /mlsql-language-server/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-language-server-3.0 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server 9 | mlsqlPluginType=app 10 | desc=mlsql-language-server 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/AutoSuggestWrapper.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver 2 | 3 | import net.csdn.common.exception.RenderFinish 4 | import net.csdn.common.jline.ANSI.Renderer.RenderException 5 | import net.csdn.modules.http.DefaultRestRequest 6 | import net.csdn.modules.mock.MockRestResponse 7 | import streaming.rest.RestController 8 | import tech.mlsql.autosuggest.statement.SuggestItem 9 | import tech.mlsql.common.utils.log.Logging 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | 12 | import scala.collection.JavaConverters._ 13 | 14 | /** 15 | * 1/9/2021 
WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class AutoSuggestWrapper(params: java.util.Map[String, String]) extends Logging { 18 | def run() = { 19 | try { 20 | params.put("executeMode", "autoSuggest") 21 | logInfo(JSONTool.toJsonStr(params.asScala.toMap)) 22 | 23 | val restRequest = new DefaultRestRequest("POST", params) 24 | val restReponse = new MockRestResponse() 25 | val controller = new RestController() 26 | net.csdn.modules.http.RestController.enhanceApplicationController(controller, restRequest, restReponse) 27 | try { 28 | controller.script 29 | } catch { 30 | case _: RenderFinish => 31 | } 32 | val jsonStr = restReponse.content() 33 | JSONTool.parseJson[List[SuggestItem]](jsonStr).asJava 34 | } catch { 35 | case e: Exception => 36 | logInfo("Suggest fail", e) 37 | List[SuggestItem]().asJava 38 | } 39 | 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/LSContext.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver; 2 | 3 | import net.sf.json.JSONObject; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * 2/9/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | public class LSContext { 12 | final public static Map initParams = new HashMap<>(); 13 | 14 | public static void parse(String jsonStr) { 15 | JSONObject obj = JSONObject.fromObject(jsonStr); 16 | for (Object key : obj.keySet()) { 17 | initParams.put(key.toString(), obj.getString(key.toString())); 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/MLSQLWorkspaceService.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver; 2 | 3 | import org.eclipse.lsp4j.DidChangeConfigurationParams; 4 | import org.eclipse.lsp4j.DidChangeWatchedFilesParams; 5 | import org.eclipse.lsp4j.services.WorkspaceService; 6 | 7 | /** 8 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 9 | */ 10 | public class MLSQLWorkspaceService implements WorkspaceService { 11 | @Override 12 | public void didChangeConfiguration(DidChangeConfigurationParams params) { 13 | 14 | } 15 | 16 | @Override 17 | public void didChangeWatchedFiles(DidChangeWatchedFilesParams params) { 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/Message.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package tech.mlsql.plugins.langserver.commons.client; 17 | /** 18 | * {@link Message} Parsed log message sent to client. 
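 * Instances are effectively immutable apart from {@code id}; the eight constructor arguments are,
 * in order: id, direction, headers, httpMethod, path, contentType, payload and headerType.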
19 | * 20 | */ 21 | public class Message { 22 | private String id; 23 | private String direction; 24 | private String headers; 25 | private String httpMethod; 26 | private String path; 27 | private String contentType; 28 | private String payload; 29 | private String headerType; 30 | 31 | public Message(String id, String direction, String headers, String httpMethod, String path, String contentType, 32 | String payload, String headerType) { 33 | this.id = id; 34 | this.direction = direction; 35 | this.headers = headers; 36 | this.httpMethod = httpMethod; 37 | this.path = path; 38 | this.contentType = contentType; 39 | this.payload = payload; 40 | this.headerType = headerType; 41 | } 42 | 43 | public void setId(String id) { 44 | this.id = id; 45 | } 46 | 47 | public String getId() { 48 | return id; 49 | } 50 | 51 | public String getDirection() { 52 | return direction; 53 | } 54 | 55 | public String getHeaders() { 56 | return headers; 57 | } 58 | 59 | public String getHttpMethod() { 60 | return httpMethod; 61 | } 62 | 63 | public String getPath() { 64 | return path; 65 | } 66 | 67 | public String getContentType() { 68 | return contentType; 69 | } 70 | 71 | public String getPayload() { 72 | return payload; 73 | } 74 | 75 | public String getHeaderType() { 76 | return headerType; 77 | } 78 | } 79 | 80 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/TraceRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package tech.mlsql.plugins.langserver.commons.client; 17 | 18 | import com.google.gson.JsonObject; 19 | 20 | import java.util.UUID; 21 | 22 | /** 23 | * Model class for trace log. 
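 * The {@code record} JsonObject passed to the constructor is expected to carry the
 * java.util.logging-style fields millis, sequenceNumber, loggerName, sourceClassName,
 * sourceMethodName and threadID; each value is copied into this model as a string and
 * {@code id} is assigned a fresh UUID.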
24 | */ 25 | public class TraceRecord { 26 | private Message message; 27 | private String rawMessage; 28 | private String id; 29 | private String millis; 30 | private String sequence; 31 | private String logger; 32 | private String sourceClass; 33 | private String sourceMethod; 34 | private String thread; 35 | 36 | public TraceRecord(Message message, JsonObject record, String rawMessage) { 37 | this.message = message; 38 | this.rawMessage = rawMessage; 39 | this.id = UUID.randomUUID().toString(); 40 | this.millis = record.get("millis").getAsString(); 41 | this.sequence = record.get("sequenceNumber").getAsString(); 42 | this.logger = record.get("loggerName").getAsString(); 43 | this.sourceClass = record.get("sourceClassName").getAsString(); 44 | this.sourceMethod = record.get("sourceMethodName").getAsString(); 45 | this.thread = record.get("threadID").getAsString(); 46 | } 47 | 48 | public String getRawMessage() { 49 | return rawMessage; 50 | } 51 | 52 | public String getLogger() { 53 | return logger; 54 | } 55 | 56 | public String getSequence() { 57 | return sequence; 58 | } 59 | 60 | public String getThread() { 61 | return thread; 62 | } 63 | 64 | public String getMillis() { 65 | return millis; 66 | } 67 | 68 | public String getSourceMethod() { 69 | return sourceMethod; 70 | } 71 | 72 | public String getSourceClass() { 73 | return sourceClass; 74 | } 75 | 76 | public Message getMessage() { 77 | return message; 78 | } 79 | 80 | public String getId() { 81 | return id; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/Launcher.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver.launchers.stdio; 2 | 3 | import org.eclipse.lsp4j.services.LanguageClient; 4 | import tech.mlsql.plugins.langserver.MLSQLLanguageServer; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.PrintWriter; 9 | import java.util.concurrent.ExecutionException; 10 | 11 | 12 | /** 13 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 14 | */ 15 | public class Launcher { 16 | public static void main(String[] args) throws InterruptedException, ExecutionException { 17 | 18 | MLSQLLanguageServer server = new MLSQLLanguageServer(); 19 | 20 | boolean lspInspectorTrace = false; 21 | 22 | 23 | org.eclipse.lsp4j.jsonrpc.Launcher launcher = null; 24 | 25 | if (lspInspectorTrace) { 26 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, exitOnClose(System.in), System.out, 27 | true, new PrintWriter(System.err)); 28 | } else { 29 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, System.in, System.out); 30 | } 31 | 32 | 33 | LanguageClient client = launcher.getRemoteProxy(); 34 | server.connect(client); 35 | launcher.startListening().get(); 36 | 37 | } 38 | 39 | private static InputStream exitOnClose(InputStream delegate) { 40 | return new InputStream() { 41 | @Override 42 | public int read() throws IOException { 43 | return exitIfNegative(delegate.read()); 44 | } 45 | 46 | int exitIfNegative(int result) { 47 | if (result < 0) { 48 | System.err.println("Input stream has closed. 
Exiting..."); 49 | System.exit(0); 50 | } 51 | return result; 52 | } 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/MLSQLDesktopApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver.launchers.stdio 2 | 3 | import streaming.core.StreamingApp 4 | import tech.mlsql.common.utils.path.PathFun 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | 8 | /** 9 | * 26/8/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | object MLSQLDesktopApp { 12 | def main(args: Array[String]): Unit = { 13 | val defaultMap = arrayToMap(Array( 14 | "-streaming.master", "local[*]", 15 | "-streaming.name", "MLSQL-desktop", 16 | "-streaming.rest", "true", 17 | "-streaming.thrift", "false", 18 | "-streaming.platform", "spark", 19 | "-streaming.spark.service", "true", 20 | "-streaming.job.cancel", "true", 21 | "-streaming.datalake.path", PathFun.joinPath(".","data"), 22 | "-streaming.driver.port", "9003", 23 | "-streaming.plugin.clzznames", "tech.mlsql.plugins.ds.MLSQLExcelApp,tech.mlsql.plugins.shell.app.MLSQLShell,tech.mlsql.plugins.assert.app.MLSQLAssert" 24 | )) 25 | val extraMap = arrayToMap(args) 26 | StreamingApp.main( mapToArray(defaultMap ++ extraMap)) 27 | } 28 | 29 | def arrayToMap(args: Array[String]): Map[String, String] = { 30 | val res = scala.collection.mutable.HashMap[String, String]() 31 | var i = 0; 32 | while (i < args.length) { 33 | res += (args(i) -> args(i + 1)) 34 | i += 2 35 | } 36 | res.toMap 37 | } 38 | 39 | def mapToArray(args:Map[String,String]):Array[String] = { 40 | args.flatMap{item=> 41 | val (key,value) = item 42 | Array(key,value) 43 | } .toArray 44 | } 45 | } 46 | 47 | class MLSQLDesktopApp 48 | -------------------------------------------------------------------------------- /mlsql-mllib/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-mllib-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib 9 | mlsqlPluginType=app 10 | desc=mllib 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-mllib/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-mllib-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- 
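The MLSQLDesktopApp object shown a few files above builds its argument list as `defaultMap ++ extraMap`, so any pair passed on the command line overrides the built-in default with the same key. A hedged sketch (the port value is purely illustrative, not from the original source):

```scala
// Overrides the default "-streaming.driver.port" of 9003 baked into defaultMap.
MLSQLDesktopApp.main(Array("-streaming.driver.port", "9004"))
```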
/mlsql-mllib/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-mllib 2 | 3 | This plugin provides ET wrappers for spark-mllib. 4 | 5 | ## Install from store 6 | 7 | Execute the following command in the web console: 8 | 9 | ``` 10 | !plugin app add "tech.mlsql.plugins.mllib.app.MLSQLMllib" "mlsql-mllib-2.4"; 11 | ``` 12 | 13 | Check installation: 14 | 15 | ``` 16 | !show et/ClassificationEvaluator; 17 | !show et/RegressionEvaluator; 18 | ``` 19 | 20 | ## Install Manually 21 | 22 | First, build the shaded jar in your terminal: 23 | 24 | ```shell 25 | pip install mlsql_plugin_tool 26 | mlsql_plugin_tool build --module_name mlsql-mllib --spark spark243 27 | ``` 28 | 29 | Then change the start script of MLSQL Engine. 30 | 31 | Add Jar: 32 | 33 | ``` 34 | --jars YOUR_JAR_PATH 35 | ``` 36 | 37 | Register Class: 38 | 39 | ``` 40 | -streaming.plugin.clzznames tech.mlsql.plugins.mllib.app.MLSQLMllib 41 | ``` 42 | 43 | If there is more than one class, use commas to separate them. For example: 44 | 45 | ``` 46 | -streaming.plugin.clzznames classA,classB,classC 47 | ``` 48 | 49 | ## Usage 50 | 51 | Classification: 52 | 53 | ```sql 54 | predict data as RandomForest.`/tmp/model` as predicted_table; 55 | run predicted_table as ClassificationEvaluator.``; 56 | ``` 57 | 58 | Regression: 59 | 60 | ```sql 61 | predict data as LinearRegressionExt.`/tmp/model` as predicted_table; 62 | run predicted_table as RegressionEvaluator.``; 63 | ``` 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /mlsql-mllib/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-mllib-3.0 2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib 9 | mlsqlPluginType=app 10 | desc=mllib 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-mllib/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-mllib-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/app/MLSQLMllib.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.mllib.ets._ 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class
MLSQLMllib extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("ClassificationEvaluator", classOf[ClassificationEvaluator].getName) 15 | ETRegister.register("RegressionEvaluator", classOf[RegressionEvaluator].getName) 16 | ETRegister.register("AutoMLExt", classOf[AutoMLExt].getName) 17 | ETRegister.register("SampleDatasetExt", classOf[SampleDatasetExt].getName) 18 | ETRegister.register("TakeRandomSampleExt", classOf[TakeRandomSampleExt].getName) 19 | ETRegister.register("ColumnsExt", classOf[ColumnsExt].getName) 20 | 21 | // !columns drop fields from tableName; 22 | CommandCollection.refreshCommandMapping(Map("columns" -> 23 | """ 24 | |run {3} as ColumnsExt.`` where action="{0}" and fields="{1}" 25 | |""".stripMargin)) 26 | 27 | } 28 | 29 | 30 | override def supportedVersions: Seq[String] = { 31 | MLSQLMllib.versions 32 | } 33 | } 34 | 35 | object MLSQLMllib { 36 | val versions = Seq(">=2.0.0", "2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 37 | } -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ClassificationEvaluator.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class ClassificationEvaluator(override val uid: String) extends SQLAlg 20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification { 21 | def this() = this(BaseParams.randomUID()) 22 | 23 | /** 24 | * run table as ClassificationEvaluator.`` where labelCol="label"; 25 | */ 26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 27 | batchPredict(df, path, params) 28 | } 29 | 30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 31 | val items = multiclassClassificationEvaluate(df, (evaluator) => { 32 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label")) 33 | evaluator.setPredictionCol("prediction") 34 | }) 35 | import df.sparkSession.implicits._ 36 | df.sparkSession.createDataset[MetricValue](items).toDF() 37 | } 38 | 39 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 40 | 41 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
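  // A hedged sketch of driving this ET directly from Scala in a test (the table name is invented,
  // not from the original source): the input only needs the label column (configurable via labelCol,
  // default "label") and a "prediction" column, and the result contains one row per metric
  // (f1, weightedPrecision, weightedRecall, accuracy).
  //
  //   val predicted = spark.table("predicted_table")
  //   val metrics = new ClassificationEvaluator().train(predicted, "", Map("labelCol" -> "label"))
  //   metrics.show()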
42 | 43 | override def supportedVersions: Seq[String] = { 44 | MLSQLMllib.versions 45 | } 46 | 47 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 48 | List() 49 | } 50 | 51 | override def modelType: ModelType = AlgType 52 | 53 | override def doc: Doc = Doc(HtmlDoc, 54 | """ 55 | |Compute f1|weightedPrecision|weightedRecall|accuracy for predicted table. 56 | """.stripMargin) 57 | 58 | 59 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr + 60 | """ 61 | |predict data as RandomForest.`/tmp/model` as predicted_table; 62 | |run predicted_table as ClassificationEvaluator.``; 63 | """.stripMargin) 64 | 65 | override def explainParams(sparkSession: SparkSession): DataFrame = { 66 | _explainParams(sparkSession) 67 | } 68 | 69 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label") 70 | 71 | } 72 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ColumnsExt.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.{Param, StringArrayParam} 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 7 | import streaming.dsl.mmlib.algs.param.BaseParams 8 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions} 9 | import streaming.dsl.mmlib._ 10 | 11 | /** 12 | * 10/10/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class ColumnsExt(override val uid: String) extends SQLAlg 15 | with Functions 16 | with MllibFunctions 17 | with BaseClassification 18 | with PluginBaseETAuth { 19 | 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val _action = params.getOrElse(action.name, $(action).toString) 24 | val _fields = params.getOrElse(fields.name, $(fields).mkString(",")).split(",") 25 | val dfName = params("__dfname__") 26 | if (_fields.length == 0) return df 27 | _action match { 28 | case "drop" | "remove" => 29 | val newdf = df.drop(_fields: _*) 30 | newdf.createOrReplaceTempView(dfName) 31 | newdf 32 | } 33 | } 34 | 35 | 36 | override def skipOriginalDFName: Boolean = false 37 | 38 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 39 | train(df, path, params) 40 | } 41 | 42 | override def modelType: ModelType = ProcessType 43 | 44 | override def doc: Doc = Doc(MarkDownDoc, 45 | """ 46 | | 47 | |""".stripMargin) 48 | 49 | override def codeExample: Code = Code(SQLCode, 50 | """ 51 | |select 1 as a,2 as b as mockTable; 52 | |!columns drop a from mockTable; 53 | |select * from mockTable as output; 54 | |""".stripMargin) 55 | 56 | 57 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 58 | 59 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
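  // For context: MLSQLMllib maps this ET onto the "!columns" command, so a statement such as
  //   !columns drop a,b from mockTable;
  // is rewritten through the registered {0}/{1}/{3} template into roughly
  //   run mockTable as ColumnsExt.`` where action="drop" and fields="a,b"
  // and the result of the drop replaces the temp view named by params("__dfname__").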
60 | 61 | override def etName: String = "__columns_operator__" 62 | 63 | final val action: Param[String] = 64 | new Param[String](this, name = "action", doc = "") 65 | setDefault(action, "drop") 66 | 67 | final val fields: StringArrayParam = 68 | new StringArrayParam(this, name = "fields", doc = "") 69 | setDefault(fields, Array[String]()) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/PluginBaseETAuth.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import streaming.dsl.ScriptSQLExec 4 | import streaming.dsl.auth._ 5 | import tech.mlsql.dsl.auth.ETAuth 6 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 7 | 8 | /** 9 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | trait PluginBaseETAuth extends ETAuth { 12 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 13 | val vtable = MLSQLTable( 14 | Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 15 | Option(etName), 16 | OperateType.SELECT, 17 | Option("select"), 18 | TableType.SYSTEM) 19 | 20 | val context = ScriptSQLExec.contextGetOrForTest() 21 | context.execListener.getTableAuth match { 22 | case Some(tableAuth) => 23 | tableAuth.auth(List(vtable)) 24 | case None => 25 | List(TableAuthResult(granted = true, "")) 26 | } 27 | } 28 | 29 | def etName: String 30 | } 31 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/RegressionEvaluator.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class RegressionEvaluator(override val uid: String) extends SQLAlg 20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification { 21 | def this() = this(BaseParams.randomUID()) 22 | 23 | /** 24 | * run table as RegressionEvaluator.`` where labelCol="label"; 25 | */ 26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 27 | batchPredict(df, path, params) 28 | } 29 | 30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 31 | val items = "mse|rmse|r2|mae".split("\\|").map { metricName => 32 | val evaluator = new org.apache.spark.ml.evaluation.RegressionEvaluator().setMetricName(metricName) 33 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label")) 34 | evaluator.setPredictionCol("prediction") 35 | MetricValue(metricName, evaluator.evaluate(df)) 36 | }.toList 37 | 38 | import df.sparkSession.implicits._ 39 | df.sparkSession.createDataset[MetricValue](items).toDF() 40 | } 41 | 
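  // As with ClassificationEvaluator, the incoming table only needs the label column (configurable
  // via labelCol, default "label") and a "prediction" column; batchPredict above then emits one
  // MetricValue row for each of mse, rmse, r2 and mae.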
42 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 43 | 44 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 45 | 46 | override def supportedVersions: Seq[String] = { 47 | MLSQLMllib.versions 48 | } 49 | 50 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 51 | List() 52 | } 53 | 54 | override def modelType: ModelType = AlgType 55 | 56 | override def doc: Doc = Doc(HtmlDoc, 57 | """ 58 | |Compute mse|rmse|r2|mae for predicted table. 59 | """.stripMargin) 60 | 61 | 62 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr + 63 | """ 64 | |predict data as LinearRegressionExt.`/tmp/model` as predicted_table; 65 | |run predicted_table as RegressionEvaluator.``; 66 | """.stripMargin) 67 | 68 | override def explainParams(sparkSession: SparkSession): DataFrame = { 69 | _explainParams(sparkSession) 70 | } 71 | 72 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label") 73 | 74 | } -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/TakeRandomSampleExt.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib._ 7 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 8 | import streaming.dsl.mmlib.algs.param.BaseParams 9 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions} 10 | 11 | /** 12 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class TakeRandomSampleExt(override val uid: String) extends SQLAlg 15 | with Functions 16 | with MllibFunctions 17 | with BaseClassification 18 | with PluginBaseETAuth { 19 | 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val _size = params.getOrElse(size.name, $(size).toString).toLong 24 | val _fraction = params.getOrElse(fraction.name, $(fraction).toString).toDouble 25 | 26 | val newdf = (_fraction, _size) match { 27 | case (-1, -1) => 28 | df 29 | case (-1, s) => 30 | val count = df.count() 31 | df.sample(Math.min(s * 1.0 / count + 0.2, 1.0)).limit(s.toInt) 32 | case (f, -1) => 33 | df.sample(f) 34 | 35 | case (f, s) => 36 | df.sample(Math.min(f + 0.1, 1.0)).limit(s.toInt) 37 | } 38 | if (_fraction != -1) { 39 | df.sample(_fraction) 40 | } 41 | 42 | newdf 43 | } 44 | 45 | 46 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 47 | train(df, path, params) 48 | } 49 | 50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 51 | 52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
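  // Worked example of the (fraction, size) branches in train above (the numbers are illustrative):
  // with size=1000, fraction left at its default of -1 and a 10,000 row table, the sample fraction
  // becomes min(1000/10000 + 0.2, 1.0) = 0.3 and the result is then limited to 1,000 rows. Note that
  // the trailing `if (_fraction != -1) { df.sample(_fraction) }` produces a value that is discarded;
  // newdf is what gets returned.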
53 | 54 | override def etName: String = "__take_random_sample_operator__" 55 | 56 | override def modelType: ModelType = ProcessType 57 | 58 | override def doc: Doc = Doc(MarkDownDoc, 59 | """ 60 | | 61 | |""".stripMargin) 62 | 63 | override def codeExample: Code = Code(SQLCode, 64 | """ 65 | | 66 | | 67 | |""".stripMargin) 68 | 69 | final val fraction: Param[Double] = new Param[Double](this, name = "fraction", doc = "") 70 | setDefault(fraction, -1.0D) 71 | 72 | final val size: Param[Long] = new Param[Long](this, "size", "") 73 | setDefault(size, -1L) 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /mlsql-shell/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-shell-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-shell/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-shell-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-shell/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-shell 2 | 3 | This plugin provides the ability to execute shell commands on the MLSQL Engine driver side. 4 | 5 | ![](http://store.mlsql.tech/upload_images/6d09a596-cb0a-495c-9a95-6bbcc63be9ab.png) 6 | 7 | ## Install from store 8 | 9 | Execute the following command in the web console: 10 | 11 | ``` 12 | !plugin app add - "mlsql-shell-2.4"; 13 | ``` 14 | 15 | Check installation: 16 | 17 | ``` 18 | !sh pip install pyjava; 19 | ``` 20 | 21 | 22 | ## Install Manually 23 | 24 | First, build the shaded jar in your terminal: 25 | 26 | ```shell 27 | pip install mlsql_plugin_tool 28 | mlsql_plugin_tool build --module_name mlsql-shell --spark spark243 29 | ``` 30 | 31 | Then change the start script of MLSQL Engine. 32 | 33 | Add Jar: 34 | 35 | ``` 36 | --jars YOUR_JAR_PATH 37 | ``` 38 | 39 | Register Class: 40 | 41 | ``` 42 | -streaming.plugin.clzznames tech.mlsql.plugins.shell.app.MLSQLShell 43 | ``` 44 | 45 | If there is more than one class, use commas to separate them.
For example: 46 | 47 | ``` 48 | -streaming.plugin.clzznames classA,classB,classC 49 | ``` 50 | 51 | ## Usage 52 | 53 | ```sql 54 | !sh pip install pyjava; 55 | !sh echo "yes"; 56 | !sh wget "https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell"; 57 | ``` 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /mlsql-shell/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-shell-3.0 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-shell/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-shell-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/app/MLSQLShell.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.shell.ets.{CopyFromLocal, ShellExecute} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLShell extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("ShellExecute", classOf[ShellExecute].getName) 15 | CommandCollection.refreshCommandMapping(Map("sh" -> 16 | """ 17 | |run command as ShellExecute.`` where parameters='''{:all}''' 18 | |""".stripMargin)) 19 | 20 | ETRegister.register("CopyFromLocal", classOf[CopyFromLocal].getName) 21 | CommandCollection.refreshCommandMapping(Map("copyFromLocal" -> 22 | """ 23 | |run command as CopyFromLocal.`{1}` where src="{0}" 24 | |""".stripMargin)) 25 | } 26 | 27 | 28 | override def supportedVersions: Seq[String] = { 29 | MLSQLShell.versions 30 | } 31 | } 32 | 33 | object MLSQLShell { 34 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 35 | } 36 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/CopyFromLocal.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import 
org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth._ 7 | import streaming.dsl.mmlib.SQLAlg 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.shell.app.MLSQLShell 14 | import tech.mlsql.tool.HDFSOperatorV2 15 | import tech.mlsql.version.VersionCompatibility 16 | 17 | /** 18 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 19 | */ 20 | class CopyFromLocal(override val uid: String) extends SQLAlg 21 | with VersionCompatibility with Functions with WowParams with ETAuth { 22 | def this() = this(BaseParams.randomUID()) 23 | 24 | /** 25 | * !copyFromLocal src dst; 26 | */ 27 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 28 | 29 | HDFSOperatorV2.copyToHDFS(params("src"), path, false, false) 30 | import df.sparkSession.implicits._ 31 | df.sparkSession.createDataset[String](Seq().toSeq).toDF("content") 32 | } 33 | 34 | override def skipPathPrefix: Boolean = false 35 | 36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 37 | 38 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 39 | 40 | override def supportedVersions: Seq[String] = MLSQLShell.versions 41 | 42 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 43 | val vtable = MLSQLTable( 44 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 45 | table = Option("__copy_from_local__"), 46 | operateType = OperateType.EMPTY, 47 | sourceType = Option("_mlsql_"), 48 | tableType = TableType.SYSTEM) 49 | 50 | val context = ScriptSQLExec.contextGetOrForTest() 51 | context.execListener.getTableAuth match { 52 | case Some(tableAuth) => 53 | tableAuth.auth(List(vtable)) 54 | case None => List(TableAuthResult(true, "")) 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/ShellExecute.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth._ 7 | import streaming.dsl.mmlib.SQLAlg 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.common.utils.shell.ShellCommand 12 | import tech.mlsql.dsl.auth.ETAuth 13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 14 | import tech.mlsql.plugins.shell.app.MLSQLShell 15 | import tech.mlsql.version.VersionCompatibility 16 | 17 | import scala.collection.mutable.ArrayBuffer 18 | 19 | /** 20 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 21 | */ 22 | class ShellExecute(override val uid: String) extends SQLAlg 23 | with VersionCompatibility with Functions with WowParams with ETAuth { 24 | def this() = this(BaseParams.randomUID()) 25 | 26 | /** 27 | * !sh pip install pyjava; 28 | */ 29 | override def train(df: DataFrame, path: String, params: Map[String, String]): 
DataFrame = { 30 | val args = JSONTool.parseJson[List[String]](params("parameters")) 31 | import df.sparkSession.implicits._ 32 | 33 | args.head match { 34 | case "script" => 35 | val res = ShellCommand.exec(args.last) 36 | df.sparkSession.createDataset[String](Seq(res)).toDF("content") 37 | case _ => 38 | 39 | val process = os.proc(args).spawn() 40 | val result = ArrayBuffer[String]() 41 | 42 | var errLine = process.stderr.readLine() 43 | 44 | while (errLine != null) { 45 | logInfo(format(errLine)) 46 | result.append(errLine) 47 | errLine = process.stderr.readLine() 48 | } 49 | 50 | 51 | var line = process.stdout.readLine() 52 | while (line != null) { 53 | logInfo(format(line)) 54 | result.append(line) 55 | line = process.stdout.readLine() 56 | } 57 | 58 | df.sparkSession.createDataset[String](result.toSeq).toDF("content") 59 | } 60 | 61 | 62 | } 63 | 64 | override def skipPathPrefix: Boolean = false 65 | 66 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 67 | 68 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 69 | 70 | override def supportedVersions: Seq[String] = MLSQLShell.versions 71 | 72 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 73 | val vtable = MLSQLTable( 74 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 75 | table = Option("__shell_execute__"), 76 | operateType = OperateType.EMPTY, 77 | sourceType = Option("_mlsql_"), 78 | tableType = TableType.SYSTEM) 79 | 80 | val context = ScriptSQLExec.contextGetOrForTest() 81 | context.execListener.getTableAuth match { 82 | case Some(tableAuth) => 83 | tableAuth.auth(List(vtable)) 84 | case None => List(TableAuthResult(true, "")) 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /run-script/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | run-script-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /run-script/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "run-script-2.4" named runScript; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set code1=''' 11 | select 1 as a as b; 12 | '''; 13 | !runScript '''${code1}''' named output; 14 | ``` -------------------------------------------------------------------------------- /run-script/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=run-script-2.4 2 | mainClass=tech.mlsql.plugins.et.RunScript 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/run-script 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /run-script/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | 
run-script-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /run-script/src/main/java/tech/mlsql/plugins/et/RunScript.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.ets.ScriptRunner 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | 17 | class RunScript(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 18 | def this() = this(BaseParams.randomUID()) 19 | 20 | // 21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 22 | 23 | val context = ScriptSQLExec.context() 24 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 25 | val sparkOpt = Option(df.sparkSession) 26 | command match { 27 | case Array(script, "named", tableName) => 28 | var jobRes: DataFrame = ScriptRunner.rubSubJob( 29 | script, 30 | (_df: DataFrame) => {}, 31 | sparkOpt, 32 | true, 33 | true).get 34 | jobRes.createOrReplaceTempView(tableName) 35 | jobRes 36 | case _ => throw new RuntimeException("try !runScript code named table1") 37 | } 38 | 39 | } 40 | 41 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 42 | List() 43 | } 44 | 45 | override def supportedVersions: Seq[String] = { 46 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 47 | } 48 | 49 | 50 | override def doc: Doc = Doc(MarkDownDoc, 51 | s""" 52 | |When you want to run a piece of MLSQL script and use its result 53 | |in the next command(SQL), you can use the !runScript command. 54 | | 55 | |For example: 56 | | 57 | |``` 58 | |${codeExample.code} 59 | |``` 60 | """.stripMargin) 61 | 62 | 63 | override def codeExample: Code = Code(SQLCode, 64 | """ 65 | |set code1='''select 1 as a as b;'''; 66 | |!runScript '''${code1}''' named output; 67 | |select * from output; 68 | """.stripMargin) 69 | 70 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 71 | 72 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 73 | 74 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
75 | 76 | 77 | } 78 | -------------------------------------------------------------------------------- /save-then-load/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | save-then-load-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /save-then-load/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "save-then-load-2.4" named saveThenLoad; 5 | ``` 6 | 7 | ## Usage 8 | 9 | This plugin will save the table into delta table and load it again. 10 | 11 | ```sql 12 | !saveThenLoad tableName; 13 | select * from tableName as output; 14 | ``` -------------------------------------------------------------------------------- /save-then-load/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=save-then-load-2.4 2 | mainClass=tech.mlsql.plugins.et.SaveThenLoad 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/save-then-load 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /save-then-load/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | save-then-load-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /save-then-load/src/main/java/tech/mlsql/plugins/et/SaveThenLoad.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} 5 | import streaming.core.datasource.impl.MLSQLDelta 6 | import streaming.core.datasource.{DataSinkConfig, DataSourceConfig} 7 | import streaming.dsl.auth.TableAuthResult 8 | import streaming.dsl.mmlib._ 9 | import streaming.dsl.mmlib.algs.Functions 10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 11 | import tech.mlsql.common.utils.serder.json.JSONTool 12 | import tech.mlsql.dsl.auth.ETAuth 13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 13/1/2020 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class SaveThenLoad(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 24 | val session = df.sparkSession 25 | command match { 26 | case Array(tableName) => 27 | val ds = new MLSQLDelta() 28 | ds.save(session.table(tableName).write, DataSinkConfig(s"__tmp__.${tableName}", Map(), SaveMode.Overwrite, Option(df))) 29 | val newDF = ds.load(session.read, 
DataSourceConfig(s"__tmp__.${tableName}", Map(), Option(df))) 30 | newDF.createOrReplaceTempView(tableName) 31 | newDF 32 | case _ => throw new RuntimeException("!saveThenLoad tableName;") 33 | } 34 | } 35 | 36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 37 | List() 38 | } 39 | 40 | override def supportedVersions: Seq[String] = { 41 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 42 | } 43 | 44 | 45 | override def doc: Doc = Doc(MarkDownDoc, 46 | s""" 47 | | 48 | |``` 49 | |${codeExample.code} 50 | |``` 51 | """.stripMargin) 52 | 53 | 54 | override def codeExample: Code = Code(SQLCode, 55 | """ 56 | |example 57 | """.stripMargin) 58 | 59 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 60 | 61 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 62 | 63 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /stream-persist/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=stream-persist-app-{{spark_binary_version}} 2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup 9 | mlsqlPluginType=app 10 | desc=wow 11 | 12 | -------------------------------------------------------------------------------- /stream-persist/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | stream-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /stream-persist/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - "stream-persist-app-2.4"; 5 | ``` 6 | 7 | The first plugin is APP plugin and the second is ET plugin. 8 | 9 | 10 | ## Usage 11 | 12 | Use ET Plugin to persist stream job. 13 | 14 | ```sql 15 | !streamPersist persist streamExample; 16 | 17 | !streamPersist remove streamExample; 18 | 19 | !streamPersist list; 20 | ``` 21 | 22 | And then once MLSQL Engine is restarted, and the stream job streamExample will be 23 | boosted at the startup of MLSQL. 
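A minimal end-to-end sketch (hedged; `streamExample` is an illustrative job name, and the stream must already be running in the current engine before it can be persisted, since the command looks the job up in the job manager):

```sql
-- persist a currently running stream job so it is replayed after an engine restart
!streamPersist persist streamExample;
-- confirm it was recorded in the __mlsql__.streams table
!streamPersist list;
```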
24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /stream-persist/db.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `w_streams` ( 2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 3 | `name` varchar(256) DEFAULT NULL, 4 | `content` text, 5 | `owner` varchar(256) DEFAULT NULL, 6 | `home` varchar(256) DEFAULT NULL, 7 | PRIMARY KEY (`id`) 8 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /stream-persist/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=stream-persist-app-3.0 2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup 9 | mlsqlPluginType=app 10 | desc=wow 11 | -------------------------------------------------------------------------------- /stream-persist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | stream-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsq.streambootstrapatstartup 2 | 3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime} 4 | import _root_.streaming.dsl.{MLSQLExecuteContext, ScriptSQLExec, ScriptSQLExecListener} 5 | import org.apache.spark.sql.SparkSession 6 | import tech.mlsql.common.utils.log.Logging 7 | import tech.mlsql.dsl.CommandCollection 8 | import tech.mlsql.ets.ScriptRunner 9 | import tech.mlsql.ets.register.ETRegister 10 | import tech.mlsql.job.{JobManager, MLSQLJobType} 11 | import tech.mlsql.store.DBStore 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 18 | 19 | 20 | override def run(args: Seq[String]): Unit = { 21 | val root = runtime.sparkSession 22 | import root.implicits._ 23 | 24 | ETRegister.register("StreamPersistCommand", classOf[StreamPersistCommand].getName) 25 | CommandCollection.refreshCommandMapping(Map("streamPersist" -> "StreamPersistCommand")) 26 | 27 | val thread = new Thread("start MLSQL stream") { 28 | override def run(): Unit = { 29 | while (!PlatformManager.RUNTIME_IS_READY.get()) { 30 | Thread.sleep(3000) 31 | logInfo("Waiting MLSQL runtime ready to start streams.") 32 | } 33 | logInfo("Starting to start streams.") 34 | val streams = DBStore.store.tryReadTable(root, StreamAppConfig.TABLE, () => root.createDataset[Stream](Seq()).toDF()) 35 | streams.as[Stream].collect().foreach { stream => 36 | val session = getSessionByOwner(stream.owner) 37 | val job = JobManager.getJobInfo(stream.owner, stream.name, MLSQLJobType.STREAM, stream.content, -1) 38 | setUpScriptSQLExecListener(stream.owner, session, job.groupId, stream.home) 39 | ScriptRunner.runJob(stream.content, job, (df) => { 40 | 41 | }) 42 | } 43 | } 44 | } 45 | thread.start() 46 
| 47 | } 48 | 49 | def setUpScriptSQLExecListener(owner: String, sparkSession: SparkSession, groupId: String, home: String) = { 50 | val context = new ScriptSQLExecListener(sparkSession, "", Map[String, String](owner -> home)) 51 | ScriptSQLExec.setContext(new MLSQLExecuteContext(context, owner, context.pathPrefix(None), groupId, Map())) 52 | context.addEnv("SKIP_AUTH", "true") 53 | context.addEnv("HOME", context.pathPrefix(None)) 54 | context.addEnv("OWNER", owner) 55 | context 56 | } 57 | 58 | def getSessionByOwner(owner: String) = { 59 | runtime.getSession(owner) 60 | } 61 | 62 | def runtime = { 63 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime] 64 | } 65 | 66 | override def supportedVersions: Seq[String] = { 67 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 68 | } 69 | } 70 | 71 | object StreamAppConfig { 72 | val TABLE = "__mlsql__.streams" 73 | } 74 | 75 | case class Stream(name: String, content: String, owner: String, home: String) 76 | 77 | object StreamApp { 78 | } 79 | -------------------------------------------------------------------------------- /stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamPersistCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsq.streambootstrapatstartup 2 | 3 | import _root_.streaming.dsl.ScriptSQLExec 4 | import _root_.streaming.dsl.mmlib.SQLAlg 5 | import _root_.streaming.dsl.mmlib.algs.Functions 6 | import _root_.streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 7 | import org.apache.spark.sql.expressions.UserDefinedFunction 8 | import org.apache.spark.sql.{DataFrame, SparkSession} 9 | import org.apache.spark.sql.mlsql.session.MLSQLException 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.datalake.DataLake 12 | import tech.mlsql.job.JobManager 13 | import tech.mlsql.store.DBStore 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class StreamPersistCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val spark = df.sparkSession 24 | val dataLake = new DataLake(spark) 25 | require(dataLake.isEnable, "data lake should be enabled.") 26 | import spark.implicits._ 27 | 28 | val command = JSONTool.parseJson[List[String]](params("parameters")) 29 | command match { 30 | case Seq("persist", streamName) => 31 | JobManager.getJobInfo.filter(f => f._2.jobName == streamName).map(f => f._2).headOption match { 32 | case Some(item) => 33 | val data = spark.createDataset(Seq(Stream(streamName, item.jobContent, item.owner, ScriptSQLExec.context().home))) 34 | DBStore.store.saveTable(spark, data.toDF(), StreamAppConfig.TABLE, Option("name"), false) 35 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 36 | case None => throw new MLSQLException(s"not stream ${streamName} exists") 37 | } 38 | case Seq("remove", streamName) => 39 | DBStore.store.saveTable(spark, spark.createDataset[Stream](Seq(Stream(streamName, null, null, null))).toDF(), StreamAppConfig.TABLE, Option("name"), true) 40 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 41 | 42 | case Seq("list") => 43 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 44 | } 45 | 46 | } 47 | 48 | 49 | override def supportedVersions: Seq[String] = { 50 | Seq("1.5.0-SNAPSHOT", 
"1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 51 | } 52 | 53 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 54 | 55 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 56 | 57 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 58 | } 59 | -------------------------------------------------------------------------------- /table-repartition/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=table-repartition-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.et.TableRepartition 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition 9 | mlsqlPluginType=et 10 | desc=wow -------------------------------------------------------------------------------- /table-repartition/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | table-repartition-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /table-repartition/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "table-repartition-2.4"; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set rawText=''' 11 | {"id":9,"content":"Spark好的语言1","label":0.0} 12 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0} 13 | {"id":13,"content":"MLSQL是一个好的语言7","label":0.0} 14 | '''; 15 | 16 | load jsonStr.`rawText` as orginal_text_corpus; 17 | 18 | select id,content,label from orginal_text_corpus as orginal_text_corpus1; 19 | run orginal_text_corpus1 as TableRepartition.`` where partitionNum="2" and partitionType="range" and partitionCols="id" 20 | as newtable; 21 | ``` -------------------------------------------------------------------------------- /table-repartition/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=table-repartition-3.0 2 | mainClass=tech.mlsql.plugins.et.TableRepartition 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition 9 | mlsqlPluginType=et 10 | desc=wow -------------------------------------------------------------------------------- /table-repartition/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | table-repartition-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /table-repartition/src/main/java/tech/mlsql/plugins/et/TableRepartition.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import 
org.apache.spark.ml.param.{IntParam, Param} 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.mlsql.session.MLSQLException 6 | import org.apache.spark.sql.{DataFrame, SparkSession, functions => F} 7 | import streaming.dsl.auth.TableAuthResult 8 | import streaming.dsl.mmlib._ 9 | import streaming.dsl.mmlib.algs.Functions 10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.version.VersionCompatibility 14 | 15 | 16 | class TableRepartition(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 17 | def this() = this(BaseParams.randomUID()) 18 | 19 | // 20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 21 | 22 | params.get(partitionNum.name).map { item => 23 | set(partitionNum, item.toInt) 24 | item 25 | }.getOrElse { 26 | throw new MLSQLException(s"${partitionNum.name} is required") 27 | } 28 | 29 | params.get(partitionType.name).map { item => 30 | set(partitionType, item) 31 | item 32 | }.getOrElse { 33 | set(partitionType, "hash") 34 | } 35 | 36 | params.get(partitionCols.name).map { item => 37 | set(partitionCols, item) 38 | item 39 | }.getOrElse { 40 | set(partitionCols, "") 41 | } 42 | 43 | $(partitionType) match { 44 | case "range" => 45 | 46 | require(params.contains(partitionCols.name), "At least one partition-by expression must be specified.") 47 | df.repartitionByRange($(partitionNum), $(partitionCols).split(",").map(name => F.col(name)): _*) 48 | 49 | case _ => 50 | df.repartition($(partitionNum)) 51 | } 52 | 53 | 54 | } 55 | 56 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 57 | List() 58 | } 59 | 60 | override def supportedVersions: Seq[String] = { 61 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 62 | } 63 | 64 | 65 | override def doc: Doc = Doc(MarkDownDoc, 66 | s""" 67 | | 68 | """.stripMargin) 69 | 70 | 71 | override def codeExample: Code = Code(SQLCode, 72 | """ 73 | | 74 | """.stripMargin) 75 | 76 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 77 | 78 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 79 | 80 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 81 | 82 | final val partitionNum: IntParam = new IntParam(this, "partitionNum", 83 | "") 84 | final val partitionType: Param[String] = new Param[String](this, "partitionType", 85 | "") 86 | 87 | final val partitionCols: Param[String] = new Param[String](this, "partitionCols", 88 | "") 89 | 90 | override def explainParams(sparkSession: SparkSession): DataFrame = _explainParams(sparkSession) 91 | 92 | } 93 | --------------------------------------------------------------------------------
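As a usage footnote to TableRepartition above (a sketch reusing the table names from its README; nothing here is part of the plugin itself): when `partitionType` is omitted the ET falls back to hash partitioning, so only `partitionNum` is required.

```sql
-- hash repartition (the default when partitionType is not set)
run orginal_text_corpus1 as TableRepartition.`` where partitionNum="2" as newtable;
```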