├── .gitignore
├── .repo
└── pom.template.xml
├── README.md
├── binlog2delta
├── .DS_Store
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ ├── .DS_Store
│ └── main
│ ├── .DS_Store
│ ├── java
│ ├── .DS_Store
│ └── tech
│ │ └── mlsql
│ │ └── plugins
│ │ └── binlog2delta
│ │ └── JavaDoc.java
│ └── resources
│ ├── main.mlsql
│ └── plugin.json
├── connect-persist
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── db.sql
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ ├── et
│ └── ConnectPersistCommand.scala
│ └── mllib
│ └── ConnectPersistApp.scala
├── delta-enhancer
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugin
│ └── et
│ └── DeltaCommand.scala
├── desktop-publish.sh
├── dev
├── change-scala-version.sh
├── change-version-to-2.11.sh
└── change-version-to-2.12.sh
├── ds-hbase-2x
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ ├── org
│ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── execution
│ │ └── datasources
│ │ └── hbase2x
│ │ ├── DefaultSource.scala
│ │ ├── HBaseConfBuilder.scala
│ │ ├── HBaseType.scala
│ │ ├── JavaDoc.java
│ │ └── SparkHBaseConf.scala
│ └── tech
│ └── mlsql
│ └── plugins
│ └── ds
│ └── MLSQLHBase2x.scala
├── echo-controller
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── mllib
│ └── echocontroller
│ └── StreamApp.scala
├── install-all.sh
├── install.sh
├── last-command
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── et
│ └── LastCommand.scala
├── mlsql-analysis-toolkit
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── analysis
│ ├── AnalysisApp.scala
│ ├── ApproxQuantile.scala
│ └── DFTool.scala
├── mlsql-assert
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── assert
│ ├── app
│ └── MLSQLAssert.scala
│ └── ets
│ ├── Assert.scala
│ └── MLSQLThrow.scala
├── mlsql-bigdl
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ ├── com
│ └── intel
│ │ └── analytics
│ │ └── bigdl
│ │ └── visualization
│ │ ├── LogTrainSummary.scala
│ │ └── WowFileWriter.scala
│ └── tech
│ └── mlsql
│ └── plugins
│ └── bigdl
│ ├── BigDLApp.scala
│ ├── BigDLFunctions.scala
│ ├── OptimizeParamExtractor.scala
│ ├── SQLBigDLClassifyExt.scala
│ ├── SQLImageLoaderExt.scala
│ ├── SQLLeNet5Ext.scala
│ ├── SQLMnistLoaderExt.scala
│ ├── WowClassNLLCriterion.scala
│ └── WowLoggerFilter.scala
├── mlsql-canal
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── scala
│ └── tech
│ └── mlsql
│ └── plugins
│ └── canal
│ ├── CanalApp.scala
│ ├── ets
│ └── BinlogToDelta.scala
│ ├── mysql
│ ├── JdbcTypeParser.scala
│ ├── MysqlType.java
│ └── statement
│ │ └── DDLStatementParser.scala
│ ├── sink
│ ├── BinlogConstants.scala
│ ├── BinlogWritter.scala
│ ├── DeltaSink.scala
│ └── Sink.scala
│ └── util
│ └── JacksonUtil.scala
├── mlsql-cli
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugin
│ └── cli
│ └── app
│ ├── CliCommands.java
│ ├── CliException.java
│ ├── CliExceptionUtils.java
│ ├── MLSQLCli.java
│ └── MLSQLCmd.java
├── mlsql-cube
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── desc.plugin
└── pom.xml
├── mlsql-ds
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── ds
│ └── app
│ ├── MLSQLDs.scala
│ └── MLSQLXml.scala
├── mlsql-excel
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ ├── com
│ └── crealytics
│ │ └── spark
│ │ └── excel
│ │ ├── DataColumn.scala
│ │ ├── DataLocator.scala
│ │ ├── DefaultSource.scala
│ │ ├── DefaultSource15.scala
│ │ ├── ExcelFileSaver.scala
│ │ ├── ExcelRelation.scala
│ │ ├── InferSchema.scala
│ │ ├── PlainNumberFormat.scala
│ │ ├── Utils.scala
│ │ ├── WorkbookReader.scala
│ │ └── package.scala
│ └── tech
│ └── mlsql
│ └── plugins
│ └── ds
│ └── MLSQLExcel.scala
├── mlsql-ext-ets
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── ext
│ └── ets
│ └── app
│ └── MLSQLETApp.scala
├── mlsql-ke
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── ke
│ ├── app
│ └── MLSQLKE.scala
│ └── ets
│ ├── KEAPISchedule.scala
│ ├── KEAutoModel.scala
│ └── KEBuildSegment.scala
├── mlsql-language-server
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── build.sh
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── langserver
│ ├── AutoSuggestWrapper.scala
│ ├── FileTracker.java
│ ├── LSContext.java
│ ├── MLSQLDocumentService.java
│ ├── MLSQLLanguageServer.java
│ ├── MLSQLWorkspaceService.java
│ ├── commons
│ └── client
│ │ ├── Message.java
│ │ └── TraceRecord.java
│ └── launchers
│ └── stdio
│ ├── Launcher.java
│ └── MLSQLDesktopApp.scala
├── mlsql-mllib
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── mllib
│ ├── app
│ └── MLSQLMllib.scala
│ └── ets
│ ├── AutoMLExt.scala
│ ├── ClassificationEvaluator.scala
│ ├── ColumnsExt.scala
│ ├── PluginBaseETAuth.scala
│ ├── RegressionEvaluator.scala
│ ├── SampleDatasetExt.scala
│ └── TakeRandomSampleExt.scala
├── mlsql-shell
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── shell
│ ├── app
│ └── MLSQLShell.scala
│ └── ets
│ ├── CopyFromLocal.scala
│ └── ShellExecute.scala
├── pom.xml
├── run-script
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── et
│ └── RunScript.scala
├── save-then-load
├── .repo
│ └── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsql
│ └── plugins
│ └── et
│ └── SaveThenLoad.scala
├── stream-persist
├── .repo
│ ├── desc.template.plugin
│ └── pom.template.xml
├── README.md
├── db.sql
├── desc.plugin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── tech
│ └── mlsq
│ └── streambootstrapatstartup
│ ├── StreamApp.scala
│ └── StreamPersistCommand.scala
└── table-repartition
├── .repo
├── desc.template.plugin
└── pom.template.xml
├── README.md
├── desc.plugin
├── pom.xml
└── src
└── main
└── java
└── tech
└── mlsql
└── plugins
└── et
└── TableRepartition.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.iml
3 | target
4 | .DS_Store
5 | /**/build
6 |
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mlsql-plugins
2 |
3 | This project is a collection of plugins for MLSQL.
 4 | Please check each module in this project for more details.
5 |
6 | ## Build Shade Jar
7 |
8 | Requirements:
9 |
10 | 1. Python >= 3.6
11 | 2. Maven >= 3.0
12 |
13 | You can install [mlsql_plugin_tool](https://github.com/allwefantasy/mlsql_plugin_tool) to build the modules in this project.
14 |
15 | Install command:
16 |
17 | ```
18 | pip install mlsql_plugin_tool
19 | ```
20 |
21 | Build shade jar command:
22 |
23 | ```
24 | mlsql_plugin_tool build --module_name xxxxx --spark spark243
25 | ```
26 |
27 | 1. spark: two options are available, spark243 and spark311
28 | 2. module_name: e.g. mlsql-excel, ds-hbase-2x
29 |
30 | Once the build succeeds, the system will show a message like the following:
31 |
32 | ```
33 |
34 | ====Build success!=====
35 | File location 0:
36 | /Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/ds-hbase-2x/target/ds-hbase-2x-2.4_2.11-0.1.0-SNAPSHOT.jar
37 |
38 | ```
39 |
40 | Then you can install this plugin (jar file) in [MLSQL Engine](https://docs.mlsql.tech/mlsql-stack/plugin/offline_install.html).
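
If your MLSQL Engine can reach the plugin store, the per-module READMEs in this repository also install plugins online. A minimal sketch (plugin names and plugin types vary per module; check each module's README for the exact command):

```sql
!plugin ds add - ds-hbase-2x-2.4;
!plugin app add - "mlsql-assert-2.4";
```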
41 |
42 | ## Plugins Supporting Both Spark 2.4.3 and 3.1.1
43 |
44 | 1. binlog2delta
45 | 2. connect-persist
46 | 3. ds-hbase-2x
47 | 4. mlsql-bigdl
48 | 5. mlsql-excel
49 | 6. stream-persist
50 | 7. mlsql-mllib
--------------------------------------------------------------------------------
/binlog2delta/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | mainClass=-
2 | version=0.1.0-SNAPSHOT
3 | author=allwefantasy
4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta
6 | scala_version=2.11
7 | mlsqlPluginType=script
8 | desc=wow
9 |
--------------------------------------------------------------------------------
/binlog2delta/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>binlog2delta_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/binlog2delta/README.md:
--------------------------------------------------------------------------------
1 | ## Install command:
2 |
3 | ```
4 | !plugin script add - binlog2delta;
5 | ```
6 |
7 | ## Usage
8 |
9 | ```sql
10 | set checkpointLocation="/tmp/cpl-binlog-m";
11 | include plugin.`binlog2delta`;
12 | ```
13 |
14 | Here are the parameters you can set before including the plugin:
15 |
16 | ```sql
17 | set streamName="binlog";
18 |
19 | set host="127.0.0.1";
20 | set port="3306";
21 | set userName="root";
22 | set password="mlsql";
23 | set bingLogNamePrefix="mysql-bin";
24 | set binlogIndex="1";
25 | set binlogFileOffset="4";
26 | set databaseNamePattern="mlsql_console";
27 | set tableNamePattern="script_file";
28 |
29 | set deltaTableHome="/tmp/binlog2delta";
30 | set idCols="id";
31 | set duration="10";
32 | set checkpointLocation="/tmp/ck-binlog2delta";
33 |
34 | ```
35 |
36 | ## Check the content in plugin
37 |
38 | ```sql
39 | !plugin script show binlog2delta/plugin.json;
40 | ```
41 |
42 | or
43 |
44 | ```sql
45 | !plugin script show binlog2delta/main.mlsql;
46 | ```
47 |
48 |
--------------------------------------------------------------------------------
/binlog2delta/desc.plugin:
--------------------------------------------------------------------------------
1 | mainClass=-
2 | version=0.1.0-SNAPSHOT
3 | author=allwefantasy
4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta
6 | scala_version=2.11
7 | mlsqlPluginType=script
8 | desc=wow
--------------------------------------------------------------------------------
/binlog2delta/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>binlog2delta_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/binlog2delta/src/main/java/tech/mlsql/plugins/binlog2delta/JavaDoc.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.binlog2delta;
2 |
3 | /**
4 | * 2019-09-16 WilliamZhu(allwefantasy@gmail.com)
5 | */
6 | public class JavaDoc {
7 | }
8 |
--------------------------------------------------------------------------------
/binlog2delta/src/main/resources/main.mlsql:
--------------------------------------------------------------------------------
1 |
2 | set streamName="binlog" where type="defaultParam";
3 |
4 | set host="127.0.0.1" where type="defaultParam";
5 | set port="3306" where type="defaultParam";
6 | set userName="root" where type="defaultParam";
7 | set password="mlsql" where type="defaultParam";
8 | set bingLogNamePrefix="mysql-bin" where type="defaultParam";
9 | set binlogIndex="1" where type="defaultParam";
10 | set binlogFileOffset="4" where type="defaultParam";
11 | set databaseNamePattern="mlsql_console" where type="defaultParam";
12 | set tableNamePattern="script_file" where type="defaultParam";
13 |
14 | set deltaTableHome="/tmp/binlog2delta" where type="defaultParam";
15 | set idCols="id" where type="defaultParam";
16 | set duration="10" where type="defaultParam";
17 | set checkpointLocation="/tmp/ck-binlog2delta" where type="defaultParam";
18 |
19 | set dbPrefix="mysql" where type="defaultParam";
20 |
21 |
22 | load binlog.`` where
23 | host="${host}"
24 | and port="${port}"
25 | and userName="${userName}"
26 | and password="${password}"
27 | and bingLogNamePrefix="${bingLogNamePrefix}"
28 | and binlogIndex="${binlogIndex}"
29 | and binlogFileOffset="${binlogFileOffset}"
30 | and databaseNamePattern="${databaseNamePattern}"
31 | and tableNamePattern="${tableNamePattern}"
32 | as binlogTable;
33 |
34 | save append binlogTable
35 | as rate.`${dbPrefix}_{db}.{table}`
36 | options mode="Append"
37 | and idCols="${idCols}"
38 | and syncType="binlog"
39 | and duration="${duration}"
40 | and checkpointLocation="${checkpointLocation}";
--------------------------------------------------------------------------------
/connect-persist/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=connect-persist-app-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist
9 | mlsqlPluginType=app
10 | desc=wow
11 |
--------------------------------------------------------------------------------
/connect-persist/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>connect-persist-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/connect-persist/README.md:
--------------------------------------------------------------------------------
1 | ## Install command:
2 |
3 | ```
4 | !plugin app add - 'connect-persist-app-2.4';
5 | ```
6 |
7 | > Notice:
 8 | > If you set the MLSQL meta store to MySQL, you should import the db.sql file into
9 | > your meta database.
10 |
11 | ## Usage
12 |
13 | Use the ET plugin to persist connect info.
14 |
15 | ```sql
16 | !connectPersist;
17 | ```
18 |
19 | Then, once the MLSQL Engine is restarted, the connect info will be
20 | restored at startup.
21 |
22 |
23 |
24 |
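
A minimal end-to-end sketch (the JDBC connection below is only a placeholder to illustrate the flow; whatever connect statements you have issued are what gets persisted):

```sql
-- register a connection as usual
connect jdbc where
 url="jdbc:mysql://127.0.0.1:3306/wow"
 and driver="com.mysql.jdbc.Driver"
 and user="root"
 and password="xxxx"
 as db_1;

-- persist all registered connect info so it can be restored after an engine restart
!connectPersist;
```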
--------------------------------------------------------------------------------
/connect-persist/db.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE `w_connect_table` (
2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
3 | `format` varchar(256) DEFAULT NULL,
4 | `db` varchar(256) DEFAULT NULL,
5 | `options` text,
6 | PRIMARY KEY (`id`)
7 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--------------------------------------------------------------------------------
/connect-persist/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=connect-persist-app-3.0
2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist
9 | mlsqlPluginType=app
10 | desc=wow
--------------------------------------------------------------------------------
/connect-persist/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>connect-persist-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/connect-persist/src/main/java/tech/mlsql/plugins/et/ConnectPersistCommand.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.et
2 |
3 | import java.util.concurrent.ConcurrentHashMap
4 |
5 | import org.apache.spark.sql.expressions.UserDefinedFunction
6 | import org.apache.spark.sql.{DataFrame, SparkSession}
7 | import streaming.dsl.{ConnectMeta, DBMappingKey}
8 | import streaming.dsl.auth.TableAuthResult
9 | import streaming.dsl.mmlib._
10 | import streaming.dsl.mmlib.algs.Functions
11 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
12 | import tech.mlsql.common.utils.classloader.ClassLoaderTool
13 | import tech.mlsql.common.utils.lang.sc.ScalaReflect
14 | import tech.mlsql.dsl.auth.ETAuth
15 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
16 | import tech.mlsql.store.DBStore
17 | import tech.mlsql.version.VersionCompatibility
18 |
19 | import scala.collection.JavaConverters._
20 |
21 | /**
22 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com)
23 | */
24 | class ConnectPersistCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth {
25 | def this() = this(BaseParams.randomUID())
26 |
27 |
28 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
29 | val session = df.sparkSession
30 | val dbMapping = ConnectMeta.toMap
31 | val items = dbMapping.toList.map(f => ConnectMetaItem(f._1.format, f._1.db, f._2))
32 | import session.implicits._
33 | val newdf = session.createDataset[ConnectMetaItem](items).toDF()
34 | DBStore.store.saveTable(session, newdf, ConnectPersistMeta.connectTableName, Option("format,db"), false)
35 | newdf
36 | }
37 |
38 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
39 | List()
40 | }
41 |
42 | override def supportedVersions: Seq[String] = {
43 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
44 | }
45 |
46 |
47 | override def doc: Doc = Doc(MarkDownDoc,
48 | s"""
49 | |
50 | |```
51 | |${codeExample.code}
52 | |```
53 | """.stripMargin)
54 |
55 |
56 | override def codeExample: Code = Code(SQLCode,
57 | """
58 | |example
59 | """.stripMargin)
60 |
61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
62 |
63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
64 |
65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
66 |
67 | }
68 |
69 | object ConnectPersistMeta {
70 | def connectTableName = "__mlsql__.connect_table"
71 | }
72 |
73 | case class ConnectMetaItem(format: String, db: String, options: Map[String, String])
74 |
--------------------------------------------------------------------------------
/connect-persist/src/main/java/tech/mlsql/plugins/mllib/ConnectPersistApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib
2 |
3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime}
4 | import _root_.streaming.dsl._
5 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
6 | import tech.mlsql.common.utils.log.Logging
7 | import tech.mlsql.datalake.DataLake
8 | import tech.mlsql.dsl.CommandCollection
9 | import tech.mlsql.ets.register.ETRegister
10 | import tech.mlsql.plugins.et.{ConnectMetaItem, ConnectPersistCommand, ConnectPersistMeta}
11 | import tech.mlsql.store.DBStore
12 | import tech.mlsql.version.VersionCompatibility
13 |
14 | /**
15 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com)
16 | */
17 | class ConnectPersistApp extends tech.mlsql.app.App with VersionCompatibility with Logging {
18 | override def run(args: Seq[String]): Unit = {
19 | val root = runtime.sparkSession
20 | import root.implicits._
21 |
22 | ETRegister.register("ConnectPersistCommand", classOf[ConnectPersistCommand].getName)
23 | CommandCollection.refreshCommandMapping(Map("connectPersist" -> "ConnectPersistCommand"))
24 |
25 | val streams = DBStore.store.tryReadTable(root, ConnectPersistMeta.connectTableName, () => root.createDataset[ConnectMetaItem](Seq()).toDF())
26 | streams.as[ConnectMetaItem].collect().foreach { item =>
27 | logInfo(s"load connect statement format: ${item.format} db:${item.db}")
28 | ConnectMeta.options(DBMappingKey(item.format, item.db), item.options)
29 | }
30 | }
31 |
32 | def runtime = {
33 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime]
34 | }
35 |
36 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
37 | }
38 |
39 |
--------------------------------------------------------------------------------
/delta-enhancer/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>delta-enhancer-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |     <dependencies>
14 |         <dependency>
15 |             <groupId>tech.mlsql</groupId>
16 |             <artifactId>delta-plus_${scala.binary.version}</artifactId>
17 |             <version>${delta-plus.version}</version>
18 |             <scope>${scope}</scope>
19 |         </dependency>
20 |     </dependencies>
21 |
22 | </project>
--------------------------------------------------------------------------------
/delta-enhancer/README.md:
--------------------------------------------------------------------------------
1 | ## Install command:
2 |
3 | ```
4 | !plugin et add tech.mlsql.plugin.et.DeltaCommand delta-enhancer
5 | named deltaEnhancer;
6 | ```
7 |
8 | ## Usage
9 |
10 | ```sql
11 | !deltaEnhancer pruneDeletes __mlsql__.plugins 10000;
12 | ```
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/delta-enhancer/desc.plugin:
--------------------------------------------------------------------------------
1 | tech.mlsql.plugin.et.DeltaCommand
2 |
--------------------------------------------------------------------------------
/delta-enhancer/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>delta-enhancer-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |     <dependencies>
14 |         <dependency>
15 |             <groupId>tech.mlsql</groupId>
16 |             <artifactId>delta-plus_${scala.binary.version}</artifactId>
17 |             <version>${delta-plus.version}</version>
18 |             <scope>${scope}</scope>
19 |         </dependency>
20 |     </dependencies>
21 |
22 | </project>
--------------------------------------------------------------------------------
/delta-enhancer/src/main/java/tech/mlsql/plugin/et/DeltaCommand.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.et
2 |
3 | import io.delta.tables.DeltaTable
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.mmlib.SQLAlg
7 | import streaming.dsl.mmlib.algs.Functions
8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
9 | import tech.mlsql.common.utils.path.PathFun
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 | import tech.mlsql.datalake.DataLake
12 | import tech.mlsql.version.VersionCompatibility
13 |
14 | /**
15 | * 2019-09-11 WilliamZhu(allwefantasy@gmail.com)
16 | */
17 | class DeltaCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams {
18 | def this() = this(BaseParams.randomUID())
19 |
20 |
21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
22 | val spark = df.sparkSession
23 |
24 | def resolveRealPath(dataPath: String) = {
25 | val dataLake = new DataLake(spark)
26 | if (dataLake.isEnable) {
27 | dataLake.identifyToPath(dataPath)
28 | } else {
29 | PathFun(path).add(dataPath).toPath
30 | }
31 | }
32 |
33 |
34 | val command = JSONTool.parseJson[List[String]](params("parameters"))
35 | command match {
36 | case Seq("pruneDeletes", dataPath, howManyHoures, _*) =>
37 | val deltaLog = DeltaTable.forPath(spark, resolveRealPath(dataPath))
38 | deltaLog.vacuum(howManyHoures.toInt)
39 | }
40 |
41 | }
42 |
43 |
44 | override def supportedVersions: Seq[String] = {
45 | Seq("1.5.0-SNAPSHOT", "1.5.0")
46 | }
47 |
48 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
49 |
50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
51 |
52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
53 |
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/desktop-publish.sh:
--------------------------------------------------------------------------------
1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins
2 | TARGET=/Users/allwefantasy/projects/mlsql-desktop
3 | #conda activate mlsql-plugin-tool
4 |
5 | mods=${1:-mlsql-language-server mlsql-excel mlsql-assert mlsql-shell}
6 |
7 | for mod in ${mods}
8 | do
9 | echo "build= $mod"
10 | mlsql_plugin_tool build --module_name ${mod} --spark spark311
11 |
12 | for os in linux mac win
13 | do
14 | cp ${SOURCE}/$mod/build/${mod}-3.0_2.12-0.1.0-SNAPSHOT.jar ${TARGET}/${os}/plugin
15 | done
16 | done
17 |
18 | #mlsql-language-server mlsql-excel mlsql-assert mlsql-shell
19 |
--------------------------------------------------------------------------------
/dev/change-scala-version.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #
4 | # Licensed to the Apache Software Foundation (ASF) under one or more
5 | # contributor license agreements. See the NOTICE file distributed with
6 | # this work for additional information regarding copyright ownership.
7 | # The ASF licenses this file to You under the Apache License, Version 2.0
8 | # (the "License"); you may not use this file except in compliance with
9 | # the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | set -e
21 |
22 | VALID_VERSIONS=( 2.11 2.12 )
23 |
24 | usage() {
25 | echo "Usage: $(basename $0) [-h|--help]
26 | where :
27 | -h| --help Display this help text
28 | valid version values : ${VALID_VERSIONS[*]}
29 | " 1>&2
30 | exit 1
31 | }
32 |
33 | if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then
34 | usage
35 | fi
36 |
37 | TO_VERSION=$1
38 |
39 | check_scala_version() {
40 | for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done
41 | echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2
42 | exit 1
43 | }
44 |
45 | check_scala_version "$TO_VERSION"
46 |
47 | if [ $TO_VERSION = "2.12" ]; then
48 | FROM_VERSION="2.11"
49 | else
50 | FROM_VERSION="2.12"
51 | fi
52 |
53 | sed_i() {
54 | sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
55 | }
56 |
57 | export -f sed_i
58 |
59 | BASEDIR=$(dirname $0)/..
60 | find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
61 | -exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \;
62 |
63 | # Also update <scala.binary.version> in parent POM
64 | # Match any scala binary version to ensure idempotency
65 | sed_i '1,/<scala\.binary\.version>[0-9]*\.[0-9]*</s/<scala\.binary\.version>[0-9]*\.[0-9]*</<scala.binary.version>'$TO_VERSION'</' \
66 |   "$BASEDIR/pom.xml"
67 |
68 | # Update source of scaladocs
69 | # echo "$BASEDIR/docs/_plugins/copy_api_dirs.rb"
70 | # sed_i 's/scala\-'$FROM_VERSION'/scala\-'$TO_VERSION'/' "$BASEDIR/docs/_plugins/copy_api_dirs.rb"
71 |
--------------------------------------------------------------------------------
/dev/change-version-to-2.11.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #
4 | # Licensed to the Apache Software Foundation (ASF) under one or more
5 | # contributor license agreements. See the NOTICE file distributed with
6 | # this work for additional information regarding copyright ownership.
7 | # The ASF licenses this file to You under the Apache License, Version 2.0
8 | # (the "License"); you may not use this file except in compliance with
9 | # the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | # This script exists for backwards compatibility. Use change-scala-version.sh instead.
21 | echo "This script is deprecated. Please instead run: change-scala-version.sh 2.11"
22 |
23 | $(dirname $0)/change-scala-version.sh 2.11
24 |
--------------------------------------------------------------------------------
/dev/change-version-to-2.12.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #
4 | # Licensed to the Apache Software Foundation (ASF) under one or more
5 | # contributor license agreements. See the NOTICE file distributed with
6 | # this work for additional information regarding copyright ownership.
7 | # The ASF licenses this file to You under the Apache License, Version 2.0
8 | # (the "License"); you may not use this file except in compliance with
9 | # the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | # This script exists for backwards compatibility. Use change-scala-version.sh instead.
21 | echo "This script is deprecated. Please instead run: change-scala-version.sh 2.12"
22 |
23 | $(dirname $0)/change-scala-version.sh 2.12
24 |
--------------------------------------------------------------------------------
/ds-hbase-2x/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=ds-hbase-2x-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ds.MLSQLHBase2x
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/ds-hbase-2x
9 | mlsqlPluginType=ds
10 | desc=wow
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/ds-hbase-2x/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```
4 | !plugin ds add - ds-hbase-2x-2.4;
5 | ```
6 |
7 | ## Usage
8 |
9 | DataFrame:
10 |
11 | ```scala
12 | val data = (0 to 255).map { i =>
13 | HBaseRecord(i, "extra")
14 | }
15 | val tableName = "t1"
16 | val familyName = "c1"
17 |
18 |
19 | import spark.implicits._
20 | sc.parallelize(data).toDF.write
21 | .options(Map(
22 | "outputTableName" -> cat,
23 | "family" -> family
24 | ) ++ options)
25 | .format("org.apache.spark.sql.execution.datasources.hbase2x")
26 | .save()
27 |
28 | val df = spark.read.format("org.apache.spark.sql.execution.datasources.hbase2x").options(
29 | Map(
30 | "inputTableName" -> tableName,
31 | "family" -> familyName,
32 | "field.type.col1" -> "BooleanType",
33 | "field.type.col2" -> "DoubleType",
34 | "field.type.col3" -> "FloatType",
35 | "field.type.col4" -> "IntegerType",
36 | "field.type.col5" -> "LongType",
37 | "field.type.col6" -> "ShortType",
38 | "field.type.col7" -> "StringType",
39 | "field.type.col8" -> "ByteType"
40 | )
41 | ).load()
42 | ```
43 |
44 | MLSQL:
45 |
46 | ```sql
47 | set rawText='''
48 | {"id":9,"content":"Spark好的语言1","label":0.0}
49 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0}
50 | {"id":12,"content":"MLSQL是一个好的语言7","label":0.0}
51 | ''';
52 |
53 | load jsonStr.`rawText` as orginal_text_corpus;
54 |
55 | select cast(id as String) as rowkey,content,label from orginal_text_corpus as orginal_text_corpus1;
56 |
57 | connect hbase2x where `zk`="127.0.0.1:2181"
58 | and `family`="cf" as hbase1;
59 |
60 | save overwrite orginal_text_corpus1
61 | as hbase2x.`hbase1:mlsql_example`;
62 |
63 | load hbase2x.`hbase1:mlsql_example` where field.type.label="DoubleType"
64 | as mlsql_example ;
65 |
66 | select * from mlsql_example as show_data;
67 | ```
68 |
69 | You should configure parameters like `zookeeper.znode.parent` and `hbase.rootdir` according to
70 | your HBase configuration (see the example after the parameter table).
71 |
72 | Parameters:
73 |
74 | | Property Name | Meaning |
75 | |---|---|
76 | | tsSuffix | overwrite the timestamp of HBase values |
77 | | namespace | HBase namespace |
78 | | family | HBase column family; family="" means load all existing families |
79 | | field.type.ck | specify the type of field `ck` (replace `ck` with your field name); supported types: LongType, FloatType, DoubleType, IntegerType, BooleanType, BinaryType, TimestampType, DateType; default: StringType |
80 |
81 |
82 |
83 |
84 |
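
For example, a connect statement that passes these settings could look like the following sketch (the ZooKeeper address, znode path, and rootdir URL are placeholders for your own cluster):

```sql
connect hbase2x where `zk`="127.0.0.1:2181"
and `zookeeper.znode.parent`="/hbase"
and `hbase.rootdir`="hdfs://namenode:8020/hbase"
and `family`="cf" as hbase1;
```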
--------------------------------------------------------------------------------
/ds-hbase-2x/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=ds-hbase-2x-3.0
2 | mainClass=tech.mlsql.plugins.ds.MLSQLHBase2x
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/ds-hbase-2x
9 | mlsqlPluginType=ds
10 | desc=wow
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseConfBuilder.scala:
--------------------------------------------------------------------------------
1 | package org.apache.spark.sql.execution.datasources.hbase2x
2 |
3 | import org.apache.hadoop.hbase.HBaseConfiguration
4 | import org.apache.spark.sql.SparkSession
5 | import org.json4s.DefaultFormats
6 | import scala.collection.JavaConversions._
7 |
8 | /**
9 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | object HBaseConfBuilder {
12 | def build(spark: SparkSession, parameters: Map[String, String]) = {
13 | val testConf = spark.sqlContext.sparkContext.conf.getBoolean(SparkHBaseConf.testConf, false)
14 | if (testConf) SparkHBaseConf.conf
15 | else {
16 | implicit val formats = DefaultFormats
17 |
18 | // task is already broadcast; since hConf is per HBaseRelation (currently), broadcast'ing
19 | // it again does not help - it actually hurts. When we add support for
20 | // caching hConf across HBaseRelation, we can revisit broadcast'ing it (with a caching
21 | // mechanism in place)
22 | val hc = HBaseConfiguration.create()
23 |
24 | if (parameters.containsKey("zk") || parameters.containsKey("hbase.zookeeper.quorum")) {
25 | hc.set("hbase.zookeeper.quorum", parameters.getOrElse("zk", parameters.getOrElse("hbase.zookeeper.quorum", "127.0.0.1:2181")))
26 | }
27 |
28 | if (parameters.containsKey("znode")) {
29 | hc.set("zookeeper.znode.parent", parameters.get("znode").get)
30 | }
31 |
32 | if (parameters.containsKey("rootdir")) {
33 | hc.set("hbase.rootdir", parameters.get("rootdir").get)
34 | }
35 |
36 | /**
37 | * When people configure the wrong ZK address, by default the HBase client will
38 | * retry infinitely. We set this group of parameters to limit the retry times.
39 | */
40 | hc.set("hbase.client.pause", parameters.getOrElse("hbase.client.pause", "1000"))
41 | hc.set("zookeeper.recovery.retry", parameters.getOrElse("zookeeper.recovery.retry", "60"))
42 | hc.set("hbase.client.retries.number", parameters.getOrElse("hbase.client.retries.number", "60"))
43 |
44 |
45 | parameters.filter { f =>
46 | f._1.startsWith("hbase.") || f._1.startsWith("zookeeper.") || f._1.startsWith("phoenix.")
47 | }.foreach { f =>
48 | hc.set(f._1, f._2)
49 | }
50 |
51 | hc
52 | }
53 |
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseType.scala:
--------------------------------------------------------------------------------
1 | package org.apache.spark.sql.execution.datasources.hbase2x
2 |
3 | /**
4 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com)
5 | */
6 | package object hbase2x {
7 | type HBaseType = Array[Byte]
8 | }
9 |
--------------------------------------------------------------------------------
/ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/JavaDoc.java:
--------------------------------------------------------------------------------
1 | package org.apache.spark.sql.execution.datasources.hbase2x;
2 |
3 | /**
4 | * 2019-07-06 WilliamZhu(allwefantasy@gmail.com)
5 | */
6 | public class JavaDoc {
7 | }
8 |
--------------------------------------------------------------------------------
/ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/SparkHBaseConf.scala:
--------------------------------------------------------------------------------
1 | package org.apache.spark.sql.execution.datasources.hbase2x
2 |
3 | import org.apache.hadoop.conf.Configuration
4 |
5 | /**
6 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com)
7 | */
8 | object SparkHBaseConf {
9 | val testConf = "spark.hbase.connector.test"
10 | val credentialsManagerEnabled = "spark.hbase.connector.security.credentials.enabled"
11 | val expireTimeFraction = "spark.hbase.connector.security.credentials.expireTimeFraction"
12 | val refreshTimeFraction = "spark.hbase.connector.security.credentials.refreshTimeFraction"
13 | val refreshDurationMins = "spark.hbase.connector.security.credentials.refreshDurationMins"
14 | val principal = "spark.hbase.connector.security.credentials"
15 | val keytab = "spark.hbase.connector.security.keytab"
16 |
17 | var conf: Configuration = _
18 | var BulkGetSize = "spark.hbase.connector.bulkGetSize"
19 | var defaultBulkGetSize = 100
20 | var CachingSize = "spark.hbase.connector.cacheSize"
21 | var defaultCachingSize = 100
22 | // in milliseconds
23 | val connectionCloseDelay = 10 * 60 * 1000
24 | }
25 |
--------------------------------------------------------------------------------
/echo-controller/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>echo-controller-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/echo-controller/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```sql
4 | !plugin app add - echo-controller-2.4;
5 | ```
6 |
7 | ## Usage
8 |
9 | ```
10 | select crawler_http("http://127.0.0.1:9003/run/script","POST",map("owner","wow","sql","select 1 as a as output;","executeMode","echo")) as c as output;
11 | ```
12 |
13 | The server will respond with `select 1 as a as output;` instead of executing the SQL.
--------------------------------------------------------------------------------
/echo-controller/desc.plugin:
--------------------------------------------------------------------------------
1 | tech.mlsql.plugins.app.echocontroller.StreamApp
2 |
--------------------------------------------------------------------------------
/echo-controller/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>echo-controller-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/echo-controller/src/main/java/tech/mlsql/plugins/mllib/echocontroller/StreamApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.echocontroller
2 |
3 | import tech.mlsql.app.CustomController
4 | import tech.mlsql.common.utils.serder.json.JSONTool
5 | import tech.mlsql.runtime.AppRuntimeStore
6 | import tech.mlsql.version.VersionCompatibility
7 |
8 | /**
9 | * 7/11/2019 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility {
12 | override def run(args: Seq[String]): Unit = {
13 | AppRuntimeStore.store.registerController("echo", classOf[EchoController].getName)
14 | }
15 |
16 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
17 | }
18 |
19 | class EchoController extends CustomController {
20 | override def run(params: Map[String, String]): String = {
21 | JSONTool.toJsonStr(List(params("sql")))
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/install-all.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ALL_MODUELS="mlsql-shell mlsql-assert mlsql-mllib mlsql-excel connect-persist last-command run-script save-then-load stream-persist table-repartition"
4 |
5 | MODUELS=${1}
6 |
7 | if [[ "${MODUELS}" == "" ]];then
 8 |   MODUELS=${ALL_MODUELS}
9 | fi
10 |
11 | for spark_version in spark243 spark311
12 | do
13 | for module in ${MODUELS}
14 | do
15 | ./install.sh ${module} ${spark_version}
16 | done
17 | done
18 |
19 |
20 | # ./install.sh ds-hbase-2x
21 | # ./install.sh mlsql-bigdl
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
1 | PROJECT=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins
2 |
3 | MOUDLE_NAME=$1
4 | VERSION="0.1.0-SNAPSHOT"
5 | V=${2:-3.0}
6 | MIDDLE="2.4_2.11"
7 |
8 | SPARK="spark311"
9 |
10 | if [[ "${V}" == "2.4" ]]
11 | then
12 | SPARK=spark243
13 | fi
14 |
15 | if [[ "${SPARK}" == "spark311" ]]
16 | then
17 | MIDDLE="3.0_2.12"
18 | fi
19 |
20 | echo ${MOUDLE_NAME}
21 | echo ${SPARK}
22 | echo ${MIDDLE}
23 |
24 | mlsql_plugin_tool build --module_name ${MOUDLE_NAME} --spark ${SPARK}
25 | mlsql_plugin_tool upload \
26 | --module_name ${MOUDLE_NAME} \
27 | --user ${STORE_USER} \
28 | --password ${STORE_PASSWORD} \
29 | --jar_path ${PROJECT}/${MOUDLE_NAME}/build/${MOUDLE_NAME}-${MIDDLE}-${VERSION}.jar
30 |
--------------------------------------------------------------------------------
/last-command/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>last-command-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/last-command/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```
4 | !plugin et add - last-command-2.4 named lastCommand;
5 | ```
6 |
7 | ## Help
8 |
9 |
10 | ```sql
11 | !show et LastCommand;
12 | ```
13 |
14 | ## Usage
15 |
16 | ```sql
17 | !hdfs -ls /tmp/;
18 | !lastCommand named hdfsTmpTable;
19 | select * from hdfsTmpTable as output;
20 | ```
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/last-command/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=last-command-2.4
2 | mainClass=tech.mlsql.plugins.et.LastCommand
3 | version=0.1.0-SNAPSHOT
4 | author=allwefantasy
5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/last-command
7 | scala_version=2.11
8 | spark_version=2.4
9 | mlsqlPluginType=et
10 | desc=last command
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/last-command/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>last-command-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/last-command/src/main/java/tech/mlsql/plugins/et/LastCommand.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.et
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.mmlib._
7 | import streaming.dsl.mmlib.algs.Functions
8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
9 | import tech.mlsql.common.utils.serder.json.JSONTool
10 | import tech.mlsql.version.VersionCompatibility
11 |
12 |
13 | class LastCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams {
14 | def this() = this(BaseParams.randomUID())
15 |
16 | //
17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
18 |
19 | val context = ScriptSQLExec.context()
20 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray
21 |
22 | //!last named table1;
23 | context.execListener.getLastSelectTable() match {
24 | case Some(tableName) =>
25 | command match {
26 | case Array("named", newTableName) =>
27 | val newDf = context.execListener.sparkSession.table(tableName)
28 | newDf.createOrReplaceTempView(newTableName)
29 | newDf
30 | }
31 | case None => throw new RuntimeException("no table found in previous command")
32 | }
33 | }
34 |
35 |
36 | override def supportedVersions: Seq[String] = {
37 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
38 | }
39 |
40 |
41 | override def doc: Doc = Doc(MarkDownDoc,
42 | s"""
43 | |When you want to take the result of a command and use it
44 | | in the next command (SQL), you can use the !last command.
45 | |
46 | |For example:
47 | |
48 | |```
49 | |${codeExample.code}
50 | |```
51 | """.stripMargin)
52 |
53 |
54 | override def codeExample: Code = Code(SQLCode,
55 | """
56 | |!hdfs /tmp;
57 | |!last named hdfsTmpTable;
58 | |select * from hdfsTmpTable;
59 | """.stripMargin)
60 |
61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
62 |
63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
64 |
65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
66 | }
67 |
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/.repo/pom.template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/README.md:
--------------------------------------------------------------------------------
1 | ## Install command:
2 |
3 | ```
4 | !plugin app add - "mlsql-analysis-toolkit-2.4";
5 | ```
6 |
7 |
8 | ## Usage
9 |
10 | To compute the median of a field in a table:
11 |
12 | ```sql
13 | !approxQuantile time_temp birthday "0.5" valued time_quantile;
14 | select ${time_quantile} as quantile as output;
15 | ```
16 |
17 |
18 | To create a table with an id column of a fixed size:
19 |
20 | ```sql
21 | !dataframe build range 100000 named table1;
22 | ```
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-analysis-toolkit-2.4
2 | mainClass=tech.mlsql.plugins.analysis.AnalysisApp
3 | version=0.1.0-SNAPSHOT
4 | author=allwefantasy
5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-analysis-toolkit
7 | scala_version=2.11
8 | spark_version=2.4
9 | mlsqlPluginType=app
10 | desc=wow
11 |
12 |
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <parent>
 6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
 7 |         <groupId>tech.mlsql</groupId>
 8 |         <version>0.1.0-SNAPSHOT</version>
 9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version}</artifactId>
13 |
14 | </project>
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/AnalysisApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.analysis
2 |
3 | import tech.mlsql.dsl.CommandCollection
4 | import tech.mlsql.ets.register.ETRegister
5 | import tech.mlsql.version.VersionCompatibility
6 |
7 | /**
8 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com)
9 | */
10 | class AnalysisApp extends tech.mlsql.app.App with VersionCompatibility {
11 | override def run(args: Seq[String]): Unit = {
12 | ETRegister.register("ApproxQuantile", classOf[ApproxQuantile].getName)
13 | CommandCollection.refreshCommandMapping(Map("approxQuantile" -> "ApproxQuantile"))
14 |
15 | ETRegister.register("DFTool", classOf[DFTool].getName)
16 | CommandCollection.refreshCommandMapping(Map("dataframe" -> "DFTool"))
17 | }
18 |
19 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
20 | }
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/ApproxQuantile.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.analysis
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.mmlib.SQLAlg
7 | import streaming.dsl.mmlib.algs.Functions
8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
9 | import tech.mlsql.common.utils.serder.json.JSONTool
10 | import tech.mlsql.version.VersionCompatibility
11 |
12 | /**
13 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com)
14 | */
15 | class ApproxQuantile(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams {
16 | def this() = this(BaseParams.randomUID())
17 |
18 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
19 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray
20 |
21 | def compute(table: String, field: String, quantile: String, error: String) = {
22 | df.sparkSession.table(table).stat.approxQuantile(field, Array(quantile.toDouble), error.toDouble)
23 | }
24 |
25 | var tableName: String = null
26 |
27 | val res = command match {
28 | case Array(table, field, quantile) =>
29 | compute(table, field, quantile, "0").head
30 |
31 | case Array(table, field, quantile, "valued", value) =>
32 | val f = compute(table, field, quantile, "0").head
33 | ScriptSQLExec.context().execListener.addEnv(value, f.toString)
34 | f
35 | case Array(table, field, quantile, "named", value) =>
36 | tableName = value
37 | compute(table, field, quantile, "0").head
38 |
39 | case Array(table, field, quantile, error) =>
40 | compute(table, field, quantile, error).head
41 |
42 | case Array(table, field, quantile, error, "valued", value) =>
43 | val f = compute(table, field, quantile, error).head
44 | ScriptSQLExec.context().execListener.addEnv(value, f.toString)
45 | f
46 | case Array(table, field, quantile, error, "named", value) =>
47 | tableName = value
48 | compute(table, field, quantile, error).head
49 | }
50 |
51 | import df.sparkSession.implicits._
52 | val newdf = df.sparkSession.createDataset[Double](Seq(res)).toDF("value")
53 | if (tableName != null) {
54 | newdf.createOrReplaceTempView(tableName)
55 | }
56 | newdf
57 |
58 | }
59 |
60 |
61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
62 |
63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
64 |
65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
66 |
67 | override def supportedVersions: Seq[String] = {
68 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/DFTool.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.analysis
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.mmlib.SQLAlg
6 | import streaming.dsl.mmlib.algs.Functions
7 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
8 | import tech.mlsql.common.utils.serder.json.JSONTool
9 | import tech.mlsql.version.VersionCompatibility
10 |
11 | /**
12 | * 1/5/2020 WilliamZhu(allwefantasy@gmail.com)
13 | */
14 | class DFTool(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams {
15 | def this() = this(BaseParams.randomUID())
16 |
17 | /**
18 | * !dataframe build range 100 named table1;
19 | */
20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
21 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray
22 |
23 | val newdf = command match {
24 | case Array("build", "range", end, "named", table) =>
25 | val temp = df.sparkSession.range(end.toLong).toDF()
26 | temp.createOrReplaceTempView(table)
27 | temp
28 | }
29 | newdf
30 | }
31 |
32 |
33 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
34 |
35 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
36 |
37 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
38 |
39 | override def supportedVersions: Seq[String] = {
40 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
41 | }
42 | }
43 |
44 |
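45 | // Usage sketch (assuming the ET is registered under the name DFTool; registration is not part of this file):
46 | //
47 | //   run command as DFTool.`` where parameters='''["build", "range", "100", "named", "table1"]''';
48 | //
49 | // which is the expanded form of the `!dataframe build range 100 named table1;` command in the scaladoc above.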
--------------------------------------------------------------------------------
/mlsql-assert/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-assert-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert
9 | mlsqlPluginType=app
10 | desc=mlsql-assert
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-assert/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-assert-{{spark_binary_version}}_{{scala_binary_version}}
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-assert/README.md:
--------------------------------------------------------------------------------
1 | # mlsql-assert
2 |
3 | This plugin provides assertions on tables.
4 |
5 |
6 | ## Install from store
7 |
8 | Execute the following command in the web console:
9 |
10 | ```
11 | !plugin app add - "mlsql-assert-2.4";
12 | ```
13 |
14 |
15 | ## Install Manually
16 |
17 | First, build the shaded jar in your terminal:
18 |
19 | ```shell
20 | pip install mlsql_plugin_tool
21 | mlsql_plugin_tool build --module_name mlsql-assert --spark spark243
22 | ```
23 |
24 | Then modify the start script of MLSQL Engine.
25 |
26 | Add Jar:
27 |
28 | ```
29 | --jars YOUR_JAR_PATH
30 | ```
31 |
32 | Register Class:
33 |
34 | ```
35 | -streaming.plugin.clzznames tech.mlsql.plugins.assert.app.MLSQLAssert
36 | ```
37 |
38 | If there is more than one class, use commas to separate them. For example:
39 |
40 | ```
41 | -streaming.plugin.clzznames classA,classB,classC
42 | ```
43 |
44 | ## Usage
45 |
46 | ```sql
47 |
48 | -- !plugin app remove "mlsql-assert-2.4";
49 | -- !plugin app add - "mlsql-assert-2.4";
50 | -- create test data
51 | set jsonStr='''
52 | {"features":[5.1,3.5,1.4,0.2],"label":0.0},
53 | {"features":[5.1,3.5,1.4,0.2],"label":1.0}
54 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}
55 | {"features":[4.4,2.9,1.4,0.2],"label":0.0}
56 | {"features":[5.1,3.5,1.4,0.2],"label":1.0}
57 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}
58 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}
59 | {"features":[4.7,3.2,1.3,0.2],"label":1.0}
60 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}
61 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}
62 | ''';
63 | load jsonStr.`jsonStr` as data;
64 | select vec_dense(features) as features ,label as label from data
65 | as data1;
66 |
67 | -- use RandomForest
68 | train data1 as RandomForest.`/tmp/model` where
69 |
70 | -- once set to true, every time you run this script MLSQL will generate a new directory for your model
71 | keepVersion="true"
72 |
73 | -- specify the test dataset which will be fed to the evaluator to generate metrics such as F1 and accuracy
74 | and evaluateTable="data1"
75 |
76 | -- specify group 0 parameters
77 | and `fitParam.0.featuresCol`="features"
78 | and `fitParam.0.labelCol`="label"
79 | and `fitParam.0.maxDepth`="2"
80 |
81 | -- specify group 1 parameters
82 | and `fitParam.1.featuresCol`="features"
83 | and `fitParam.1.labelCol`="label"
84 | and `fitParam.1.maxDepth`="10"
85 | as model_result;
86 |
87 | select name,value from model_result where name="status" as result;
88 | -- make sure the status of all models is success.
89 | !assert result ''':value=="success"''' "all model status should be success";
90 |
91 | ```
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
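102 | Besides `!assert`, the app also registers a `throw` command (see MLSQLAssert.scala), which stops
103 | the script with a custom message. A minimal sketch:
104 | 
105 | ```sql
106 | !throw "all model status should be success";
107 | ```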
--------------------------------------------------------------------------------
/mlsql-assert/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-assert-3.0
2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert
9 | mlsqlPluginType=app
10 | desc=mlsql-assert
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-assert/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-assert-3.0_2.12
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-assert/src/main/java/tech/mlsql/plugins/assert/app/MLSQLAssert.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.assert.app
2 |
3 | import tech.mlsql.common.utils.log.Logging
4 | import tech.mlsql.dsl.CommandCollection
5 | import tech.mlsql.ets.register.ETRegister
6 | import tech.mlsql.plugins.assert.ets.{Assert, MLSQLThrow}
7 | import tech.mlsql.version.VersionCompatibility
8 |
9 | /**
10 | * 4/6/2021 WilliamZhu(allwefantasy@gmail.com)
11 | */
12 | class MLSQLAssert extends tech.mlsql.app.App with VersionCompatibility with Logging {
13 | override def run(args: Seq[String]): Unit = {
14 | ETRegister.register("Assert", classOf[Assert].getName)
15 | ETRegister.register("Throw", classOf[MLSQLThrow].getName)
16 | CommandCollection.refreshCommandMapping(Map("assert" ->
17 | """
18 | |run command as Assert.`` where parameters='''{:all}'''
19 | |""".stripMargin))
20 | CommandCollection.refreshCommandMapping(Map("throw" ->
21 | """
22 | |run command as Throw.`` where msg='''{0}'''
23 | |""".stripMargin))
24 | }
25 |
26 |
27 | override def supportedVersions: Seq[String] = {
28 | MLSQLAssert.versions
29 | }
30 | }
31 |
32 | object MLSQLAssert {
33 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1")
34 | }
--------------------------------------------------------------------------------
/mlsql-assert/src/main/java/tech/mlsql/plugins/assert/ets/MLSQLThrow.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.assert.ets
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.mlsql.session.MLSQLException
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.auth.TableAuthResult
7 | import streaming.dsl.mmlib.algs.Functions
8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
9 | import streaming.dsl.mmlib._
10 | import tech.mlsql.dsl.auth.ETAuth
11 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
12 | import tech.mlsql.plugins.assert.app.MLSQLAssert
13 | import tech.mlsql.version.VersionCompatibility
14 |
15 | /**
16 | * 4/9/2021 WilliamZhu(allwefantasy@gmail.com)
17 | */
18 | class MLSQLThrow(override val uid: String) extends SQLAlg
19 | with VersionCompatibility with Functions with WowParams with ETAuth {
20 | def this() = this(BaseParams.randomUID())
21 |
22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
23 | throw new RuntimeException(params("msg"))
24 | }
25 |
26 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
27 |
28 | override def skipPathPrefix: Boolean = true
29 |
30 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = throw new MLSQLException(s"${getClass.getName} not support register ")
31 |
32 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = throw new MLSQLException(s"${getClass.getName} not support register ")
33 |
34 | override def supportedVersions: Seq[String] = MLSQLAssert.versions
35 |
36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
37 | List()
38 | }
39 |
40 | override def modelType: ModelType = ProcessType
41 |
42 | override def doc: Doc = Doc(HtmlDoc,
43 | """
44 | |
45 | | This ET is used to stop the execution of the script.
46 | """.stripMargin)
47 |
48 |
49 | override def codeExample: Code = Code(SQLCode,
50 | """
51 | |
52 | |!throw "exception msg";
53 | |run command as Throw.`` where msg="";
54 | """.stripMargin)
55 | }
56 |
--------------------------------------------------------------------------------
/mlsql-bigdl/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-bigdl-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl
9 | mlsqlPluginType=app
10 | desc=bigdl
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-bigdl/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```
4 | !plugin app add - "mlsql-bigdl-2.4";
5 | ```
6 |
7 | ## Usage
8 |
9 | Check this [Doc](http://docs.mlsql.tech/zh/dl/load_image.html)
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/mlsql-bigdl/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-bigdl-3.0
2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl
9 | mlsqlPluginType=app
10 | desc=bigdl
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/LogTrainSummary.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.intel.analytics.bigdl.visualization
20 |
21 | import org.apache.spark.internal.Logging
22 | import streaming.log.WowLog
23 |
24 |
25 | class LogTrainSummary(logDir: String,
26 | appName: String) extends TrainSummary(logDir, appName) with Logging with WowLog {
27 |
28 | override def addScalar(tag: String, value: Float, step: Long): LogTrainSummary.this.type = {
29 | // tag match {
30 | // case "Throughput" =>
31 | // logInfo(format(s"global step: ${step} Throughput is ${value} records/second. "))
32 | // case "Loss" =>
33 | // logInfo(format(s"global step: ${step} Loss is ${value}"))
34 | // case _ =>
35 | // logInfo(format(s"global step: ${step} ${tag} is ${value}"))
36 | // }
37 |
38 | super.addScalar(tag, value, step)
39 | }
40 | }
41 |
42 | class LogValidateSummary(logDir: String,
43 | appName: String) extends ValidationSummary(logDir, appName) with Logging with WowLog {
44 | override def addScalar(tag: String, value: Float, step: Long): LogValidateSummary.this.type = {
45 | //logInfo(format(s"global step: ${step} ${tag} is ${value}"))
46 | super.addScalar(tag, value, step)
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/WowFileWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.intel.analytics.bigdl.visualization
20 |
21 | import com.intel.analytics.bigdl.visualization.tensorboard.FileWriter
22 |
23 |
24 | class WowFileWriter(folder: String) extends FileWriter(folder) {
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/BigDLApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.bigdl
2 |
3 | import tech.mlsql.ets.register.ETRegister
4 | import tech.mlsql.version.VersionCompatibility
5 |
6 | /**
7 | * 5/4/2020 WilliamZhu(allwefantasy@gmail.com)
8 | */
9 | class BigDLApp extends tech.mlsql.app.App with VersionCompatibility {
10 | override def run(args: Seq[String]): Unit = {
11 | ETRegister.register("ImageLoaderExt", classOf[SQLImageLoaderExt].getName)
12 | ETRegister.register("MnistLoaderExt", classOf[SQLMnistLoaderExt].getName)
13 | ETRegister.register("BigDLClassifyExt", classOf[SQLBigDLClassifyExt].getName)
14 | ETRegister.register("LeNet5Ext", classOf[SQLLeNet5Ext].getName)
15 | }
16 |
17 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
18 | }
19 |
20 |
--------------------------------------------------------------------------------
/mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/WowClassNLLCriterion.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.bigdl
2 |
3 | import com.intel.analytics.bigdl.nn.ClassNLLCriterion
4 | import com.intel.analytics.bigdl.tensor.Tensor
5 | import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
6 | import streaming.dsl.mmlib.algs.bigdl.ClassWeightParamExtractor
7 |
8 |
9 | object WowClassNLLCriterion {
10 | def apply(
11 | paramsExtractor: ClassWeightParamExtractor
12 | )(implicit ev: TensorNumeric[Float]): ClassNLLCriterion[Float] = {
13 | val weights = paramsExtractor.weights.map(f => Tensor(f, Array(f.size))).getOrElse(null)
14 | new ClassNLLCriterion[Float](weights,
15 | paramsExtractor.sizeAverage.getOrElse(true),
16 | paramsExtractor.logProbAsInput.getOrElse(true),
17 | paramsExtractor.paddingValue.getOrElse(-1)
18 | )
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/mlsql-canal/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-canal-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.canal.CanalApp
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=zml1206
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal
9 | mlsqlPluginType=app
10 | desc=mlsql-canal
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-canal/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-canal-{{spark_binary_version}}_{{scala_binary_version}}
13 |
14 |
15 |
16 |
17 |
18 |
19 | shade
20 |
21 |
22 |
23 | org.apache.maven.plugins
24 | maven-shade-plugin
25 | 3.2.0
26 |
27 |
28 |
29 | *:*
30 |
31 | META-INF/*.SF
32 | META-INF/*.DSA
33 | META-INF/*.RSA
34 |
35 |
36 |
37 | false
38 |
39 |
40 | org.apache.poi
41 | shadeio.poi
42 |
43 |
44 | com.norbitltd.spoiwo
45 | shadeio.spoiwo
46 |
47 |
48 | com.github.pjfanning
49 | shadeio.pjfanning
50 |
51 |
52 | org.apache.commons.compress
53 | shadeio.commons.compress
54 |
55 |
56 |
57 |
58 |
59 |
60 | package
61 |
62 | shade
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/mlsql-canal/README.md:
--------------------------------------------------------------------------------
1 | # mlsql-canal
2 |
3 | Used in streaming: parses Canal binlog, stores it into Delta Lake, and supports DDL.
4 | Only supports Spark 3.X.
5 |
6 | ## Install
7 |
8 | ```
9 | !plugin ds add - "mlsql-canal-3.0";
10 | ```
11 |
12 | or install as app:
13 |
14 | ```
15 | !plugin app add "tech.mlsql.plugins.canal.CanalApp" "mlsql-canal-3.0";
16 | ```
17 |
18 |
19 | ## Usage
20 |
21 | ```sql
22 | set streamName="binlog_to_delta";
23 |
24 | load kafka.`binlog-canal_test`
25 | options `kafka.bootstrap.servers` = "***"
26 | and `maxOffsetsPerTrigger`="600000"
27 | as kafka_record;
28 |
29 | select cast(value as string) as value from kafka_record
30 | as kafka_value;
31 |
32 | save append kafka_value
33 | as custom.``
34 | options mode = "Append"
35 | and duration = "20"
36 | and sourceTable = "kafka_value"
37 | and checkpointLocation = "checkpoint/binlog_to_delta"
38 | and code = '''
39 | run kafka_value
40 | as BinlogToDelta.``
41 | options dbTable = "canal_test.test";
42 | ''';
43 | ```
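44 | 
45 | `BinlogToDelta` also accepts an optional `maxTs` option (default `"0"`, see BinlogToDelta.scala);
46 | only binlog records with `ts >= maxTs` are applied, which can be used to skip changes that are
47 | already present in the Delta table. A sketch:
48 | 
49 | ```sql
50 | run kafka_value
51 | as BinlogToDelta.``
52 | options dbTable = "canal_test.test"
53 | and maxTs = "1623378600000";
54 | ```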
--------------------------------------------------------------------------------
/mlsql-canal/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-canal-3.0
2 | mainClass=tech.mlsql.plugins.canal.CanalApp
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=zml1206
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal
9 | mlsqlPluginType=app
10 | desc=mlsql-canal
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-canal/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-canal-3.0_2.12
13 |
14 |
15 |
16 |
17 |
18 |
19 | shade
20 |
21 |
22 |
23 | org.apache.maven.plugins
24 | maven-shade-plugin
25 | 3.2.0
26 |
27 |
28 |
29 | *:*
30 |
31 | META-INF/*.SF
32 | META-INF/*.DSA
33 | META-INF/*.RSA
34 |
35 |
36 |
37 | false
38 |
39 |
40 | org.apache.poi
41 | shadeio.poi
42 |
43 |
44 | com.norbitltd.spoiwo
45 | shadeio.spoiwo
46 |
47 |
48 | com.github.pjfanning
49 | shadeio.pjfanning
50 |
51 |
52 | org.apache.commons.compress
53 | shadeio.commons.compress
54 |
55 |
56 |
57 |
58 |
59 |
60 | package
61 |
62 | shade
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/CanalApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal
2 |
3 | import tech.mlsql.ets.register.ETRegister
4 | import tech.mlsql.version.VersionCompatibility
5 |
6 | /**
7 | * Created by zhuml on 2021/6/11.
8 | */
9 | class CanalApp extends tech.mlsql.app.App with VersionCompatibility {
10 |
11 | override def run(args: Seq[String]): Unit = {
12 | ETRegister.register("BinlogToDelta", "tech.mlsql.plugins.canal.ets.BinlogToDelta")
13 | }
14 |
15 | override def supportedVersions: Seq[String] = Seq("1.6.0-SNAPSHOT")
16 |
17 | }
18 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/ets/BinlogToDelta.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.ets
2 |
3 | import org.apache.spark.ml.param.Param
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.mlsql.session.MLSQLException
6 | import org.apache.spark.sql.{DataFrame, SparkSession}
7 | import streaming.dsl.mmlib._
8 | import streaming.dsl.mmlib.algs.param.WowParams
9 | import tech.mlsql.common.utils.log.Logging
10 | import tech.mlsql.plugins.canal.sink.{BinlogWritter, DeltaSink}
11 |
12 | /**
13 | * Created by zhuml on 2021/6/11.
14 | */
15 | class BinlogToDelta(override val uid: String) extends SQLAlg with WowParams with Logging {
16 |
17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
18 |
19 | val spark = df.sparkSession
20 |
21 | params.get(dbTable.name)
22 | .map(m => set(dbTable, m)).getOrElse {
23 | throw new MLSQLException(s"${dbTable.name} is required")
24 | }
25 | params.get(maxTs.name)
26 | .map(m => set(maxTs, m)).getOrElse {
27 | set(maxTs, "0")
28 | }
29 |
30 | val sink = new DeltaSink(spark, $(dbTable))
31 | new BinlogWritter(sink, df, $(maxTs).toLong).write
32 |
33 | spark.emptyDataFrame
34 | }
35 |
36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = {
37 | throw new RuntimeException(s"${
38 | getClass.getName
39 | } not support load function.")
40 | }
41 |
42 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String,
43 | String]): UserDefinedFunction = {
44 | throw new RuntimeException(s"${
45 | getClass.getName
46 | } not support predict function.")
47 | }
48 |
49 | override def explainParams(sparkSession: SparkSession): DataFrame = {
50 | _explainParams(sparkSession)
51 | }
52 |
53 | final val dbTable: Param[String] = new Param[String](this, "dbTable", "db.table")
54 | final val maxTs: Param[String] = new Param[String](this, "maxTs", "delta table max ts; only binlog records with ts >= maxTs are applied (default 0)")
55 |
56 | override def doc: Doc = Doc(MarkDownDoc,
57 | """
58 | |BinlogToDelta: parses CDC (change data capture) binlog records and syncs them into a Delta table
59 | |
60 | |```sql
61 | |run table as BinlogToDelta.``
62 | |options dbTable="a.b"
63 | | as t;
64 | |```
65 | |
66 | """.stripMargin)
67 |
68 | override def modelType: ModelType = ProcessType
69 |
70 | def this() = this(WowParams.randomUID())
71 | }
72 |
73 |
74 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/mysql/JdbcTypeParser.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.mysql
2 |
3 | import com.alibaba.druid.sql.ast.{SQLDataType, SQLDataTypeImpl}
4 | import org.apache.spark.sql.types.{DataType, DecimalType, StructField, StructType}
5 |
6 | /**
7 | * Created by zhuml on 2021/6/11.
8 | */
9 | object JdbcTypeParser {
10 |
11 | val UNSIGNED = """.*(unsigned)""".r
12 |
13 | // determine whether the MySQL type is signed (any type containing "unsigned" is not)
14 | def isSigned(typeName: String) = {
15 | typeName.trim match {
16 | case UNSIGNED(unsigned) => false
17 | case _ => true
18 | }
19 | }
20 |
21 | val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r
22 | val FIXED_NUMERIC = """numeric\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r
23 | val FIXED_SCALE = """\w*\(\s*(\d+)\s*\)""".r
24 |
25 |
26 | // decimal/numeric types are exact decimal numbers with a fixed precision (total number of digits) and scale (digits to the right of the decimal point)
27 | def parsePrecisionScale(name: String) = {
28 | name match {
29 | case "decimal" | "numeric" => Array(DecimalType.SYSTEM_DEFAULT.precision, DecimalType.SYSTEM_DEFAULT.scale)
30 | case FIXED_DECIMAL(precision, scale) => Array(precision.toInt, scale.toInt)
31 | case FIXED_NUMERIC(precision, scale) => Array(precision.toInt, scale.toInt)
32 | case FIXED_SCALE(scale) => Array(scale.toInt, 0)
33 | case _ => Array(0, 0)
34 | }
35 | }
36 |
37 | def getMysqlStructType(sqlTypeMap: Map[String, Int], mysqlTypeMap: Map[String, String]): StructType = {
38 |
39 | val fields = mysqlTypeMap.map(k => {
40 | val sqlType = sqlTypeMap(k._1)
41 | val Array(precision, scale) = parsePrecisionScale(k._2)
42 | val signed = isSigned(k._2)
43 | val columnType = getCatalystTypePrivate(sqlType, precision, scale, signed).asInstanceOf[DataType]
44 | StructField(k._1, columnType)
45 | }).toArray
46 | new StructType(fields)
47 | }
48 |
49 | def getSqlTypeCode(name: String): Integer = {
50 | val _type = """\w*""".r.findFirstIn(name).getOrElse("").toUpperCase
51 | MysqlType.valueOf(_type).getVendorTypeNumber
52 | }
53 |
54 | def sqlTypeToDataType(sqlDataType: SQLDataType): DataType = {
55 | val name = sqlDataType.getName
56 | val Array(precision, scale) = parsePrecisionScale(name)
57 | val sqlType = getSqlTypeCode(name)
58 | getCatalystTypePrivate(sqlType, precision, scale, !sqlDataType.asInstanceOf[SQLDataTypeImpl].isUnsigned).asInstanceOf[DataType]
59 | }
60 |
61 | // JDBC type to Catalyst type: reflectively invoke Spark's private JdbcUtils.getCatalystType(sqlType, precision, scale, signed)
62 | lazy val getCatalystTypePrivate = {
63 | import scala.reflect.runtime.{universe => ru}
64 | val classMirror = ru.runtimeMirror(getClass.getClassLoader)
65 | val JdbcUtils = classMirror.staticModule("org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils")
66 | val methods = classMirror.reflectModule(JdbcUtils)
67 | val instanceMirror = classMirror.reflect(methods.instance)
68 | val method = methods.symbol.typeSignature.member(ru.TermName("getCatalystType")).asMethod
69 |
70 | instanceMirror.reflectMethod(method)
71 | }
72 |
73 |
74 | }
75 |
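76 | // Examples of the parsing helpers above (values follow directly from the regexes):
77 | //   parsePrecisionScale("decimal(10,2)")  // => Array(10, 2)
78 | //   parsePrecisionScale("int(11)")        // => Array(11, 0)
79 | //   isSigned("int(10) unsigned")          // => false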
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogConstants.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.sink
2 |
3 | /**
4 | * Created by zhuml on 2021/6/11.
5 | */
6 | object BinlogConstants {
7 |
8 | val TS_FIELD = "___ts___"
9 | val DELETE_FIELD = "___delete___"
10 | }
11 |
12 | case class BinlogRecord(data: Array[Map[String, String]],
13 | database: String,
14 | es: String,
15 | id: Long,
16 | isDdl: Boolean,
17 | mysqlType: Map[String, String],
18 | old: Array[Map[String, String]],
19 | pkNames: Array[String],
20 | sql: String,
21 | sqlType: Map[String, Int],
22 | table: String,
23 | ts: Long,
24 | `type`: String)
25 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogWritter.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.sink
2 |
3 | import org.apache.spark.sql.DataFrame
4 | import tech.mlsql.plugins.canal.mysql.statement.DDLStatementParser
5 | import tech.mlsql.plugins.canal.util.JacksonUtil
6 |
7 | /**
8 | * Created by zhuml on 2021/6/11.
9 | */
10 | class BinlogWritter(@transient sink: Sink, df: DataFrame, maxTs: Long) extends Serializable {
11 |
12 | val spark = df.sparkSession
13 |
14 | def write = {
15 | sink.addTsIfNotExsit
16 | val filterDF = filter()
17 | // split the batch into segments at DDL boundaries; merge each segment, then apply the schema change
18 | val ddls = filterDF.filter(r => r.isDdl && Array("ALTER", "TRUNCATE").contains(r.`type`)).collect()
19 | val dmlDS = filterDF.filter(r => !r.isDdl && Array("INSERT", "UPDATE", "DELETE").contains(r.`type`.toUpperCase))
20 | var tsMin = 0L
21 | var tsMax = 0L
22 | ddls.foreach(ddl => {
23 | val ddlParser = new DDLStatementParser(sink.tableLoad, ddl.sql)
24 | ddlParser.parseDF()
25 | if (ddlParser.isUpdate) {
26 | tsMax = ddl.ts
27 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin && r.ts < tsMax))
28 | sink.updateSchema(ddlParser.df)
29 | tsMin = tsMax
30 | }
31 | })
32 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin))
33 | }
34 |
35 | def filter() = {
36 | import spark.implicits._
37 | val table = sink.table
38 | df.map(r => JacksonUtil.fromJson(r.getString(0), classOf[BinlogRecord]))
39 | .filter(r => r.ts >= maxTs
40 | && s"${r.database}.${r.table}".equals(table))
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/DeltaSink.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.sink
2 |
3 | import io.delta.tables.DeltaTable
4 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
5 | import tech.mlsql.common.utils.path.PathFun
6 | import tech.mlsql.datalake.DataLake
7 |
8 | /**
9 | * Created by zhuml on 2021/6/11.
10 | */
11 | class DeltaSink(spark: SparkSession, dbTable: String) extends Sink(dbTable: String) {
12 |
13 | val dataLake = new DataLake(spark)
14 |
15 | val finalPath = if (dataLake.isEnable) {
16 | dataLake.identifyToPath(dbTable)
17 | } else {
18 | PathFun(dbTable).add(dbTable).toPath
19 | }
20 |
21 | override def tableLoad() = spark.read.format("delta").load(finalPath)
22 |
23 | override def updateSchema(df: DataFrame): Unit = {
24 | df.write
25 | .format("delta")
26 | .mode("overwrite")
27 | .option("overwriteSchema", "true")
28 | .save(finalPath)
29 | }
30 |
31 | override def mergeData(ds: Dataset[BinlogRecord]): Unit = {
32 | val records = ds.take(1)
33 | if (records.length > 0) {
34 | val record = records(0)
35 | val schema = tableLoad.drop(BinlogConstants.TS_FIELD).schema
36 | val changesDF = duplicate(ds, schema)
37 | mergeToDelta(changesDF, record.pkNames, BinlogConstants.DELETE_FIELD)
38 | }
39 | }
40 |
41 | def mergeToDelta(df: DataFrame, pkNames: Array[String], deleteField: String): Unit = {
42 | val deltaTable = DeltaTable.forPath(spark, finalPath)
43 | val condition = pkNames.map(pk => s"s.${pk} = t.${pk}").mkString(" and ")
44 | deltaTable.as("t")
45 | .merge(
46 | df.as("s"), condition)
47 | .whenMatched(s"s.${deleteField} = true")
48 | .delete()
49 | .whenMatched().updateAll()
50 | .whenNotMatched(s"s.${deleteField} = false").insertAll()
51 | .execute()
52 | }
53 | }
54 |
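55 | // Merge semantics of mergeToDelta above: rows are matched on the primary-key columns; matched rows
56 | // flagged with ___delete___ = true are deleted, other matched rows are updated, and unmatched rows
57 | // that are not deletes are inserted.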
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/Sink.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.sink
2 |
3 | import org.apache.spark.sql.functions._
4 | import org.apache.spark.sql.types._
5 | import org.apache.spark.sql.{DataFrame, Dataset, functions => F}
6 | import tech.mlsql.plugins.canal.util.JacksonUtil
7 |
8 | /**
9 | * Created by zhuml on 2021/6/11.
10 | */
11 | abstract class Sink(val table: String) {
12 |
13 | def tableLoad: DataFrame
14 |
15 | def updateSchema(df: DataFrame)
16 |
17 | def mergeData(ds: Dataset[BinlogRecord])
18 |
19 | def addTsIfNotExsit = {
20 | {
21 | val df = tableLoad
22 | if (!df.schema.fieldNames.contains(BinlogConstants.TS_FIELD)) {
23 | updateSchema(df.withColumn(BinlogConstants.TS_FIELD, typedLit[Long](0)))
24 | }
25 | }
26 | }
27 |
28 | // deduplicate binlog records per primary key (keep the latest change by ts) and cast values to match the table schema
29 | def duplicate(df: Dataset[BinlogRecord],
30 | schema: StructType): DataFrame = {
31 | import df.sparkSession.implicits._
32 | val schemaMap = schema.fields.map(s => s.name -> s.dataType).toMap
33 |
34 | val f = F.udf((dataJson: String) => {
35 | val dataMap = JacksonUtil.fromJson(dataJson, classOf[Map[String, String]])
36 | .map(data => {
37 | if (data._2 != null) {
38 | schemaMap.get(data._1) match {
39 | case Some(IntegerType) => (data._1, data._2.toInt)
40 | case Some(LongType) => (data._1, data._2.toLong)
41 | case Some(DoubleType) => (data._1, data._2.toDouble)
42 | case Some(FloatType) => (data._1, data._2.toFloat)
43 | case _ => data
44 | }
45 | } else {
46 | data
47 | }
48 | })
49 | JacksonUtil.toJson(dataMap)
50 | })
51 |
52 | df.flatMap(r => {
53 | r.data.map(data => {
54 | (r.pkNames.map(data.get(_)), (r.ts, r.`type`, JacksonUtil.toJson(data)))
55 | })
56 | }).groupBy("_1").agg(max("_2").as("latest"))
57 | .withColumn(("data"), f(F.col("latest._3")))
58 | .select(from_json($"data", schema).as("data"), $"latest._1".as(BinlogConstants.TS_FIELD), $"latest._2".as(BinlogConstants.DELETE_FIELD))
59 | .selectExpr("data.*", s"${BinlogConstants.TS_FIELD}", s"if(${BinlogConstants.DELETE_FIELD}='DELETE',true,false) as ${BinlogConstants.DELETE_FIELD}")
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/util/JacksonUtil.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.canal.util
2 |
3 | import com.fasterxml.jackson.databind.ObjectMapper
4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule
5 |
6 | import scala.util.control.NonFatal
7 |
8 | object JacksonUtil {
9 |
10 | private val _mapper = new ObjectMapper()
11 | _mapper.registerModule(DefaultScalaModule)
12 |
13 | def toJson[T](obj: T): String = {
14 | _mapper.writeValueAsString(obj)
15 | }
16 |
17 | def fromJson[T](json: String, `class`: Class[T]): T = {
18 | try {
19 | _mapper.readValue(json, `class`)
20 | } catch {
21 | case NonFatal(e) =>
22 | null.asInstanceOf[T]
23 | }
24 | }
25 |
26 | def prettyPrint[T](obj: T): String = {
27 | _mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj)
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/mlsql-cli/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-cli-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli
9 | mlsqlPluginType=app
10 | desc=mlsql-cli
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-cli/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-cli-3.0
2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli
9 | mlsqlPluginType=app
10 | desc=mlsql-cli
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-cli/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-cli-3.0_2.12
13 |
14 |
15 |
16 | shade
17 |
18 |
19 |
20 | org.apache.maven.plugins
21 | maven-shade-plugin
22 | 3.2.0
23 |
24 |
25 |
26 | *:*
27 |
28 | META-INF/*.SF
29 | META-INF/*.DSA
30 | META-INF/*.RSA
31 |
32 |
33 |
34 | false
35 |
36 |
37 | org.apache.poi
38 | shadeio.poi
39 |
40 |
41 | com.norbitltd.spoiwo
42 | shadeio.spoiwo
43 |
44 |
45 | com.github.pjfanning
46 | shadeio.pjfanning
47 |
48 |
49 | org.apache.commons.compress
50 | shadeio.commons.compress
51 |
52 |
53 |
54 |
55 |
56 |
57 | package
58 |
59 | shade
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | info.picocli
72 | picocli
73 | 4.0.1
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliCommands.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.cli.app;
2 |
3 | /**
4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
5 | */
6 | public class CliCommands {
7 | public static final String DEFAULT = "default";
8 | public static final String HELP = "help";
9 | public static final String VERSION = "version";
10 | public static final String RUN = "run";
11 | public static final String HOME = "home";
12 | }
13 |
--------------------------------------------------------------------------------
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliException.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.cli.app;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | /**
7 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
8 | */
9 | public class CliException extends RuntimeException {
10 | private List detailedMessages = new ArrayList<>();
11 |
12 | public List getDetailedMessages() {
13 | return detailedMessages;
14 | }
15 |
16 | void addMessage(String message) {
17 | detailedMessages.add(message);
18 | }
19 |
20 | public List getMessages() {
21 | return detailedMessages;
22 | }
23 | }
24 |
25 |
26 |
--------------------------------------------------------------------------------
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliExceptionUtils.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.cli.app;
2 |
3 | /**
4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
5 | */
6 | public class CliExceptionUtils {
7 | public static CliException createUsageExceptionWithHelp(String errorMsg) {
8 | CliException launcherException = new CliException();
9 | launcherException.addMessage("mlsql: " + errorMsg);
10 | launcherException.addMessage("Run 'mlsql help' for usage.");
11 | return launcherException;
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCli.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.cli.app;
2 |
3 | import picocli.CommandLine;
4 | import tech.mlsql.core.version.MLSQLVersion;
5 | import tech.mlsql.core.version.VersionInfo;
6 |
7 | import java.io.PrintStream;
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | /**
12 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
13 | */
14 | public class MLSQLCli {
15 |
16 | private static PrintStream errStream = System.err;
17 | private static PrintStream outStream = System.out;
18 |
19 | public static void main(String[] args) {
20 |
21 | }
22 |
23 | @CommandLine.Command(description = "Default Command.", name = "default")
24 | private static class DefaultCmd implements MLSQLCmd {
25 |
26 | @CommandLine.Option(names = {"--help", "-h", "?"}, hidden = true, description = "for more information")
27 | private boolean helpFlag;
28 |
29 | @CommandLine.Option(names = {"--version", "-v"}, hidden = true)
30 | private boolean versionFlag;
31 |
32 | @CommandLine.Parameters(arity = "0..1")
33 | private List argList = new ArrayList<>();
34 |
35 | @Override
36 | public void execute() {
37 | if (versionFlag) {
38 | printVersionInfo();
39 | return;
40 | }
41 |
42 | if (!argList.isEmpty()) {
43 | printUsageInfo(argList.get(0));
44 | return;
45 | }
46 |
47 | printUsageInfo(CliCommands.HELP);
48 | }
49 |
50 | @Override
51 | public String getName() {
52 | return "default";
53 | }
54 |
55 | @Override
56 | public void printLongDesc(StringBuilder out) {
57 |
58 | }
59 |
60 | @Override
61 | public void printUsage(StringBuilder out) {
62 |
63 | }
64 |
65 | @Override
66 | public void setParentCmdParser(CommandLine parentCmdParser) {
67 | }
68 | }
69 |
70 | private static void printUsageInfo(String commandName) {
71 | String usageInfo = MLSQLCmd.getCommandUsageInfo(commandName);
72 | errStream.println(usageInfo);
73 | }
74 |
75 | private static void printVersionInfo() {
76 | VersionInfo version = MLSQLVersion.version();
77 | String output = "MLSQL: " + version.version() + "; Spark Core: None";
78 | outStream.print(output);
79 | }
80 | }
81 |
82 |
83 |
--------------------------------------------------------------------------------
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCmd.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugin.cli.app;
2 |
3 | import picocli.CommandLine;
4 |
5 | import java.io.BufferedReader;
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.InputStreamReader;
9 | import java.nio.charset.StandardCharsets;
10 |
11 | public interface MLSQLCmd {
12 |
13 |
14 | void execute();
15 |
16 |
17 | String getName();
18 |
19 |
20 | void printLongDesc(StringBuilder out);
21 |
22 |
23 | void printUsage(StringBuilder out);
24 |
25 |
26 | void setParentCmdParser(CommandLine parentCmdParser);
27 |
28 |
29 | static String getCommandUsageInfo(String commandName) {
30 | if (commandName == null) {
31 | throw CliExceptionUtils.createUsageExceptionWithHelp("invalid command");
32 | }
33 |
34 | String fileName = "cli-help/mlsql-" + commandName + ".help";
35 | try {
36 | return readFileAsString(fileName);
37 | } catch (IOException e) {
38 | throw CliExceptionUtils.createUsageExceptionWithHelp("usage info not available for command: " + commandName);
39 | }
40 | }
41 |
42 | static String readFileAsString(String path) throws IOException {
43 | InputStream is = ClassLoader.getSystemResourceAsStream(path);
44 | InputStreamReader inputStreamREader = null;
45 | BufferedReader br = null;
46 | StringBuilder sb = new StringBuilder();
47 | try {
48 | inputStreamREader = new InputStreamReader(is, StandardCharsets.UTF_8);
49 | br = new BufferedReader(inputStreamREader);
50 | String content = br.readLine();
51 | if (content == null) {
52 | return sb.toString();
53 | }
54 |
55 | sb.append(content);
56 |
57 | while ((content = br.readLine()) != null) {
58 | sb.append('\n').append(content);
59 | }
60 | } finally {
61 | if (inputStreamREader != null) {
62 | try {
63 | inputStreamREader.close();
64 | } catch (IOException ignore) {
65 | }
66 | }
67 | if (br != null) {
68 | try {
69 | br.close();
70 | } catch (IOException ignore) {
71 | }
72 | }
73 | }
74 | return sb.toString();
75 | }
76 | }
--------------------------------------------------------------------------------
/mlsql-cube/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-cube-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube
9 | mlsqlPluginType=app
10 | desc=mlsql-cube
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-cube/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-cube-{{spark_binary_version}}_{{scala_binary_version}}
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-cube/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-cube-3.0
2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube
9 | mlsqlPluginType=app
10 | desc=mlsql-cube
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-cube/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-cube-3.0_2.12
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-ds/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ds-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds
9 | mlsqlPluginType=app
10 | desc=ds
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-ds/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ds-3.0
2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds
9 | mlsqlPluginType=app
10 | desc=ds
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-ds/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-ds-3.0_2.12
13 |
14 |
15 | com.databricks
16 | spark-xml_2.12
17 | 0.13.0
18 |
19 |
20 |
21 |
22 |
23 |
24 | shade
25 |
26 |
27 |
28 | org.apache.maven.plugins
29 | maven-shade-plugin
30 | 3.2.0
31 |
32 |
33 |
34 | *:*
35 |
36 | META-INF/*.SF
37 | META-INF/*.DSA
38 | META-INF/*.RSA
39 |
40 |
41 |
42 | false
43 |
44 |
45 | org.apache.poi
46 | shadeio.poi
47 |
48 |
49 | com.norbitltd.spoiwo
50 | shadeio.spoiwo
51 |
52 |
53 | com.github.pjfanning
54 | shadeio.pjfanning
55 |
56 |
57 | org.apache.commons.compress
58 | shadeio.commons.compress
59 |
60 |
61 |
62 |
63 |
64 |
65 | package
66 |
67 | shade
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLDs.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.ds.app
2 |
3 | import streaming.core.datasource.MLSQLRegistry
4 | import tech.mlsql.common.utils.classloader.ClassLoaderTool
5 | import tech.mlsql.common.utils.log.Logging
6 | import tech.mlsql.version.VersionCompatibility
7 |
8 | /**
9 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | class MLSQLDs extends tech.mlsql.app.App with VersionCompatibility with Logging {
12 | override def run(args: Seq[String]): Unit = {
13 | registerDS(classOf[MLSQLXml].getName)
14 | }
15 |
16 |
17 | def registerDS(name: String) = {
18 | val dataSource = ClassLoaderTool.classForName(name).newInstance()
19 | if (dataSource.isInstanceOf[MLSQLRegistry]) {
20 | dataSource.asInstanceOf[MLSQLRegistry].register()
21 | }
22 | }
23 |
24 | override def supportedVersions: Seq[String] = {
25 | MLSQLDs.versions
26 | }
27 | }
28 |
29 | object MLSQLDs {
30 | val versions = Seq(">=2.1.0")
31 | }
--------------------------------------------------------------------------------
/mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLXml.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.ds.app
2 |
3 | import org.apache.spark.sql.SparkSession
4 | import streaming.core.datasource._
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
7 | import tech.mlsql.version.VersionCompatibility
8 |
9 | /**
10 | * 29/9/2021 WilliamZhu(allwefantasy@gmail.com)
11 | */
12 | class MLSQLXml(override val uid: String)
13 | extends MLSQLBaseFileSource
14 | with WowParams with VersionCompatibility {
15 | def this() = this(BaseParams.randomUID())
16 |
17 | override def sourceInfo(config: DataAuthConfig): SourceInfo = {
18 | val context = ScriptSQLExec.contextGetOrForTest()
19 | val owner = config.config.get("owner").getOrElse(context.owner)
20 | SourceInfo(shortFormat, "", resourceRealPath(context.execListener, Option(owner), config.path))
21 | }
22 |
23 | override def explainParams(spark: SparkSession) = {
24 | _explainParams(spark)
25 | }
26 |
27 | override def register(): Unit = {
28 | DataSourceRegistry.register(MLSQLDataSourceKey(fullFormat, MLSQLSparkDataSourceType), this)
29 | DataSourceRegistry.register(MLSQLDataSourceKey(shortFormat, MLSQLSparkDataSourceType), this)
30 | }
31 |
32 | override def fullFormat: String = "com.databricks.spark.xml"
33 |
34 | override def shortFormat: String = "xml"
35 |
36 | override def supportedVersions: Seq[String] = {
37 | MLSQLDs.versions
38 | }
39 | }
--------------------------------------------------------------------------------
/mlsql-excel/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-excel-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel
9 | mlsqlPluginType=ds
10 | desc=excel
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-excel/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```
4 | !plugin ds add - "mlsql-excel-2.4";
5 | ```
6 |
7 | or install as app:
8 |
9 | ```
10 | !plugin app add "tech.mlsql.plugins.ds.MLSQLApp" "mlsql-excel-2.4";
11 | ```
12 |
13 |
14 | ## Usage
15 |
16 | ```sql
17 | load excel.`/tmp/upload/example_en.xlsx`
18 | where useHeader="true" and
19 | maxRowsInMemory="100"
20 | and dataAddress="A1:C8"
21 | as data;
22 |
23 | select * from data as output;
24 | ```
25 |
26 |
27 |
28 |
29 |
30 |
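31 | The data source can also write Excel files (see `createRelation` with `SaveMode` in
32 | DefaultSource.scala of this module). A minimal sketch, assuming the standard MLSQL save syntax:
33 | 
34 | ```sql
35 | save overwrite data as excel.`/tmp/upload/result.xlsx` where header="true";
36 | ```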
--------------------------------------------------------------------------------
/mlsql-excel/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-excel-3.0
2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel
9 | mlsqlPluginType=ds
10 | desc=excel
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 |
3 | import org.apache.hadoop.fs.Path
4 | import org.apache.spark.sql.sources._
5 | import org.apache.spark.sql.types.StructType
6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
7 |
8 | class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider {
9 |
10 | /** Creates a new relation for retrieving data from an Excel file
11 | */
12 | override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): ExcelRelation =
13 | createRelation(sqlContext, parameters, null)
14 |
15 | /** Creates a new relation for retrieving data from an Excel file
16 | */
17 | override def createRelation(
18 | sqlContext: SQLContext,
19 | parameters: Map[String, String],
20 | schema: StructType
21 | ): ExcelRelation = {
22 | val wbReader = WorkbookReader(parameters, sqlContext.sparkContext.hadoopConfiguration)
23 | val dataLocator = DataLocator(parameters)
24 | ExcelRelation(
25 | header = checkParameter(parameters, "header").toBoolean,
26 | treatEmptyValuesAsNulls = parameters.get("treatEmptyValuesAsNulls").fold(false)(_.toBoolean),
27 | usePlainNumberFormat = parameters.get("usePlainNumberFormat").fold(false)(_.toBoolean),
28 | userSchema = Option(schema),
29 | inferSheetSchema = parameters.get("inferSchema").fold(false)(_.toBoolean),
30 | addColorColumns = parameters.get("addColorColumns").fold(false)(_.toBoolean),
31 | timestampFormat = parameters.get("timestampFormat"),
32 | excerptSize = parameters.get("excerptSize").fold(10)(_.toInt),
33 | dataLocator = dataLocator,
34 | workbookReader = wbReader
35 | )(sqlContext)
36 | }
37 |
38 | override def createRelation(
39 | sqlContext: SQLContext,
40 | mode: SaveMode,
41 | parameters: Map[String, String],
42 | data: DataFrame
43 | ): BaseRelation = {
44 | val path = checkParameter(parameters, "path")
45 | val header = checkParameter(parameters, "header").toBoolean
46 | val filesystemPath = new Path(path)
47 | val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
48 | new ExcelFileSaver(
49 | fs,
50 | filesystemPath,
51 | data,
52 | saveMode = mode,
53 | header = header,
54 | dataLocator = DataLocator(parameters)
55 | ).save()
56 |
57 | createRelation(sqlContext, parameters, data.schema)
58 | }
59 |
60 | // Forces a Parameter to exist, otherwise an exception is thrown.
61 | private def checkParameter(map: Map[String, String], param: String): String = {
62 | if (!map.contains(param)) {
63 | throw new IllegalArgumentException(s"Parameter ${'"'}$param${'"'} is missing in options.")
64 | } else {
65 | map.apply(param)
66 | }
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource15.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 |
3 | import org.apache.spark.sql.sources.DataSourceRegister
4 |
5 | class DefaultSource15 extends DefaultSource with DataSourceRegister {
6 | override def shortName(): String = "excel"
7 | }
8 |
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/ExcelFileSaver.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 |
3 | import com.norbitltd.spoiwo.model._
4 | import com.norbitltd.spoiwo.natures.streaming.xlsx.Model2XlsxConversions._
5 | import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
6 | import org.apache.poi.xssf.usermodel.XSSFWorkbook
7 | import org.apache.spark.sql.{DataFrame, SaveMode}
8 | import java.io.BufferedOutputStream
9 |
10 | import org.apache.poi.xssf.streaming.SXSSFWorkbook
11 |
12 | import scala.collection.JavaConverters._
13 |
14 | object ExcelFileSaver {
15 | final val DEFAULT_SHEET_NAME = "Sheet1"
16 | final val DEFAULT_DATE_FORMAT = "yy-m-d h:mm"
17 | final val DEFAULT_TIMESTAMP_FORMAT = "yyyy-mm-dd hh:mm:ss.000"
18 | }
19 |
20 | class ExcelFileSaver(
21 | fs: FileSystem,
22 | location: Path,
23 | dataFrame: DataFrame,
24 | saveMode: SaveMode,
25 | dataLocator: DataLocator,
26 | header: Boolean = true
27 | ) {
28 | def save(): Unit = {
29 | def sheet(workbook: SXSSFWorkbook) = {
30 | val headerRow = if (header) Some(dataFrame.schema.fields.map(_.name).toSeq) else None
31 | val dataRows = dataFrame
32 | .toLocalIterator()
33 | .asScala
34 | .map(_.toSeq)
35 | dataLocator.toSheet(headerRow, dataRows, workbook)
36 | }
37 | val fileAlreadyExists = fs.exists(location)
38 | def writeToWorkbook(workbook: SXSSFWorkbook): Unit = {
39 | Workbook(sheet(workbook)).writeToExisting(workbook)
40 | autoClose(new BufferedOutputStream(fs.create(location)))(workbook.write)
41 | }
42 | (fileAlreadyExists, saveMode) match {
43 | case (false, _) | (_, SaveMode.Overwrite) =>
44 | if (fileAlreadyExists) {
45 | fs.delete(location, true)
46 | }
47 | writeToWorkbook(new SXSSFWorkbook())
48 | case (true, SaveMode.ErrorIfExists) =>
49 | sys.error(s"path $location already exists.")
50 | case (true, SaveMode.Ignore) => ()
51 | case (true, SaveMode.Append) =>
52 | val inputStream: FSDataInputStream = fs.open(location)
53 | val workbook = new SXSSFWorkbook(new XSSFWorkbook(inputStream))
54 | inputStream.close()
55 | writeToWorkbook(workbook)
56 | }
57 | }
58 |
59 | def autoClose[A <: AutoCloseable, B](closeable: A)(fun: (A) => B): B = {
60 | try {
61 | fun(closeable)
62 | } finally {
63 | closeable.close()
64 | }
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/PlainNumberFormat.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 |
3 | import java.math.BigDecimal
4 | import java.text.FieldPosition
5 | import java.text.Format
6 | import java.text.ParsePosition
7 |
8 | /** A format that formats a double as a plain string without rounding or scientific notation.
9 | * All other operations are unsupported.
10 | * @see [[org.apache.poi.ss.usermodel.ExcelGeneralNumberFormat]] and SSNFormat from
11 | * [[org.apache.poi.ss.usermodel.DataFormatter]] from Apache POI.
12 | */
13 | object PlainNumberFormat extends Format {
14 |
15 | override def format(number: AnyRef, toAppendTo: StringBuffer, pos: FieldPosition): StringBuffer =
16 | toAppendTo.append(new BigDecimal(number.toString).toPlainString)
17 |
18 | override def parseObject(source: String, pos: ParsePosition): AnyRef =
19 | throw new UnsupportedOperationException()
20 | }
21 |
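22 | // Usage sketch (not part of the original file): a value that would normally render in
23 | // scientific notation is written out in full.
24 | //   PlainNumberFormat.format(java.lang.Double.valueOf(1.23E10), new StringBuffer(), new java.text.FieldPosition(0))
25 | //   // returns a StringBuffer containing "12300000000"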
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/Utils.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 | import scala.util.{Success, Try}
3 |
4 | object Utils {
5 | implicit class RichTry[T](t: Try[T]) {
6 | def toEither: Either[Throwable, T] = t.transform(s => Success(Right(s)), f => Success(Left(f))).get
7 | }
8 |
9 | case class MapIncluding[K](keys: Seq[K], optionally: Seq[K] = Seq()) {
10 | def unapply[V](m: Map[K, V]): Option[(Seq[V], Seq[Option[V]])] =
11 | if (keys.forall(m.contains)) {
12 | Some((keys.map(m), optionally.map(m.get)))
13 | } else {
14 | None
15 | }
16 | }
17 | sealed trait MapRequirements[K] {
18 | type ResultType[V]
19 | def unapplySeq[V](m: Map[K, V]): Option[ResultType[V]]
20 | }
21 | case class RequiredKeys[K](keys: K*) extends MapRequirements[K] {
22 | type ResultType[V] = Seq[V]
23 | def unapplySeq[V](m: Map[K, V]): Option[Seq[V]] =
24 | if (keys.forall(m.contains)) {
25 | Some(keys.map(m))
26 | } else {
27 | None
28 | }
29 | }
30 | case class OptionalKeys[K](keys: K*) extends MapRequirements[K] {
31 | type ResultType[V] = Seq[Option[V]]
32 | def unapplySeq[V](m: Map[K, V]): Option[Seq[Option[V]]] = Some(keys.map(m.get))
33 | }
34 | case class MapWith[K](
35 | requiredKeys: RequiredKeys[K] = RequiredKeys[K](),
36 | optionalKeys: OptionalKeys[K] = OptionalKeys[K]()
37 | ) {
38 | def unapply[V](m: Map[K, V]): Option[(requiredKeys.ResultType[V], optionalKeys.ResultType[V])] =
39 | for {
40 | req <- requiredKeys.unapplySeq(m)
41 | opt <- optionalKeys.unapplySeq(m)
42 | } yield (req, opt)
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/mlsql-excel/src/main/java/com/crealytics/spark/excel/WorkbookReader.scala:
--------------------------------------------------------------------------------
1 | package com.crealytics.spark.excel
2 |
3 | import java.io.InputStream
4 |
5 | import com.crealytics.spark.excel.Utils.MapIncluding
6 | import com.github.pjfanning.xlsx.StreamingReader
7 | import org.apache.hadoop.conf.Configuration
8 | import org.apache.hadoop.fs.{FileSystem, Path}
9 | import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory}
10 |
11 | trait WorkbookReader {
12 | protected def openWorkbook(): Workbook
13 | def withWorkbook[T](f: Workbook => T): T = {
14 | val workbook = openWorkbook()
15 | val res = f(workbook)
16 | workbook.close()
17 | res
18 | }
19 | def sheetNames: Seq[String] = {
20 | withWorkbook(workbook =>
21 | for (sheetIx <- (0 until workbook.getNumberOfSheets())) yield {
22 | workbook.getSheetAt(sheetIx).getSheetName()
23 | }
24 | )
25 | }
26 | }
27 |
28 | object WorkbookReader {
29 | val WithLocationMaxRowsInMemoryAndPassword =
30 | MapIncluding(Seq("path"), optionally = Seq("maxRowsInMemory", "workbookPassword"))
31 |
32 | def apply(parameters: Map[String, String], hadoopConfiguration: Configuration): WorkbookReader = {
33 | def readFromHadoop(location: String) = {
34 | val path = new Path(location)
35 | FileSystem.get(path.toUri, hadoopConfiguration).open(path)
36 | }
37 | parameters match {
38 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(Some(maxRowsInMemory), passwordOption)) =>
39 | new StreamingWorkbookReader(readFromHadoop(location), passwordOption, maxRowsInMemory.toInt)
40 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(None, passwordOption)) =>
41 | new DefaultWorkbookReader(readFromHadoop(location), passwordOption)
42 | }
43 | }
44 | }
45 | class DefaultWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String])
46 | extends WorkbookReader {
47 | protected def openWorkbook(): Workbook =
48 | workbookPassword
49 | .fold(WorkbookFactory.create(inputStreamProvider))(password =>
50 | WorkbookFactory.create(inputStreamProvider, password)
51 | )
52 | }
53 |
54 | class StreamingWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String], maxRowsInMem: Int)
55 | extends WorkbookReader {
56 | override protected def openWorkbook(): Workbook = {
57 | val builder = StreamingReader
58 | .builder()
59 | .rowCacheSize(maxRowsInMem)
60 | .bufferSize(4096)
61 | workbookPassword
62 | .fold(builder)(password => builder.password(password))
63 | .open(inputStreamProvider)
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/mlsql-ext-ets/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ext-ets-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets
9 | mlsqlPluginType=app
10 | desc=mlsql-ext-ets
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-ext-ets/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ext-ets-3.0
2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets
9 | mlsqlPluginType=app
10 | desc=mlsql-ext-ets
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-ext-ets/src/main/java/tech/mlsql/plugins/ext/ets/app/MLSQLETApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.ext.ets.app
2 |
3 | import tech.mlsql.common.utils.log.Logging
4 | import tech.mlsql.version.VersionCompatibility
5 |
6 | /**
7 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com)
8 | */
9 | class MLSQLETApp extends tech.mlsql.app.App with VersionCompatibility with Logging {
10 | override def run(args: Seq[String]): Unit = {
11 |
12 | }
13 |
14 |
15 | override def supportedVersions: Seq[String] = {
16 | MLSQLETApp.versions
17 | }
18 | }
19 |
20 | object MLSQLETApp {
21 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1")
22 | }
--------------------------------------------------------------------------------
/mlsql-ke/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ke-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke
9 | mlsqlPluginType=app
10 | desc=mlsql-ke
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-ke/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-ke-3.0
2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke
9 | mlsqlPluginType=app
10 | desc=mlsql-ke
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-ke/src/main/java/tech/mlsql/plugins/ke/app/MLSQLKE.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.ke.app
2 |
3 | import tech.mlsql.common.utils.log.Logging
4 | import tech.mlsql.ets.register.ETRegister
5 | import tech.mlsql.plugins.ke.ets.{KEAPISchedule, KEAutoModel, KEBuildSegment}
6 | import tech.mlsql.version.VersionCompatibility
7 |
8 | /**
9 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | class MLSQLKE extends tech.mlsql.app.App with VersionCompatibility with Logging {
12 | override def run(args: Seq[String]): Unit = {
13 | ETRegister.register("KeApi", classOf[KEAPISchedule].getName)
14 | ETRegister.register("KeAutoModel", classOf[KEAutoModel].getName)
15 | ETRegister.register("KeBuildSegment", classOf[KEBuildSegment].getName)
16 | }
17 |
18 |
19 | override def supportedVersions: Seq[String] = {
20 | MLSQLKE.versions
21 | }
22 | }
23 |
24 | object MLSQLKE {
25 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1")
26 | }
27 |
--------------------------------------------------------------------------------
/mlsql-ke/src/main/java/tech/mlsql/plugins/ke/ets/KEBuildSegment.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.ke.ets
2 |
3 | import com.alibaba.fastjson.{JSON, JSONObject}
4 | import org.apache.spark.ml.util.Identifiable
5 | import org.apache.spark.sql.DataFrame
6 | import streaming.dsl.{ConnectMeta, DBMappingKey}
7 | import streaming.dsl.mmlib.algs.param.WowParams
8 | import tech.mlsql.common.utils.log.Logging
9 |
10 | class KEBuildSegment(override val uid: String) extends KEAPISchedule with WowParams with Logging {
11 |
12 | def this() = this(Identifiable.randomUID("tech.mlsql.plugins.ets.BuildSegment"))
13 |
14 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
15 | val jsonObj = new JSONObject
16 | val split = path.split("\\.")
17 | val connectName = split(0)
18 | jsonObj.put("project", split(1))
19 | if (params.contains("start")) {
20 | jsonObj.put("start", params("start"))
21 | }
22 | if (params.contains("end")) {
23 | jsonObj.put("end", params("end"))
24 | }
25 | if (params.contains("sub_partition_values")) {
26 | jsonObj.put("sub_partition_values", JSON.parseArray(params("sub_partition_values")))
27 | }
28 | if (params.contains("build_all_indexes")) {
29 | jsonObj.put("build_all_indexes", params("build_all_indexes").toBoolean)
30 | }
31 | if (params.contains("build_all_sub_partitions")) {
32 | jsonObj.put("build_all_sub_partitions", params("build_all_sub_partitions").toBoolean)
33 | }
34 | if (params.contains("priority")) {
35 | jsonObj.put("priority", params("priority").toInt)
36 | }
37 | var url = new String
38 | ConnectMeta.presentThenCall(DBMappingKey("ke", connectName), options => {
39 | url = "http://" + options("host") + ":" + options("port") + "/kylin/api/models/" + params("model") + "/segments"
40 | })
41 | sendPostAPI(df, params, jsonObj, url, connectName)
42 | }
43 | }
44 |
45 |
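46 | // Usage sketch (not part of the original file; the connect name, project and model below are assumptions):
47 | // MLSQLKE registers this ET as "KeBuildSegment"; the path encodes "<connectName>.<project>" resolved
48 | // through a prior `connect` of type ke, and `model` is required to build the segments URL.
49 | //
50 | //   run someTable as KeBuildSegment.`ke_conn.my_project` where model="my_model" and start="2021-01-01 00:00:00";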
--------------------------------------------------------------------------------
/mlsql-language-server/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-language-server-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server
9 | mlsqlPluginType=app
10 | desc=mlsql-language-server
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-language-server/build.sh:
--------------------------------------------------------------------------------
1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/mlsql-language-server/build/
2 | TARGET=/Users/allwefantasy/projects/mlsql/src/mlsql-lang/mlsql-app_2.4-2.1.0-SNAPSHOT/plugin
3 | #conda activate mlsql-plugin-tool
4 | mlsql_plugin_tool build --module_name mlsql-language-server --spark spark243
5 | scp ${SOURCE}/mlsql-language-server-2.4_2.11-0.1.0-SNAPSHOT.jar ${TARGET}/
6 |
--------------------------------------------------------------------------------
/mlsql-language-server/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-language-server-3.0
2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server
9 | mlsqlPluginType=app
10 | desc=mlsql-language-server
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/AutoSuggestWrapper.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.langserver
2 |
3 | import net.csdn.common.exception.RenderFinish
4 | import net.csdn.common.jline.ANSI.Renderer.RenderException
5 | import net.csdn.modules.http.DefaultRestRequest
6 | import net.csdn.modules.mock.MockRestResponse
7 | import streaming.rest.RestController
8 | import tech.mlsql.autosuggest.statement.SuggestItem
9 | import tech.mlsql.common.utils.log.Logging
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 |
12 | import scala.collection.JavaConverters._
13 |
14 | /**
15 | * 1/9/2021 WilliamZhu(allwefantasy@gmail.com)
16 | */
17 | class AutoSuggestWrapper(params: java.util.Map[String, String]) extends Logging {
18 | def run() = {
19 | try {
20 | params.put("executeMode", "autoSuggest")
21 | logInfo(JSONTool.toJsonStr(params.asScala.toMap))
22 |
23 | val restRequest = new DefaultRestRequest("POST", params)
24 | val restReponse = new MockRestResponse()
25 | val controller = new RestController()
26 | net.csdn.modules.http.RestController.enhanceApplicationController(controller, restRequest, restReponse)
27 | try {
28 | controller.script
29 | } catch {
30 | case _: RenderFinish =>
31 | }
32 | val jsonStr = restReponse.content()
33 | JSONTool.parseJson[List[SuggestItem]](jsonStr).asJava
34 | } catch {
35 | case e: Exception =>
36 | logInfo("Suggest fail", e)
37 | List[SuggestItem]().asJava
38 | }
39 |
40 |
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/LSContext.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.langserver;
2 |
3 | import net.sf.json.JSONObject;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | /**
9 | * 2/9/2021 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | public class LSContext {
12 | final public static Map initParams = new HashMap<>();
13 |
14 | public static void parse(String jsonStr) {
15 | JSONObject obj = JSONObject.fromObject(jsonStr);
16 | for (Object key : obj.keySet()) {
17 | initParams.put(key.toString(), obj.getString(key.toString()));
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/MLSQLWorkspaceService.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.langserver;
2 |
3 | import org.eclipse.lsp4j.DidChangeConfigurationParams;
4 | import org.eclipse.lsp4j.DidChangeWatchedFilesParams;
5 | import org.eclipse.lsp4j.services.WorkspaceService;
6 |
7 | /**
8 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
9 | */
10 | public class MLSQLWorkspaceService implements WorkspaceService {
11 | @Override
12 | public void didChangeConfiguration(DidChangeConfigurationParams params) {
13 |
14 | }
15 |
16 | @Override
17 | public void didChangeWatchedFiles(DidChangeWatchedFilesParams params) {
18 |
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/Message.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package tech.mlsql.plugins.langserver.commons.client;
17 | /**
18 | * {@link Message} Parsed log message sent to client.
19 | *
20 | */
21 | public class Message {
22 | private String id;
23 | private String direction;
24 | private String headers;
25 | private String httpMethod;
26 | private String path;
27 | private String contentType;
28 | private String payload;
29 | private String headerType;
30 |
31 | public Message(String id, String direction, String headers, String httpMethod, String path, String contentType,
32 | String payload, String headerType) {
33 | this.id = id;
34 | this.direction = direction;
35 | this.headers = headers;
36 | this.httpMethod = httpMethod;
37 | this.path = path;
38 | this.contentType = contentType;
39 | this.payload = payload;
40 | this.headerType = headerType;
41 | }
42 |
43 | public void setId(String id) {
44 | this.id = id;
45 | }
46 |
47 | public String getId() {
48 | return id;
49 | }
50 |
51 | public String getDirection() {
52 | return direction;
53 | }
54 |
55 | public String getHeaders() {
56 | return headers;
57 | }
58 |
59 | public String getHttpMethod() {
60 | return httpMethod;
61 | }
62 |
63 | public String getPath() {
64 | return path;
65 | }
66 |
67 | public String getContentType() {
68 | return contentType;
69 | }
70 |
71 | public String getPayload() {
72 | return payload;
73 | }
74 |
75 | public String getHeaderType() {
76 | return headerType;
77 | }
78 | }
79 |
80 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/TraceRecord.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package tech.mlsql.plugins.langserver.commons.client;
17 |
18 | import com.google.gson.JsonObject;
19 |
20 | import java.util.UUID;
21 |
22 | /**
23 | * Model class for trace log.
24 | */
25 | public class TraceRecord {
26 | private Message message;
27 | private String rawMessage;
28 | private String id;
29 | private String millis;
30 | private String sequence;
31 | private String logger;
32 | private String sourceClass;
33 | private String sourceMethod;
34 | private String thread;
35 |
36 | public TraceRecord(Message message, JsonObject record, String rawMessage) {
37 | this.message = message;
38 | this.rawMessage = rawMessage;
39 | this.id = UUID.randomUUID().toString();
40 | this.millis = record.get("millis").getAsString();
41 | this.sequence = record.get("sequenceNumber").getAsString();
42 | this.logger = record.get("loggerName").getAsString();
43 | this.sourceClass = record.get("sourceClassName").getAsString();
44 | this.sourceMethod = record.get("sourceMethodName").getAsString();
45 | this.thread = record.get("threadID").getAsString();
46 | }
47 |
48 | public String getRawMessage() {
49 | return rawMessage;
50 | }
51 |
52 | public String getLogger() {
53 | return logger;
54 | }
55 |
56 | public String getSequence() {
57 | return sequence;
58 | }
59 |
60 | public String getThread() {
61 | return thread;
62 | }
63 |
64 | public String getMillis() {
65 | return millis;
66 | }
67 |
68 | public String getSourceMethod() {
69 | return sourceMethod;
70 | }
71 |
72 | public String getSourceClass() {
73 | return sourceClass;
74 | }
75 |
76 | public Message getMessage() {
77 | return message;
78 | }
79 |
80 | public String getId() {
81 | return id;
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/Launcher.java:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.langserver.launchers.stdio;
2 |
3 | import org.eclipse.lsp4j.services.LanguageClient;
4 | import tech.mlsql.plugins.langserver.MLSQLLanguageServer;
5 |
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.PrintWriter;
9 | import java.util.concurrent.ExecutionException;
10 |
11 |
12 | /**
13 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com)
14 | */
15 | public class Launcher {
16 | public static void main(String[] args) throws InterruptedException, ExecutionException {
17 |
18 | MLSQLLanguageServer server = new MLSQLLanguageServer();
19 |
20 | boolean lspInspectorTrace = false;
21 |
22 |
23 | org.eclipse.lsp4j.jsonrpc.Launcher launcher = null;
24 |
25 | if (lspInspectorTrace) {
26 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, exitOnClose(System.in), System.out,
27 | true, new PrintWriter(System.err));
28 | } else {
29 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, System.in, System.out);
30 | }
31 |
32 |
33 | LanguageClient client = launcher.getRemoteProxy();
34 | server.connect(client);
35 | launcher.startListening().get();
36 |
37 | }
38 |
39 | private static InputStream exitOnClose(InputStream delegate) {
40 | return new InputStream() {
41 | @Override
42 | public int read() throws IOException {
43 | return exitIfNegative(delegate.read());
44 | }
45 |
46 | int exitIfNegative(int result) {
47 | if (result < 0) {
48 | System.err.println("Input stream has closed. Exiting...");
49 | System.exit(0);
50 | }
51 | return result;
52 | }
53 | };
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/MLSQLDesktopApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.langserver.launchers.stdio
2 |
3 | import streaming.core.StreamingApp
4 | import tech.mlsql.common.utils.path.PathFun
5 |
6 | import scala.collection.mutable.ArrayBuffer
7 |
8 | /**
9 | * 26/8/2021 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | object MLSQLDesktopApp {
12 | def main(args: Array[String]): Unit = {
13 | val defaultMap = arrayToMap(Array(
14 | "-streaming.master", "local[*]",
15 | "-streaming.name", "MLSQL-desktop",
16 | "-streaming.rest", "true",
17 | "-streaming.thrift", "false",
18 | "-streaming.platform", "spark",
19 | "-streaming.spark.service", "true",
20 | "-streaming.job.cancel", "true",
21 | "-streaming.datalake.path", PathFun.joinPath(".","data"),
22 | "-streaming.driver.port", "9003",
23 | "-streaming.plugin.clzznames", "tech.mlsql.plugins.ds.MLSQLExcelApp,tech.mlsql.plugins.shell.app.MLSQLShell,tech.mlsql.plugins.assert.app.MLSQLAssert"
24 | ))
25 | val extraMap = arrayToMap(args)
26 | StreamingApp.main( mapToArray(defaultMap ++ extraMap))
27 | }
28 |
29 | def arrayToMap(args: Array[String]): Map[String, String] = {
30 | val res = scala.collection.mutable.HashMap[String, String]()
31 | var i = 0;
32 | while (i < args.length) {
33 | res += (args(i) -> args(i + 1))
34 | i += 2
35 | }
36 | res.toMap
37 | }
38 |
39 | def mapToArray(args:Map[String,String]):Array[String] = {
40 | args.flatMap{item=>
41 | val (key,value) = item
42 | Array(key,value)
43 | } .toArray
44 | }
45 | }
46 |
47 | class MLSQLDesktopApp
48 |
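49 | // Usage sketch (not part of the original file): any "-streaming.*" key/value pair passed on the
50 | // command line overrides the defaults above, because extraMap is merged after defaultMap
51 | // (defaultMap ++ extraMap). For example:
52 | //   MLSQLDesktopApp.main(Array("-streaming.driver.port", "9004"))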
--------------------------------------------------------------------------------
/mlsql-mllib/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-mllib-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib
9 | mlsqlPluginType=app
10 | desc=mllib
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-mllib/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-mllib-{{spark_binary_version}}_{{scala_binary_version}}
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-mllib/README.md:
--------------------------------------------------------------------------------
1 | # mlsql-mllib
2 |
3 | This plugin provides ET wrappers for spark-mllib.
4 |
5 | ## Install from store
6 |
7 | Execute the following command in the web console:
8 |
9 | ```
10 | !plugin app add "tech.mlsql.plugins.mllib.app.MLSQLMllib" "mlsql-mllib-2.4";
11 | ```
12 |
13 | Check installation:
14 |
15 | ```
16 | !show et/ClassificationEvaluator;
17 | !show et/RegressionEvaluator;
18 | ```
19 |
20 | ## Install Manually
21 |
22 | First, build the shade jar in your terminal:
23 |
24 | ```shell
25 | pip install mlsql_plugin_tool
26 | mlsql_plugin_tool build --module_name mlsql-mllib --spark spark243
27 | ```
28 |
29 | Then modify the start script of the MLSQL Engine:
30 |
31 | Add Jar:
32 |
33 | ```
34 | --jars YOUR_JAR_PATH
35 | ```
36 |
37 | Register Class:
38 |
39 | ```
40 | -streaming.plugin.clzznames tech.mlsql.plugins.mllib.app.MLSQLMllib
41 | ```
42 |
43 | If there is more than one class, use commas to separate them. For example:
44 |
45 | ```
46 | -streaming.plugin.clzznames classA,classB,classC
47 | ```
48 |
49 | ## Usage
50 |
51 | Classification:
52 |
53 | ```sql
54 | predict data as RandomForest.`/tmp/model` as predicted_table;
55 | run predicted_table as ClassificationEvaluator.``;
56 | ```
57 |
58 | Regression:
59 |
60 | ```sql
61 | predict data as LinearRegressionExt.`/tmp/model` as predicted_table;
62 | run predicted_table as RegressionEvaluator.``;
63 | ```
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
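72 | The app also registers a `!columns` command (backed by `ColumnsExt`) for dropping fields from a table. The snippet below mirrors the ET's built-in code example:
73 | 
74 | ```sql
75 | select 1 as a, 2 as b as mockTable;
76 | !columns drop a from mockTable;
77 | select * from mockTable as output;
78 | ```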
--------------------------------------------------------------------------------
/mlsql-mllib/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-mllib-3.0
2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib
9 | mlsqlPluginType=app
10 | desc=mllib
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-mllib/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-mllib-3.0_2.12
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/app/MLSQLMllib.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.app
2 |
3 | import tech.mlsql.common.utils.log.Logging
4 | import tech.mlsql.dsl.CommandCollection
5 | import tech.mlsql.ets.register.ETRegister
6 | import tech.mlsql.plugins.mllib.ets._
7 | import tech.mlsql.version.VersionCompatibility
8 |
9 | /**
10 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com)
11 | */
12 | class MLSQLMllib extends tech.mlsql.app.App with VersionCompatibility with Logging {
13 | override def run(args: Seq[String]): Unit = {
14 | ETRegister.register("ClassificationEvaluator", classOf[ClassificationEvaluator].getName)
15 | ETRegister.register("RegressionEvaluator", classOf[RegressionEvaluator].getName)
16 | ETRegister.register("AutoMLExt", classOf[AutoMLExt].getName)
17 | ETRegister.register("SampleDatasetExt", classOf[SampleDatasetExt].getName)
18 | ETRegister.register("TakeRandomSampleExt", classOf[TakeRandomSampleExt].getName)
19 | ETRegister.register("ColumnsExt", classOf[ColumnsExt].getName)
20 |
21 | // !columns drop fields from tableName;
22 | CommandCollection.refreshCommandMapping(Map("columns" ->
23 | """
24 | |run {3} as ColumnsExt.`` where action="{0}" and fields="{1}"
25 | |""".stripMargin))
26 |
27 | }
28 |
29 |
30 | override def supportedVersions: Seq[String] = {
31 | MLSQLMllib.versions
32 | }
33 | }
34 |
35 | object MLSQLMllib {
36 | val versions = Seq(">=2.0.0", "2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1")
37 | }
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ClassificationEvaluator.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.ets
2 |
3 | import org.apache.spark.ml.param.Param
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.auth.TableAuthResult
7 | import streaming.dsl.mmlib._
8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification
9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue}
11 | import tech.mlsql.dsl.auth.ETAuth
12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib
14 | import tech.mlsql.version.VersionCompatibility
15 |
16 | /**
17 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com)
18 | */
19 | class ClassificationEvaluator(override val uid: String) extends SQLAlg
20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification {
21 | def this() = this(BaseParams.randomUID())
22 |
23 | /**
24 | * run table as ClassificationEvaluator.`` where labelCol="label";
25 | */
26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
27 | batchPredict(df, path, params)
28 | }
29 |
30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
31 | val items = multiclassClassificationEvaluate(df, (evaluator) => {
32 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label"))
33 | evaluator.setPredictionCol("prediction")
34 | })
35 | import df.sparkSession.implicits._
36 | df.sparkSession.createDataset[MetricValue](items).toDF()
37 | }
38 |
39 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
40 |
41 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
42 |
43 | override def supportedVersions: Seq[String] = {
44 | MLSQLMllib.versions
45 | }
46 |
47 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
48 | List()
49 | }
50 |
51 | override def modelType: ModelType = AlgType
52 |
53 | override def doc: Doc = Doc(HtmlDoc,
54 | """
55 | |Compute f1|weightedPrecision|weightedRecall|accuracy for predicted table.
56 | """.stripMargin)
57 |
58 |
59 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr +
60 | """
61 | |predict data as RandomForest.`/tmp/model` as predicted_table;
62 | |run predicted_table as ClassificationEvaluator.``;
63 | """.stripMargin)
64 |
65 | override def explainParams(sparkSession: SparkSession): DataFrame = {
66 | _explainParams(sparkSession)
67 | }
68 |
69 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label")
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ColumnsExt.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.ets
2 |
3 | import org.apache.spark.ml.param.{Param, StringArrayParam}
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.mmlib.algs.classfication.BaseClassification
7 | import streaming.dsl.mmlib.algs.param.BaseParams
8 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions}
9 | import streaming.dsl.mmlib._
10 |
11 | /**
12 | * 10/10/2021 WilliamZhu(allwefantasy@gmail.com)
13 | */
14 | class ColumnsExt(override val uid: String) extends SQLAlg
15 | with Functions
16 | with MllibFunctions
17 | with BaseClassification
18 | with PluginBaseETAuth {
19 |
20 | def this() = this(BaseParams.randomUID())
21 |
22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
23 | val _action = params.getOrElse(action.name, $(action).toString)
24 | val _fields = params.getOrElse(fields.name, $(fields).mkString(",")).split(",")
25 | val dfName = params("__dfname__")
26 | if (_fields.length == 0) return df
27 | _action match {
28 | case "drop" | "remove" =>
29 | val newdf = df.drop(_fields: _*)
30 | newdf.createOrReplaceTempView(dfName)
31 | newdf
32 | }
33 | }
34 |
35 |
36 | override def skipOriginalDFName: Boolean = false
37 |
38 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
39 | train(df, path, params)
40 | }
41 |
42 | override def modelType: ModelType = ProcessType
43 |
44 | override def doc: Doc = Doc(MarkDownDoc,
45 | """
46 | |
47 | |""".stripMargin)
48 |
49 | override def codeExample: Code = Code(SQLCode,
50 | """
51 | |select 1 as a,2 as b as mockTable;
52 | |!columns drop a from mockTable;
53 | |select * from mockTable as output;
54 | |""".stripMargin)
55 |
56 |
57 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
58 |
59 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
60 |
61 | override def etName: String = "__columns_operator__"
62 |
63 | final val action: Param[String] =
64 | new Param[String](this, name = "action", doc = "")
65 | setDefault(action, "drop")
66 |
67 | final val fields: StringArrayParam =
68 | new StringArrayParam(this, name = "fields", doc = "")
69 | setDefault(fields, Array[String]())
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/PluginBaseETAuth.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.ets
2 |
3 | import streaming.dsl.ScriptSQLExec
4 | import streaming.dsl.auth._
5 | import tech.mlsql.dsl.auth.ETAuth
6 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
7 |
8 | /**
9 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com)
10 | */
11 | trait PluginBaseETAuth extends ETAuth {
12 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
13 | val vtable = MLSQLTable(
14 | Option(DB_DEFAULT.MLSQL_SYSTEM.toString),
15 | Option(etName),
16 | OperateType.SELECT,
17 | Option("select"),
18 | TableType.SYSTEM)
19 |
20 | val context = ScriptSQLExec.contextGetOrForTest()
21 | context.execListener.getTableAuth match {
22 | case Some(tableAuth) =>
23 | tableAuth.auth(List(vtable))
24 | case None =>
25 | List(TableAuthResult(granted = true, ""))
26 | }
27 | }
28 |
29 | def etName: String
30 | }
31 |
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/RegressionEvaluator.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.ets
2 |
3 | import org.apache.spark.ml.param.Param
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.auth.TableAuthResult
7 | import streaming.dsl.mmlib._
8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification
9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue}
11 | import tech.mlsql.dsl.auth.ETAuth
12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib
14 | import tech.mlsql.version.VersionCompatibility
15 |
16 | /**
17 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com)
18 | */
19 | class RegressionEvaluator(override val uid: String) extends SQLAlg
20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification {
21 | def this() = this(BaseParams.randomUID())
22 |
23 | /**
24 | * run table as RegressionEvaluator.`` where labelCol="label";
25 | */
26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
27 | batchPredict(df, path, params)
28 | }
29 |
30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
31 | val items = "mse|rmse|r2|mae".split("\\|").map { metricName =>
32 | val evaluator = new org.apache.spark.ml.evaluation.RegressionEvaluator().setMetricName(metricName)
33 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label"))
34 | evaluator.setPredictionCol("prediction")
35 | MetricValue(metricName, evaluator.evaluate(df))
36 | }.toList
37 |
38 | import df.sparkSession.implicits._
39 | df.sparkSession.createDataset[MetricValue](items).toDF()
40 | }
41 |
42 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
43 |
44 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
45 |
46 | override def supportedVersions: Seq[String] = {
47 | MLSQLMllib.versions
48 | }
49 |
50 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
51 | List()
52 | }
53 |
54 | override def modelType: ModelType = AlgType
55 |
56 | override def doc: Doc = Doc(HtmlDoc,
57 | """
58 | |Compute mse|rmse|r2|mae for predicted table.
59 | """.stripMargin)
60 |
61 |
62 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr +
63 | """
64 | |predict data as LinearRegressionExt.`/tmp/model` as predicted_table;
65 | |run predicted_table as RegressionEvaluator.``;
66 | """.stripMargin)
67 |
68 | override def explainParams(sparkSession: SparkSession): DataFrame = {
69 | _explainParams(sparkSession)
70 | }
71 |
72 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label")
73 |
74 | }
--------------------------------------------------------------------------------
/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/TakeRandomSampleExt.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.mllib.ets
2 |
3 | import org.apache.spark.ml.param.Param
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.{DataFrame, SparkSession}
6 | import streaming.dsl.mmlib._
7 | import streaming.dsl.mmlib.algs.classfication.BaseClassification
8 | import streaming.dsl.mmlib.algs.param.BaseParams
9 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions}
10 |
11 | /**
12 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com)
13 | */
14 | class TakeRandomSampleExt(override val uid: String) extends SQLAlg
15 | with Functions
16 | with MllibFunctions
17 | with BaseClassification
18 | with PluginBaseETAuth {
19 |
20 | def this() = this(BaseParams.randomUID())
21 |
22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
23 | val _size = params.getOrElse(size.name, $(size).toString).toLong
24 | val _fraction = params.getOrElse(fraction.name, $(fraction).toString).toDouble
25 |
26 | val newdf = (_fraction, _size) match {
27 | case (-1, -1) =>
28 | df
29 | case (-1, s) =>
30 | val count = df.count()
31 | df.sample(Math.min(s * 1.0 / count + 0.2, 1.0)).limit(s.toInt)
32 | case (f, -1) =>
33 | df.sample(f)
34 |
35 | case (f, s) =>
36 | df.sample(Math.min(f + 0.1, 1.0)).limit(s.toInt)
37 | }
38 | if (_fraction != -1) {
39 | df.sample(_fraction)
40 | }
41 |
42 | newdf
43 | }
44 |
45 |
46 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
47 | train(df, path, params)
48 | }
49 |
50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
51 |
52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
53 |
54 | override def etName: String = "__take_random_sample_operator__"
55 |
56 | override def modelType: ModelType = ProcessType
57 |
58 | override def doc: Doc = Doc(MarkDownDoc,
59 | """
60 | |
61 | |""".stripMargin)
62 |
63 | override def codeExample: Code = Code(SQLCode,
64 | """
65 | |
66 | |
67 | |""".stripMargin)
68 |
69 | final val fraction: Param[Double] = new Param[Double](this, name = "fraction", doc = "")
70 | setDefault(fraction, -1.0D)
71 |
72 | final val size: Param[Long] = new Param[Long](this, "size", "")
73 | setDefault(size, -1L)
74 |
75 |
76 | }
77 |
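78 | // Usage sketch (not part of the original file; the table name is hypothetical):
79 | // MLSQLMllib registers this ET as "TakeRandomSampleExt"; `size` caps the number of returned rows,
80 | // `fraction` samples by ratio, and both default to -1 (return the input unchanged).
81 | //
82 | //   run data as TakeRandomSampleExt.`` where size="100";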
--------------------------------------------------------------------------------
/mlsql-shell/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-shell-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell
9 | mlsqlPluginType=app
10 | desc=mlsql-shell
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/mlsql-shell/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-shell-{{spark_binary_version}}_{{scala_binary_version}}
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-shell/README.md:
--------------------------------------------------------------------------------
1 | # mlsql-shell
2 |
3 | This plugin provides the ability to execute shell commands on the MLSQL Engine driver side.
4 |
5 | 
6 |
7 | ## Install from store
8 |
9 | Execute the following command in the web console:
10 |
11 | ```
12 | !plugin app add - "mlsql-shell-2.4";
13 | ```
14 |
15 | Check installation:
16 |
17 | ```
18 | !sh pip install pyjava;
19 | ```
20 |
21 |
22 | ## Install Manually
23 |
24 | First, build the shade jar in your terminal:
25 |
26 | ```shell
27 | pip install mlsql_plugin_tool
28 | mlsql_plugin_tool build --module_name mlsql-shell --spark spark243
29 | ```
30 |
31 | Then modify the start script of the MLSQL Engine:
32 |
33 | Add Jar:
34 |
35 | ```
36 | --jars YOUR_JAR_PATH
37 | ```
38 |
39 | Register Class:
40 |
41 | ```
42 | -streaming.plugin.clzznames tech.mlsql.plugins.shell.app.MLSQLShell
43 | ```
44 |
45 | If there is more than one class, use commas to separate them. For example:
46 |
47 | ```
48 | -streaming.plugin.clzznames classA,classB,classC
49 | ```
50 |
51 | ## Usage
52 |
53 | ```sql
54 | !sh pip install pyjava;
55 | !sh echo "yes";
56 | !sh wget "https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell";
57 | ```
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
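68 | The plugin also registers a `!copyFromLocal` command (backed by `CopyFromLocal`) which copies a file from the driver's local file system into the engine's file system. A minimal sketch; both paths below are hypothetical:
69 | 
70 | ```sql
71 | !copyFromLocal /tmp/local_data.csv /tmp/uploaded;
72 | ```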
--------------------------------------------------------------------------------
/mlsql-shell/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=mlsql-shell-3.0
2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions=""
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell
9 | mlsqlPluginType=app
10 | desc=mlsql-shell
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/mlsql-shell/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | mlsql-plugins-3.0_2.12
7 | tech.mlsql
8 | 0.1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | mlsql-shell-3.0_2.12
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | shade
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-shade-plugin
26 | 3.2.0
27 |
28 |
29 |
30 | *:*
31 |
32 | META-INF/*.SF
33 | META-INF/*.DSA
34 | META-INF/*.RSA
35 |
36 |
37 |
38 | false
39 |
40 |
41 | org.apache.poi
42 | shadeio.poi
43 |
44 |
45 | com.norbitltd.spoiwo
46 | shadeio.spoiwo
47 |
48 |
49 | com.github.pjfanning
50 | shadeio.pjfanning
51 |
52 |
53 | org.apache.commons.compress
54 | shadeio.commons.compress
55 |
56 |
57 |
58 |
59 |
60 |
61 | package
62 |
63 | shade
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/mlsql-shell/src/main/java/tech/mlsql/plugins/shell/app/MLSQLShell.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.shell.app
2 |
3 | import tech.mlsql.common.utils.log.Logging
4 | import tech.mlsql.dsl.CommandCollection
5 | import tech.mlsql.ets.register.ETRegister
6 | import tech.mlsql.plugins.shell.ets.{CopyFromLocal, ShellExecute}
7 | import tech.mlsql.version.VersionCompatibility
8 |
9 | /**
10 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com)
11 | */
12 | class MLSQLShell extends tech.mlsql.app.App with VersionCompatibility with Logging {
13 | override def run(args: Seq[String]): Unit = {
14 | ETRegister.register("ShellExecute", classOf[ShellExecute].getName)
15 | CommandCollection.refreshCommandMapping(Map("sh" ->
16 | """
17 | |run command as ShellExecute.`` where parameters='''{:all}'''
18 | |""".stripMargin))
19 |
20 | ETRegister.register("CopyFromLocal", classOf[CopyFromLocal].getName)
21 | CommandCollection.refreshCommandMapping(Map("copyFromLocal" ->
22 | """
23 | |run command as CopyFromLocal.`{1}` where src="{0}"
24 | |""".stripMargin))
25 | }
26 |
27 |
28 | override def supportedVersions: Seq[String] = {
29 | MLSQLShell.versions
30 | }
31 | }
32 |
33 | object MLSQLShell {
34 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1")
35 | }
36 |
--------------------------------------------------------------------------------
/mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/CopyFromLocal.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.shell.ets
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.auth._
7 | import streaming.dsl.mmlib.SQLAlg
8 | import streaming.dsl.mmlib.algs.Functions
9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 | import tech.mlsql.dsl.auth.ETAuth
12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
13 | import tech.mlsql.plugins.shell.app.MLSQLShell
14 | import tech.mlsql.tool.HDFSOperatorV2
15 | import tech.mlsql.version.VersionCompatibility
16 |
17 | /**
18 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com)
19 | */
20 | class CopyFromLocal(override val uid: String) extends SQLAlg
21 | with VersionCompatibility with Functions with WowParams with ETAuth {
22 | def this() = this(BaseParams.randomUID())
23 |
24 | /**
25 | * !copyFromLocal src dst;
26 | */
27 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
28 |
29 | HDFSOperatorV2.copyToHDFS(params("src"), path, false, false)
30 | import df.sparkSession.implicits._
31 | df.sparkSession.createDataset[String](Seq().toSeq).toDF("content")
32 | }
33 |
34 | override def skipPathPrefix: Boolean = false
35 |
36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
37 |
38 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
39 |
40 | override def supportedVersions: Seq[String] = MLSQLShell.versions
41 |
42 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
43 | val vtable = MLSQLTable(
44 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString),
45 | table = Option("__copy_from_local__"),
46 | operateType = OperateType.EMPTY,
47 | sourceType = Option("_mlsql_"),
48 | tableType = TableType.SYSTEM)
49 |
50 | val context = ScriptSQLExec.contextGetOrForTest()
51 | context.execListener.getTableAuth match {
52 | case Some(tableAuth) =>
53 | tableAuth.auth(List(vtable))
54 | case None => List(TableAuthResult(true, ""))
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/ShellExecute.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.shell.ets
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.auth._
7 | import streaming.dsl.mmlib.SQLAlg
8 | import streaming.dsl.mmlib.algs.Functions
9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 | import tech.mlsql.common.utils.shell.ShellCommand
12 | import tech.mlsql.dsl.auth.ETAuth
13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
14 | import tech.mlsql.plugins.shell.app.MLSQLShell
15 | import tech.mlsql.version.VersionCompatibility
16 |
17 | import scala.collection.mutable.ArrayBuffer
18 |
19 | /**
20 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com)
21 | */
22 | class ShellExecute(override val uid: String) extends SQLAlg
23 | with VersionCompatibility with Functions with WowParams with ETAuth {
24 | def this() = this(BaseParams.randomUID())
25 |
26 | /**
27 | * !sh pip install pyjava;
28 | */
29 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
30 | val args = JSONTool.parseJson[List[String]](params("parameters"))
31 | import df.sparkSession.implicits._
32 |
33 | args.head match {
34 | case "script" =>
35 | val res = ShellCommand.exec(args.last)
36 | df.sparkSession.createDataset[String](Seq(res)).toDF("content")
37 | case _ =>
38 |
39 | val process = os.proc(args).spawn()
40 | val result = ArrayBuffer[String]()
41 |
42 | var errLine = process.stderr.readLine()
43 |
44 | while (errLine != null) {
45 | logInfo(format(errLine))
46 | result.append(errLine)
47 | errLine = process.stderr.readLine()
48 | }
49 |
50 |
51 | var line = process.stdout.readLine()
52 | while (line != null) {
53 | logInfo(format(line))
54 | result.append(line)
55 | line = process.stdout.readLine()
56 | }
57 |
58 | df.sparkSession.createDataset[String](result.toSeq).toDF("content")
59 | }
60 |
61 |
62 | }
63 |
64 | override def skipPathPrefix: Boolean = false
65 |
66 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
67 |
68 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
69 |
70 | override def supportedVersions: Seq[String] = MLSQLShell.versions
71 |
72 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
73 | val vtable = MLSQLTable(
74 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString),
75 | table = Option("__shell_execute__"),
76 | operateType = OperateType.EMPTY,
77 | sourceType = Option("_mlsql_"),
78 | tableType = TableType.SYSTEM)
79 |
80 | val context = ScriptSQLExec.contextGetOrForTest()
81 | context.execListener.getTableAuth match {
82 | case Some(tableAuth) =>
83 | tableAuth.auth(List(vtable))
84 | case None => List(TableAuthResult(true, ""))
85 | }
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/run-script/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>run-script-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/run-script/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```sql
4 | !plugin et add - "run-script-2.4" named runScript;
5 | ```
6 |
7 | ## Usage
8 |
9 | ```sql
10 | set code1='''
11 | select 1 as a as b;
12 | ''';
13 | !runScript '''${code1}''' named output;
14 | ```
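15 | 
16 | Since RunScript registers the script result as a temp table with the name given after `named`
17 | (here `output`), a follow-up query such as the untested sketch below should work; the table name
18 | `result` is only an illustrative placeholder:
19 | 
20 | ```sql
21 | !runScript '''${code1}''' named output;
22 | select * from output as result;
23 | ```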
--------------------------------------------------------------------------------
/run-script/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=run-script-2.4
2 | mainClass=tech.mlsql.plugins.et.RunScript
3 | version=0.1.0-SNAPSHOT
4 | author=allwefantasy
5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/run-script
7 | scala_version=2.11
8 | spark_version=2.4
9 | mlsqlPluginType=et
10 | desc=wow
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/run-script/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>run-script-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/run-script/src/main/java/tech/mlsql/plugins/et/RunScript.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.et
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SparkSession}
5 | import streaming.dsl.ScriptSQLExec
6 | import streaming.dsl.auth.TableAuthResult
7 | import streaming.dsl.mmlib._
8 | import streaming.dsl.mmlib.algs.Functions
9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 | import tech.mlsql.dsl.auth.ETAuth
12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
13 | import tech.mlsql.ets.ScriptRunner
14 | import tech.mlsql.version.VersionCompatibility
15 |
16 |
17 | class RunScript(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth {
18 | def this() = this(BaseParams.randomUID())
19 |
20 | //
21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
22 |
23 | val context = ScriptSQLExec.context()
24 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray
25 | val sparkOpt = Option(df.sparkSession)
26 | command match {
27 | case Array(script, "named", tableName) =>
28 |         val jobRes: DataFrame = ScriptRunner.rubSubJob(
29 | script,
30 | (_df: DataFrame) => {},
31 | sparkOpt,
32 | true,
33 | true).get
34 | jobRes.createOrReplaceTempView(tableName)
35 | jobRes
36 | case _ => throw new RuntimeException("try !runScript code named table1")
37 | }
38 |
39 | }
40 |
41 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
42 | List()
43 | }
44 |
45 | override def supportedVersions: Seq[String] = {
46 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
47 | }
48 |
49 |
50 | override def doc: Doc = Doc(MarkDownDoc,
51 | s"""
52 |        |RunScript executes a piece of MLSQL code passed as a string and
53 |        |registers the result as a named table that later statements can query.
54 | |
55 | |For example:
56 | |
57 | |```
58 | |${codeExample.code}
59 | |```
60 | """.stripMargin)
61 |
62 |
63 | override def codeExample: Code = Code(SQLCode,
64 | """
65 |       |set code1=''' select 1 as a as b; ''';
66 |       |!runScript '''${code1}''' named output;
67 |       |select * from output as result;
68 | """.stripMargin)
69 |
70 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
71 |
72 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
73 |
74 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
75 |
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/save-then-load/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>save-then-load-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/save-then-load/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```sql
4 | !plugin et add - "save-then-load-2.4" named saveThenLoad;
5 | ```
6 |
7 | ## Usage
8 |
9 | This plugin saves the given table as a delta table and then loads it back again.
10 |
11 | ```sql
12 | !saveThenLoad tableName;
13 | select * from tableName as output;
14 | ```
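15 | 
16 | A more complete, untested sketch, borrowing the `jsonStr` loading pattern used by other plugins in
17 | this repo; `table1` and the sample JSON rows are only placeholders:
18 | 
19 | ```sql
20 | set rawData='''
21 | {"id":1,"content":"hello"}
22 | {"id":2,"content":"world"}
23 | ''';
24 | load jsonStr.`rawData` as table1;
25 | 
26 | !saveThenLoad table1;
27 | select * from table1 as output;
28 | ```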
--------------------------------------------------------------------------------
/save-then-load/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=save-then-load-2.4
2 | mainClass=tech.mlsql.plugins.et.SaveThenLoad
3 | version=0.1.0-SNAPSHOT
4 | author=allwefantasy
5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/save-then-load
7 | scala_version=2.11
8 | spark_version=2.4
9 | mlsqlPluginType=et
10 | desc=wow
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/save-then-load/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>save-then-load-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/save-then-load/src/main/java/tech/mlsql/plugins/et/SaveThenLoad.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.et
2 |
3 | import org.apache.spark.sql.expressions.UserDefinedFunction
4 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
5 | import streaming.core.datasource.impl.MLSQLDelta
6 | import streaming.core.datasource.{DataSinkConfig, DataSourceConfig}
7 | import streaming.dsl.auth.TableAuthResult
8 | import streaming.dsl.mmlib._
9 | import streaming.dsl.mmlib.algs.Functions
10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
11 | import tech.mlsql.common.utils.serder.json.JSONTool
12 | import tech.mlsql.dsl.auth.ETAuth
13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
14 | import tech.mlsql.version.VersionCompatibility
15 |
16 | /**
17 | * 13/1/2020 WilliamZhu(allwefantasy@gmail.com)
18 | */
19 | class SaveThenLoad(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth {
20 | def this() = this(BaseParams.randomUID())
21 |
22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
23 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray
24 | val session = df.sparkSession
25 | command match {
26 | case Array(tableName) =>
27 | val ds = new MLSQLDelta()
28 | ds.save(session.table(tableName).write, DataSinkConfig(s"__tmp__.${tableName}", Map(), SaveMode.Overwrite, Option(df)))
29 | val newDF = ds.load(session.read, DataSourceConfig(s"__tmp__.${tableName}", Map(), Option(df)))
30 | newDF.createOrReplaceTempView(tableName)
31 | newDF
32 | case _ => throw new RuntimeException("!saveThenLoad tableName;")
33 | }
34 | }
35 |
36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
37 | List()
38 | }
39 |
40 | override def supportedVersions: Seq[String] = {
41 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
42 | }
43 |
44 |
45 | override def doc: Doc = Doc(MarkDownDoc,
46 | s"""
47 |        |Save the input table as a delta table and load it back again, for example:
48 | |```
49 | |${codeExample.code}
50 | |```
51 | """.stripMargin)
52 |
53 |
54 | override def codeExample: Code = Code(SQLCode,
55 | """
56 |       |!saveThenLoad tableName;
57 | """.stripMargin)
58 |
59 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
60 |
61 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
62 |
63 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
64 |
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/stream-persist/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=stream-persist-app-{{spark_binary_version}}
2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup
9 | mlsqlPluginType=app
10 | desc=wow
11 |
12 |
--------------------------------------------------------------------------------
/stream-persist/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>stream-persist-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | </project>
--------------------------------------------------------------------------------
/stream-persist/README.md:
--------------------------------------------------------------------------------
1 | ## Install command:
2 |
3 | ```
4 | !plugin app add - "stream-persist-app-2.4";
5 | ```
6 |
7 | This is an APP plugin; at startup it registers the !streamPersist ET command.
8 |
9 |
10 | ## Usage
11 |
12 | Use the !streamPersist ET command to persist a stream job.
13 |
14 | ```sql
15 | !streamPersist persist streamExample;
16 |
17 | !streamPersist remove streamExample;
18 |
19 | !streamPersist list;
20 | ```
21 |
22 | Once the MLSQL Engine is restarted, the persisted stream job streamExample will be
23 | started again automatically at startup.
24 |
25 |
26 |
27 |
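28 | A rough, untested sketch of a named stream job that could then be persisted; the `streamName`
29 | mechanism is standard MLSQL, but the Kafka topic `wow`, the broker address, the console sink
30 | options and the checkpoint path are all placeholders and may need adjusting for your version:
31 | 
32 | ```sql
33 | set streamName="streamExample";
34 | 
35 | load kafka.`wow` options
36 | kafka.bootstrap.servers="127.0.0.1:9092"
37 | as kafkaTable;
38 | 
39 | save append kafkaTable
40 | as console.``
41 | options duration="10"
42 | and checkpointLocation="/tmp/ck-streamExample";
43 | 
44 | !streamPersist persist streamExample;
45 | ```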
--------------------------------------------------------------------------------
/stream-persist/db.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE `w_streams` (
2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
3 | `name` varchar(256) DEFAULT NULL,
4 | `content` text,
5 | `owner` varchar(256) DEFAULT NULL,
6 | `home` varchar(256) DEFAULT NULL,
7 | PRIMARY KEY (`id`)
8 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--------------------------------------------------------------------------------
/stream-persist/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=stream-persist-app-3.0
2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup
9 | mlsqlPluginType=app
10 | desc=wow
11 |
--------------------------------------------------------------------------------
/stream-persist/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>stream-persist-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | </project>
--------------------------------------------------------------------------------
/stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamApp.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsq.streambootstrapatstartup
2 |
3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime}
4 | import _root_.streaming.dsl.{MLSQLExecuteContext, ScriptSQLExec, ScriptSQLExecListener}
5 | import org.apache.spark.sql.SparkSession
6 | import tech.mlsql.common.utils.log.Logging
7 | import tech.mlsql.dsl.CommandCollection
8 | import tech.mlsql.ets.ScriptRunner
9 | import tech.mlsql.ets.register.ETRegister
10 | import tech.mlsql.job.{JobManager, MLSQLJobType}
11 | import tech.mlsql.store.DBStore
12 | import tech.mlsql.version.VersionCompatibility
13 |
14 | /**
15 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com)
16 | */
17 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility with Logging {
18 |
19 |
20 | override def run(args: Seq[String]): Unit = {
21 | val root = runtime.sparkSession
22 | import root.implicits._
23 |
24 | ETRegister.register("StreamPersistCommand", classOf[StreamPersistCommand].getName)
25 | CommandCollection.refreshCommandMapping(Map("streamPersist" -> "StreamPersistCommand"))
26 |
27 | val thread = new Thread("start MLSQL stream") {
28 | override def run(): Unit = {
29 | while (!PlatformManager.RUNTIME_IS_READY.get()) {
30 | Thread.sleep(3000)
31 |           logInfo("Waiting for MLSQL runtime to be ready before starting streams.")
32 | }
33 |         logInfo("Starting persisted streams.")
34 | val streams = DBStore.store.tryReadTable(root, StreamAppConfig.TABLE, () => root.createDataset[Stream](Seq()).toDF())
35 | streams.as[Stream].collect().foreach { stream =>
36 | val session = getSessionByOwner(stream.owner)
37 | val job = JobManager.getJobInfo(stream.owner, stream.name, MLSQLJobType.STREAM, stream.content, -1)
38 | setUpScriptSQLExecListener(stream.owner, session, job.groupId, stream.home)
39 | ScriptRunner.runJob(stream.content, job, (df) => {
40 |
41 | })
42 | }
43 | }
44 | }
45 | thread.start()
46 |
47 | }
48 |
49 | def setUpScriptSQLExecListener(owner: String, sparkSession: SparkSession, groupId: String, home: String) = {
50 | val context = new ScriptSQLExecListener(sparkSession, "", Map[String, String](owner -> home))
51 | ScriptSQLExec.setContext(new MLSQLExecuteContext(context, owner, context.pathPrefix(None), groupId, Map()))
52 | context.addEnv("SKIP_AUTH", "true")
53 | context.addEnv("HOME", context.pathPrefix(None))
54 | context.addEnv("OWNER", owner)
55 | context
56 | }
57 |
58 | def getSessionByOwner(owner: String) = {
59 | runtime.getSession(owner)
60 | }
61 |
62 | def runtime = {
63 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime]
64 | }
65 |
66 | override def supportedVersions: Seq[String] = {
67 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
68 | }
69 | }
70 |
71 | object StreamAppConfig {
72 | val TABLE = "__mlsql__.streams"
73 | }
74 |
75 | case class Stream(name: String, content: String, owner: String, home: String)
76 |
77 | object StreamApp {
78 | }
79 |
--------------------------------------------------------------------------------
/stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamPersistCommand.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsq.streambootstrapatstartup
2 |
3 | import _root_.streaming.dsl.ScriptSQLExec
4 | import _root_.streaming.dsl.mmlib.SQLAlg
5 | import _root_.streaming.dsl.mmlib.algs.Functions
6 | import _root_.streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
7 | import org.apache.spark.sql.expressions.UserDefinedFunction
8 | import org.apache.spark.sql.{DataFrame, SparkSession}
9 | import org.apache.spark.sql.mlsql.session.MLSQLException
10 | import tech.mlsql.common.utils.serder.json.JSONTool
11 | import tech.mlsql.datalake.DataLake
12 | import tech.mlsql.job.JobManager
13 | import tech.mlsql.store.DBStore
14 | import tech.mlsql.version.VersionCompatibility
15 |
16 | /**
17 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com)
18 | */
19 | class StreamPersistCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams {
20 | def this() = this(BaseParams.randomUID())
21 |
22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
23 | val spark = df.sparkSession
24 | val dataLake = new DataLake(spark)
25 | require(dataLake.isEnable, "data lake should be enabled.")
26 | import spark.implicits._
27 |
28 | val command = JSONTool.parseJson[List[String]](params("parameters"))
29 | command match {
30 | case Seq("persist", streamName) =>
31 | JobManager.getJobInfo.filter(f => f._2.jobName == streamName).map(f => f._2).headOption match {
32 | case Some(item) =>
33 | val data = spark.createDataset(Seq(Stream(streamName, item.jobContent, item.owner, ScriptSQLExec.context().home)))
34 | DBStore.store.saveTable(spark, data.toDF(), StreamAppConfig.TABLE, Option("name"), false)
35 | DBStore.store.readTable(spark, StreamAppConfig.TABLE)
36 |           case None => throw new MLSQLException(s"no stream named ${streamName} exists")
37 | }
38 | case Seq("remove", streamName) =>
39 | DBStore.store.saveTable(spark, spark.createDataset[Stream](Seq(Stream(streamName, null, null, null))).toDF(), StreamAppConfig.TABLE, Option("name"), true)
40 | DBStore.store.readTable(spark, StreamAppConfig.TABLE)
41 |
42 | case Seq("list") =>
43 | DBStore.store.readTable(spark, StreamAppConfig.TABLE)
44 | }
45 |
46 | }
47 |
48 |
49 | override def supportedVersions: Seq[String] = {
50 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
51 | }
52 |
53 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
54 |
55 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
56 |
57 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
58 | }
59 |
--------------------------------------------------------------------------------
/table-repartition/.repo/desc.template.plugin:
--------------------------------------------------------------------------------
1 | moduleName=table-repartition-{{spark_binary_version}}
2 | mainClass=tech.mlsql.plugins.et.TableRepartition
3 | scala_version={{scala_binary_version}}
4 | spark_version={{spark_binary_version}}
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition
9 | mlsqlPluginType=et
10 | desc=wow
--------------------------------------------------------------------------------
/table-repartition/.repo/pom.template.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}}</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>table-repartition-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/table-repartition/README.md:
--------------------------------------------------------------------------------
1 | ## Install
2 |
3 | ```sql
4 | !plugin et add - "table-repartition-2.4";
5 | ```
6 |
7 | ## Usage
8 |
9 | ```sql
10 | set rawText='''
11 | {"id":9,"content":"Spark好的语言1","label":0.0}
12 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0}
13 | {"id":13,"content":"MLSQL是一个好的语言7","label":0.0}
14 | ''';
15 |
16 | load jsonStr.`rawText` as orginal_text_corpus;
17 |
18 | select id,content,label from orginal_text_corpus as orginal_text_corpus1;
19 | run orginal_text_corpus1 as TableRepartition.`` where partitionNum="2" and partitionType="range" and partitionCols="id"
20 | as newtable;
21 | ```
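22 | 
23 | Per TableRepartition.scala, `partitionType` defaults to `hash` when it is not set, so a plain
24 | repartition by partition count alone should look like the following (untested) variant:
25 | 
26 | ```sql
27 | run orginal_text_corpus1 as TableRepartition.`` where partitionNum="3" as newtable;
28 | ```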
--------------------------------------------------------------------------------
/table-repartition/desc.plugin:
--------------------------------------------------------------------------------
1 | moduleName=table-repartition-3.0
2 | mainClass=tech.mlsql.plugins.et.TableRepartition
3 | scala_version=2.12
4 | spark_version=3.0
5 | version=0.1.0-SNAPSHOT
6 | author=allwefantasy
7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT"
8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition
9 | mlsqlPluginType=et
10 | desc=wow
--------------------------------------------------------------------------------
/table-repartition/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>mlsql-plugins-3.0_2.12</artifactId>
7 |         <groupId>tech.mlsql</groupId>
8 |         <version>0.1.0-SNAPSHOT</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 | 
12 |     <artifactId>table-repartition-${spark.binary.version}_${scala.binary.version}</artifactId>
13 | 
14 | 
15 | </project>
--------------------------------------------------------------------------------
/table-repartition/src/main/java/tech/mlsql/plugins/et/TableRepartition.scala:
--------------------------------------------------------------------------------
1 | package tech.mlsql.plugins.et
2 |
3 | import org.apache.spark.ml.param.{IntParam, Param}
4 | import org.apache.spark.sql.expressions.UserDefinedFunction
5 | import org.apache.spark.sql.mlsql.session.MLSQLException
6 | import org.apache.spark.sql.{DataFrame, SparkSession, functions => F}
7 | import streaming.dsl.auth.TableAuthResult
8 | import streaming.dsl.mmlib._
9 | import streaming.dsl.mmlib.algs.Functions
10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}
11 | import tech.mlsql.dsl.auth.ETAuth
12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod
13 | import tech.mlsql.version.VersionCompatibility
14 |
15 |
16 | class TableRepartition(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth {
17 | def this() = this(BaseParams.randomUID())
18 |
19 | //
20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = {
21 |
22 | params.get(partitionNum.name).map { item =>
23 | set(partitionNum, item.toInt)
24 | item
25 | }.getOrElse {
26 | throw new MLSQLException(s"${partitionNum.name} is required")
27 | }
28 |
29 | params.get(partitionType.name).map { item =>
30 | set(partitionType, item)
31 | item
32 | }.getOrElse {
33 | set(partitionType, "hash")
34 | }
35 |
36 | params.get(partitionCols.name).map { item =>
37 | set(partitionCols, item)
38 | item
39 | }.getOrElse {
40 | set(partitionCols, "")
41 | }
42 |
43 | $(partitionType) match {
44 | case "range" =>
45 |
46 | require(params.contains(partitionCols.name), "At least one partition-by expression must be specified.")
47 | df.repartitionByRange($(partitionNum), $(partitionCols).split(",").map(name => F.col(name)): _*)
48 |
49 | case _ =>
50 | df.repartition($(partitionNum))
51 | }
52 |
53 |
54 | }
55 |
56 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = {
57 | List()
58 | }
59 |
60 | override def supportedVersions: Seq[String] = {
61 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0")
62 | }
63 |
64 |
65 | override def doc: Doc = Doc(MarkDownDoc,
66 | s"""
67 |        |Repartition the input table: partitionNum sets the number of partitions, partitionType is "hash" (default) or "range", and partitionCols lists the columns used when partitionType is "range".
68 | """.stripMargin)
69 |
70 |
71 | override def codeExample: Code = Code(SQLCode,
72 | """
73 |        |run table1 as TableRepartition.`` where partitionNum="2" and partitionType="range" and partitionCols="id" as newtable;
74 | """.stripMargin)
75 |
76 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params)
77 |
78 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ???
79 |
80 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ???
81 |
82 | final val partitionNum: IntParam = new IntParam(this, "partitionNum",
83 | "")
84 | final val partitionType: Param[String] = new Param[String](this, "partitionType",
85 | "")
86 |
87 | final val partitionCols: Param[String] = new Param[String](this, "partitionCols",
88 | "")
89 |
90 | override def explainParams(sparkSession: SparkSession): DataFrame = _explainParams(sparkSession)
91 |
92 | }
93 |
--------------------------------------------------------------------------------