├── .gitignore ├── .repo └── pom.template.xml ├── README.md ├── binlog2delta ├── .DS_Store ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ ├── .DS_Store │ └── main │ ├── .DS_Store │ ├── java │ ├── .DS_Store │ └── tech │ │ └── mlsql │ │ └── plugins │ │ └── binlog2delta │ │ └── JavaDoc.java │ └── resources │ ├── main.mlsql │ └── plugin.json ├── connect-persist ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── db.sql ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ ├── et │ └── ConnectPersistCommand.scala │ └── mllib │ └── ConnectPersistApp.scala ├── delta-enhancer ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugin │ └── et │ └── DeltaCommand.scala ├── desktop-publish.sh ├── dev ├── change-scala-version.sh ├── change-version-to-2.11.sh └── change-version-to-2.12.sh ├── ds-hbase-2x ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── org │ └── apache │ │ └── spark │ │ └── sql │ │ └── execution │ │ └── datasources │ │ └── hbase2x │ │ ├── DefaultSource.scala │ │ ├── HBaseConfBuilder.scala │ │ ├── HBaseType.scala │ │ ├── JavaDoc.java │ │ └── SparkHBaseConf.scala │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── MLSQLHBase2x.scala ├── echo-controller ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── mllib │ └── echocontroller │ └── StreamApp.scala ├── install-all.sh ├── install.sh ├── last-command ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── LastCommand.scala ├── mlsql-analysis-toolkit ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── analysis │ ├── AnalysisApp.scala │ ├── ApproxQuantile.scala │ └── DFTool.scala ├── mlsql-assert ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── assert │ ├── app │ └── MLSQLAssert.scala │ └── ets │ ├── Assert.scala │ └── MLSQLThrow.scala ├── mlsql-bigdl ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── com │ └── intel │ │ └── analytics │ │ └── bigdl │ │ └── visualization │ │ ├── LogTrainSummary.scala │ │ └── WowFileWriter.scala │ └── tech │ └── mlsql │ └── plugins │ └── bigdl │ ├── BigDLApp.scala │ ├── BigDLFunctions.scala │ ├── OptimizeParamExtractor.scala │ ├── SQLBigDLClassifyExt.scala │ ├── SQLImageLoaderExt.scala │ ├── SQLLeNet5Ext.scala │ ├── SQLMnistLoaderExt.scala │ ├── WowClassNLLCriterion.scala │ └── WowLoggerFilter.scala ├── mlsql-canal ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── scala │ └── tech │ └── mlsql │ └── plugins │ └── canal │ ├── CanalApp.scala │ ├── ets │ └── BinlogToDelta.scala │ ├── mysql │ ├── JdbcTypeParser.scala │ ├── MysqlType.java │ └── statement │ │ └── DDLStatementParser.scala │ ├── sink │ ├── BinlogConstants.scala │ ├── BinlogWritter.scala │ ├── DeltaSink.scala │ └── Sink.scala │ └── util │ └── JacksonUtil.scala ├── mlsql-cli ├── 
.repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugin │ └── cli │ └── app │ ├── CliCommands.java │ ├── CliException.java │ ├── CliExceptionUtils.java │ ├── MLSQLCli.java │ └── MLSQLCmd.java ├── mlsql-cube ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin └── pom.xml ├── mlsql-ds ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── app │ ├── MLSQLDs.scala │ └── MLSQLXml.scala ├── mlsql-excel ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ ├── com │ └── crealytics │ │ └── spark │ │ └── excel │ │ ├── DataColumn.scala │ │ ├── DataLocator.scala │ │ ├── DefaultSource.scala │ │ ├── DefaultSource15.scala │ │ ├── ExcelFileSaver.scala │ │ ├── ExcelRelation.scala │ │ ├── InferSchema.scala │ │ ├── PlainNumberFormat.scala │ │ ├── Utils.scala │ │ ├── WorkbookReader.scala │ │ └── package.scala │ └── tech │ └── mlsql │ └── plugins │ └── ds │ └── MLSQLExcel.scala ├── mlsql-ext-ets ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ext │ └── ets │ └── app │ └── MLSQLETApp.scala ├── mlsql-ke ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── ke │ ├── app │ └── MLSQLKE.scala │ └── ets │ ├── KEAPISchedule.scala │ ├── KEAutoModel.scala │ └── KEBuildSegment.scala ├── mlsql-language-server ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── build.sh ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── langserver │ ├── AutoSuggestWrapper.scala │ ├── FileTracker.java │ ├── LSContext.java │ ├── MLSQLDocumentService.java │ ├── MLSQLLanguageServer.java │ ├── MLSQLWorkspaceService.java │ ├── commons │ └── client │ │ ├── Message.java │ │ └── TraceRecord.java │ └── launchers │ └── stdio │ ├── Launcher.java │ └── MLSQLDesktopApp.scala ├── mlsql-mllib ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── mllib │ ├── app │ └── MLSQLMllib.scala │ └── ets │ ├── AutoMLExt.scala │ ├── ClassificationEvaluator.scala │ ├── ColumnsExt.scala │ ├── PluginBaseETAuth.scala │ ├── RegressionEvaluator.scala │ ├── SampleDatasetExt.scala │ └── TakeRandomSampleExt.scala ├── mlsql-shell ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── shell │ ├── app │ └── MLSQLShell.scala │ └── ets │ ├── CopyFromLocal.scala │ └── ShellExecute.scala ├── pom.xml ├── run-script ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── RunScript.scala ├── save-then-load ├── .repo │ └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsql │ └── plugins │ └── et │ └── SaveThenLoad.scala ├── stream-persist ├── .repo │ ├── desc.template.plugin │ └── pom.template.xml ├── README.md ├── db.sql ├── desc.plugin ├── pom.xml └── src │ └── main │ └── java │ └── tech │ └── mlsq │ └── 
streambootstrapatstartup │ ├── StreamApp.scala │ └── StreamPersistCommand.scala └── table-repartition ├── .repo ├── desc.template.plugin └── pom.template.xml ├── README.md ├── desc.plugin ├── pom.xml └── src └── main └── java └── tech └── mlsql └── plugins └── et └── TableRepartition.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.iml 3 | target 4 | .DS_Store 5 | /**/build 6 | 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlsql-plugins 2 | 3 | This project is a collection of plugins for MLSQL. 4 | Please check each module in the project for more details. 5 | 6 | ## Build Shade Jar 7 | 8 | Requirements: 9 | 10 | 1. Python >= 3.6 11 | 2. Maven >= 3.0 12 | 13 | You can install [mlsql_plugin_tool](https://github.com/allwefantasy/mlsql_plugin_tool) to build the modules in this project. 14 | 15 | Install command: 16 | 17 | ``` 18 | pip install mlsql_plugin_tool 19 | ``` 20 | 21 | Build shade jar command: 22 | 23 | ``` 24 | mlsql_plugin_tool build --module_name xxxxx --spark spark243 25 | ``` 26 | 27 | 1. spark: two options are available, spark243, spark311 28 | 2. module_name: e.g. mlsql-excel, ds-hbase-2x 29 | 30 | Once the build succeeds, the system will show a message like the following: 31 | 32 | ``` 33 | 34 | ====Build success!===== 35 | File location 0: 36 | /Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/ds-hbase-2x/target/ds-hbase-2x-2.4_2.11-0.1.0-SNAPSHOT.jar 37 | 38 | ``` 39 | 40 | Then you can install this plugin (jar file) in [MLSQL Engine](https://docs.mlsql.tech/mlsql-stack/plugin/offline_install.html) 41 | 42 | ## Plugins that Support Both Spark 2.4.3 and 3.1.1 43 | 44 | 1. binlog2delta 45 | 2. connect-persist 46 | 3. ds-hbase-2x 47 | 4. mlsql-bigdl 48 | 5. mlsql-excel 49 | 6. stream-persist 50 | 7.
mlsql-mllib -------------------------------------------------------------------------------- /binlog2delta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | mainClass=- 2 | version=0.1.0-SNAPSHOT 3 | author=allwefantasy 4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta 6 | scala_version=2.11 7 | mlsqlPluginType=script 8 | desc=wow 9 | -------------------------------------------------------------------------------- /binlog2delta/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | binlog2delta_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /binlog2delta/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin script add - binlog2delta; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set checkpointLocation="/tmp/cpl-binlog-m"; 11 | include plugin.`binlog2delta`; 12 | ``` 13 | 14 | Here are parameter you can set before include the plugin: 15 | 16 | ```sql 17 | set streamName="binlog"; 18 | 19 | set host="127.0.0.1"; 20 | set port="3306"; 21 | set userName="root"; 22 | set password="mlsql"; 23 | set bingLogNamePrefix="mysql-bin"; 24 | set binlogIndex="1"; 25 | set binlogFileOffset="4"; 26 | set databaseNamePattern="mlsql_console"; 27 | set tableNamePattern="script_file"; 28 | 29 | set deltaTableHome="/tmp/binlog2delta"; 30 | set idCols="id"; 31 | set duration="10"; 32 | set checkpointLocation="/tmp/ck-binlog2delta"; 33 | 34 | ``` 35 | 36 | ## Check the content in plugin 37 | 38 | ```sql 39 | !plugin script show binlog2delta/plugin.json; 40 | ``` 41 | 42 | or 43 | 44 | ```sql 45 | !plugin script show binlog2delta/main.json; 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /binlog2delta/desc.plugin: -------------------------------------------------------------------------------- 1 | mainClass=- 2 | version=0.1.0-SNAPSHOT 3 | author=allwefantasy 4 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 5 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/binlog2delta 6 | scala_version=2.11 7 | mlsqlPluginType=script 8 | desc=wow -------------------------------------------------------------------------------- /binlog2delta/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | binlog2delta_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /binlog2delta/src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/.DS_Store 
-------------------------------------------------------------------------------- /binlog2delta/src/main/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/main/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/src/main/java/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allwefantasy/mlsql-plugins/17e1a380d823a443b2503d883a9f1e50aeb832cf/binlog2delta/src/main/java/.DS_Store -------------------------------------------------------------------------------- /binlog2delta/src/main/java/tech/mlsql/plugins/binlog2delta/JavaDoc.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.binlog2delta; 2 | 3 | /** 4 | * 2019-09-16 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class JavaDoc { 7 | } 8 | -------------------------------------------------------------------------------- /binlog2delta/src/main/resources/main.mlsql: -------------------------------------------------------------------------------- 1 | 2 | set streamName="binlog" where type="defaultParam"; 3 | 4 | set host="127.0.0.1" where type="defaultParam"; 5 | set port="3306" where type="defaultParam"; 6 | set userName="root" where type="defaultParam"; 7 | set password="mlsql" where type="defaultParam"; 8 | set bingLogNamePrefix="mysql-bin" where type="defaultParam"; 9 | set binlogIndex="1" where type="defaultParam"; 10 | set binlogFileOffset="4" where type="defaultParam"; 11 | set databaseNamePattern="mlsql_console" where type="defaultParam"; 12 | set tableNamePattern="script_file" where type="defaultParam"; 13 | 14 | set deltaTableHome="/tmp/binlog2delta" where type="defaultParam"; 15 | set idCols="id" where type="defaultParam"; 16 | set duration="10" where type="defaultParam"; 17 | set checkpointLocation="/tmp/ck-binlog2delta" where type="defaultParam"; 18 | 19 | set dbPrefix="mysql" where type="defaultParam"; 20 | 21 | 22 | load binlog.`` where 23 | host="${host}" 24 | and port="${port}" 25 | and userName="${userName}" 26 | and password="${password}" 27 | and bingLogNamePrefix="${bingLogNamePrefix}" 28 | and binlogIndex="${binlogIndex}" 29 | and binlogFileOffset="${binlogFileOffset}" 30 | and databaseNamePattern="${databaseNamePattern}" 31 | and tableNamePattern="${tableNamePattern}" 32 | as binlogTable; 33 | 34 | save append binlogTable 35 | as rate.`${dbPrefix}_{db}.{table}` 36 | options mode="Append" 37 | and idCols="${idCols}" 38 | and syncType="binlog" 39 | and duration="${duration}" 40 | and checkpointLocation="${checkpointLocation}"; -------------------------------------------------------------------------------- /connect-persist/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=connect-persist-app-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist 9 | mlsqlPluginType=app 10 | desc=wow 11 | -------------------------------------------------------------------------------- 
/connect-persist/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | connect-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /connect-persist/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - 'connect-persist-app-2.4'; 5 | ``` 6 | 7 | > Notice: 8 | > If you use MySQL as the MLSQL meta store, you should import the db.sql file into 9 | > your meta database. 10 | 11 | ## Usage 12 | 13 | Use this ET plugin to persist connect statements. 14 | 15 | ```sql 16 | !connectPersist; 17 | ``` 18 | 19 | Then, once the MLSQL Engine is restarted, the connect info will be 20 | restored at startup. 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /connect-persist/db.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `w_connect_table` ( 2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 3 | `format` varchar(256) DEFAULT NULL, 4 | `db` varchar(256) DEFAULT NULL, 5 | `options` text, 6 | PRIMARY KEY (`id`) 7 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /connect-persist/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=connect-persist-app-3.0 2 | mainClass=tech.mlsql.plugins.app.ConnectPersistApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-persist 9 | mlsqlPluginType=app 10 | desc=wow -------------------------------------------------------------------------------- /connect-persist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | connect-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /connect-persist/src/main/java/tech/mlsql/plugins/et/ConnectPersistCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import java.util.concurrent.ConcurrentHashMap 4 | 5 | import org.apache.spark.sql.expressions.UserDefinedFunction 6 | import org.apache.spark.sql.{DataFrame, SparkSession} 7 | import streaming.dsl.{ConnectMeta, DBMappingKey} 8 | import streaming.dsl.auth.TableAuthResult 9 | import streaming.dsl.mmlib._ 10 | import streaming.dsl.mmlib.algs.Functions 11 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 12 | import tech.mlsql.common.utils.classloader.ClassLoaderTool 13 | import tech.mlsql.common.utils.lang.sc.ScalaReflect 14 | import tech.mlsql.dsl.auth.ETAuth 15 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 16 | import tech.mlsql.store.DBStore 17 | import tech.mlsql.version.VersionCompatibility 18 | 19 | import scala.collection.JavaConverters._ 20 | 21 | /** 22 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com) 23 | */ 24 | class ConnectPersistCommand(override val uid:
String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 25 | def this() = this(BaseParams.randomUID()) 26 | 27 | 28 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 29 | val session = df.sparkSession 30 | val dbMapping = ConnectMeta.toMap 31 | val items = dbMapping.toList.map(f => ConnectMetaItem(f._1.format, f._1.db, f._2)) 32 | import session.implicits._ 33 | val newdf = session.createDataset[ConnectMetaItem](items).toDF() 34 | DBStore.store.saveTable(session, newdf, ConnectPersistMeta.connectTableName, Option("format,db"), false) 35 | newdf 36 | } 37 | 38 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 39 | List() 40 | } 41 | 42 | override def supportedVersions: Seq[String] = { 43 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 44 | } 45 | 46 | 47 | override def doc: Doc = Doc(MarkDownDoc, 48 | s""" 49 | | 50 | |``` 51 | |${codeExample.code} 52 | |``` 53 | """.stripMargin) 54 | 55 | 56 | override def codeExample: Code = Code(SQLCode, 57 | """ 58 | |example 59 | """.stripMargin) 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 66 | 67 | } 68 | 69 | object ConnectPersistMeta { 70 | def connectTableName = "__mlsql__.connect_table" 71 | } 72 | 73 | case class ConnectMetaItem(format: String, db: String, options: Map[String, String]) 74 | -------------------------------------------------------------------------------- /connect-persist/src/main/java/tech/mlsql/plugins/mllib/ConnectPersistApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib 2 | 3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime} 4 | import _root_.streaming.dsl._ 5 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} 6 | import tech.mlsql.common.utils.log.Logging 7 | import tech.mlsql.datalake.DataLake 8 | import tech.mlsql.dsl.CommandCollection 9 | import tech.mlsql.ets.register.ETRegister 10 | import tech.mlsql.plugins.et.{ConnectMetaItem, ConnectPersistCommand, ConnectPersistMeta} 11 | import tech.mlsql.store.DBStore 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 15/1/2020 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class ConnectPersistApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 18 | override def run(args: Seq[String]): Unit = { 19 | val root = runtime.sparkSession 20 | import root.implicits._ 21 | 22 | ETRegister.register("ConnectPersistCommand", classOf[ConnectPersistCommand].getName) 23 | CommandCollection.refreshCommandMapping(Map("connectPersist" -> "ConnectPersistCommand")) 24 | 25 | val streams = DBStore.store.tryReadTable(root, ConnectPersistMeta.connectTableName, () => root.createDataset[ConnectMetaItem](Seq()).toDF()) 26 | streams.as[ConnectMetaItem].collect().foreach { item => 27 | logInfo(s"load connect statement format: ${item.format} db:${item.db}") 28 | ConnectMeta.options(DBMappingKey(item.format, item.db), item.options) 29 | } 30 | } 31 | 32 | def runtime = { 33 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime] 34 | } 35 | 36 | override def 
supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 37 | } 38 | 39 | -------------------------------------------------------------------------------- /delta-enhancer/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | delta-enhancer-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | tech.mlsql 16 | delta-plus_${scala.binary.version} 17 | ${delta-plus.version} 18 | ${scope} 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /delta-enhancer/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin et add tech.mlsql.plugin.et.DeltaCommand delta-enhancer 5 | named deltaEnhancer; 6 | ``` 7 | 8 | ## Usage 9 | 10 | ```sql 11 | !deltaEnhancer pruneDeletes __mlsql__.plugins 10000; 12 | ``` 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /delta-enhancer/desc.plugin: -------------------------------------------------------------------------------- 1 | tech.mlsql.plugin.et.DeltaCommand 2 | -------------------------------------------------------------------------------- /delta-enhancer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | delta-enhancer-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | tech.mlsql 16 | delta-plus_${scala.binary.version} 17 | ${delta-plus.version} 18 | ${scope} 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /delta-enhancer/src/main/java/tech/mlsql/plugin/et/DeltaCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.et 2 | 3 | import io.delta.tables.DeltaTable 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib.SQLAlg 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.path.PathFun 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.datalake.DataLake 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 2019-09-11 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class DeltaCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 18 | def this() = this(BaseParams.randomUID()) 19 | 20 | 21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 22 | val spark = df.sparkSession 23 | 24 | def resolveRealPath(dataPath: String) = { 25 | val dataLake = new DataLake(spark) 26 | if (dataLake.isEnable) { 27 | dataLake.identifyToPath(dataPath) 28 | } else { 29 | PathFun(path).add(dataPath).toPath 30 | } 31 | } 32 | 33 | 34 | val command = JSONTool.parseJson[List[String]](params("parameters")) 35 | command match { 36 | case Seq("pruneDeletes", dataPath, howManyHoures, _*) => 37 | val deltaLog = DeltaTable.forPath(spark, resolveRealPath(dataPath)) 38 | deltaLog.vacuum(howManyHoures.toInt) 39 | } 40 | 41 | } 42 | 43 | 44 | override def supportedVersions: Seq[String] = { 45 | 
Seq("1.5.0-SNAPSHOT", "1.5.0") 46 | } 47 | 48 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 49 | 50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 51 | 52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /desktop-publish.sh: -------------------------------------------------------------------------------- 1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins 2 | TARGET=/Users/allwefantasy/projects/mlsql-desktop 3 | #conda activate mlsql-plugin-tool 4 | 5 | mods=${1:-mlsql-language-server mlsql-excel mlsql-assert mlsql-shell} 6 | 7 | for mod in ${mods} 8 | do 9 | echo "build= $mod" 10 | mlsql_plugin_tool build --module_name ${mod} --spark spark311 11 | 12 | for os in linux mac win 13 | do 14 | cp ${SOURCE}/$mod/build/${mod}-3.0_2.12-0.1.0-SNAPSHOT.jar ${TARGET}/${os}/plugin 15 | done 16 | done 17 | 18 | #mlsql-language-server mlsql-excel mlsql-assert mlsql-shell 19 | -------------------------------------------------------------------------------- /dev/change-scala-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | set -e 21 | 22 | VALID_VERSIONS=( 2.11 2.12 ) 23 | 24 | usage() { 25 | echo "Usage: $(basename $0) [-h|--help] 26 | where : 27 | -h| --help Display this help text 28 | valid version values : ${VALID_VERSIONS[*]} 29 | " 1>&2 30 | exit 1 31 | } 32 | 33 | if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then 34 | usage 35 | fi 36 | 37 | TO_VERSION=$1 38 | 39 | check_scala_version() { 40 | for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done 41 | echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2 42 | exit 1 43 | } 44 | 45 | check_scala_version "$TO_VERSION" 46 | 47 | if [ $TO_VERSION = "2.12" ]; then 48 | FROM_VERSION="2.11" 49 | else 50 | FROM_VERSION="2.12" 51 | fi 52 | 53 | sed_i() { 54 | sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2" 55 | } 56 | 57 | export -f sed_i 58 | 59 | BASEDIR=$(dirname $0)/.. 
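# Rewrite the Scala binary suffix on each module's artifactId (e.g. foo_2.11 -> foo_2.12) in every pom.xml outside target/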
60 | find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \ 61 | -exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \; 62 | 63 | # Also update in parent POM 64 | # Match any scala binary version to ensure idempotency 65 | sed_i '1,/[0-9]*\.[0-9]*[0-9]*\.[0-9]*'$TO_VERSION' 13 | HBaseRecord(i, "extra") 14 | } 15 | val tableName = "t1" 16 | val familyName = "c1" 17 | 18 | 19 | import spark.implicits._ 20 | sc.parallelize(data).toDF.write 21 | .options(Map( 22 | "outputTableName" -> cat, 23 | "family" -> family 24 | ) ++ options) 25 | .format("org.apache.spark.sql.execution.datasources.hbase2x") 26 | .save() 27 | 28 | val df = spark.read.format("org.apache.spark.sql.execution.datasources.hbase2x").options( 29 | Map( 30 | "inputTableName" -> tableName, 31 | "family" -> familyName, 32 | "field.type.col1" -> "BooleanType", 33 | "field.type.col2" -> "DoubleType", 34 | "field.type.col3" -> "FloatType", 35 | "field.type.col4" -> "IntegerType", 36 | "field.type.col5" -> "LongType", 37 | "field.type.col6" -> "ShortType", 38 | "field.type.col7" -> "StringType", 39 | "field.type.col8" -> "ByteType" 40 | ) 41 | ).load() 42 | ``` 43 | 44 | MLSQL: 45 | 46 | ```sql 47 | set rawText=''' 48 | {"id":9,"content":"Spark好的语言1","label":0.0} 49 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0} 50 | {"id":12,"content":"MLSQL是一个好的语言7","label":0.0} 51 | '''; 52 | 53 | load jsonStr.`rawText` as orginal_text_corpus; 54 | 55 | select cast(id as String) as rowkey,content,label from orginal_text_corpus as orginal_text_corpus1; 56 | 57 | connect hbase2x where `zk`="127.0.0.1:2181" 58 | and `family`="cf" as hbase1; 59 | 60 | save overwrite orginal_text_corpus1 61 | as hbase2x.`hbase1:mlsql_example`; 62 | 63 | load hbase2x.`hbase1:mlsql_example` where field.type.label="DoubleType" 64 | as mlsql_example ; 65 | 66 | select * from mlsql_example as show_data; 67 | ``` 68 | 69 | You should configure parameters like `zookeeper.znode.parent`,`hbase.rootdir` according by 70 | your HBase configuration. 
71 | 72 | Parameters: 73 | 74 | | Property Name | Meaning | 75 | |---|---| 76 | | tsSuffix |to overwrite hbase value's timestamp| 77 | |namespace|hbase namespace| 78 | | family |hbase family,family="" means load all existing families| 79 | | field.type.ck | specify type for ck(field name),now supports:LongType、FloatType、DoubleType、IntegerType、BooleanType、BinaryType、TimestampType、DateType,default: StringType。| 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /ds-hbase-2x/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=ds-hbase-2x-3.0 2 | mainClass=tech.mlsql.plugins.ds.MLSQLHBase2x 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/ds-hbase-2x 9 | mlsqlPluginType=ds 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseConfBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | import org.apache.hadoop.hbase.HBaseConfiguration 4 | import org.apache.spark.sql.SparkSession 5 | import org.json4s.DefaultFormats 6 | import scala.collection.JavaConversions._ 7 | 8 | /** 9 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | object HBaseConfBuilder { 12 | def build(spark: SparkSession, parameters: Map[String, String]) = { 13 | val testConf = spark.sqlContext.sparkContext.conf.getBoolean(SparkHBaseConf.testConf, false) 14 | if (testConf) SparkHBaseConf.conf 15 | else { 16 | implicit val formats = DefaultFormats 17 | 18 | // task is already broadcast; since hConf is per HBaseRelation (currently), broadcast'ing 19 | // it again does not help - it actually hurts. When we add support for 20 | // caching hConf across HBaseRelation, we can revisit broadcast'ing it (with a caching 21 | // mechanism in place) 22 | val hc = HBaseConfiguration.create() 23 | 24 | if (parameters.containsKey("zk") || parameters.containsKey("hbase.zookeeper.quorum")) { 25 | hc.set("hbase.zookeeper.quorum", parameters.getOrElse("zk", parameters.getOrElse("hbase.zookeeper.quorum", "127.0.0.1:2181"))) 26 | } 27 | 28 | if (parameters.containsKey("znode")) { 29 | hc.set("zookeeper.znode.parent", parameters.get("znode").get) 30 | } 31 | 32 | if (parameters.containsKey("rootdir")) { 33 | hc.set("hbase.rootdir", parameters.get("rootdir").get) 34 | } 35 | 36 | /** 37 | * when people confgiure the wrong zk address, by default the HBase client will 38 | * try infinitely. We should control this group parameters to limit the try times. 
39 | */ 40 | hc.set("hbase.client.pause", parameters.getOrElse("hbase.client.pause", "1000")) 41 | hc.set("zookeeper.recovery.retry", parameters.getOrElse("zookeeper.recovery.retry", "60")) 42 | hc.set("hbase.client.retries.number", parameters.getOrElse("hbase.client.retries.number", "60")) 43 | 44 | 45 | parameters.filter { f => 46 | f._1.startsWith("hbase.") || f._1.startsWith("zookeeper.") || f._1.startsWith("phoenix.") 47 | }.foreach { f => 48 | hc.set(f._1, f._2) 49 | } 50 | 51 | hc 52 | } 53 | 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/HBaseType.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | /** 4 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | package object hbase2x { 7 | type HBaseType = Array[Byte] 8 | } 9 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/JavaDoc.java: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x; 2 | 3 | /** 4 | * 2019-07-06 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class JavaDoc { 7 | } 8 | -------------------------------------------------------------------------------- /ds-hbase-2x/src/main/java/org/apache/spark/sql/execution/datasources/hbase2x/SparkHBaseConf.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.execution.datasources.hbase2x 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | 5 | /** 6 | * 2019-07-08 WilliamZhu(allwefantasy@gmail.com) 7 | */ 8 | object SparkHBaseConf { 9 | val testConf = "spark.hbase.connector.test" 10 | val credentialsManagerEnabled = "spark.hbase.connector.security.credentials.enabled" 11 | val expireTimeFraction = "spark.hbase.connector.security.credentials.expireTimeFraction" 12 | val refreshTimeFraction = "spark.hbase.connector.security.credentials.refreshTimeFraction" 13 | val refreshDurationMins = "spark.hbase.connector.security.credentials.refreshDurationMins" 14 | val principal = "spark.hbase.connector.security.credentials" 15 | val keytab = "spark.hbase.connector.security.keytab" 16 | 17 | var conf: Configuration = _ 18 | var BulkGetSize = "spark.hbase.connector.bulkGetSize" 19 | var defaultBulkGetSize = 100 20 | var CachingSize = "spark.hbase.connector.cacheSize" 21 | var defaultCachingSize = 100 22 | // in milliseconds 23 | val connectionCloseDelay = 10 * 60 * 1000 24 | } 25 | -------------------------------------------------------------------------------- /echo-controller/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | echo-controller-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /echo-controller/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin app add - echo-controller-2.4; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ``` 10 | select 
crawler_http("http://127.0.0.1:9003/run/script","POST",map("owner","wow","sql","select 1 as a as output;","executeMode","echo")) as c as output; 11 | ``` 12 | 13 | The server will respond with `select 1 as a as output;` instead of executing the SQL. -------------------------------------------------------------------------------- /echo-controller/desc.plugin: -------------------------------------------------------------------------------- 1 | tech.mlsql.plugins.app.echocontroller.StreamApp 2 | -------------------------------------------------------------------------------- /echo-controller/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | echo-controller-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /echo-controller/src/main/java/tech/mlsql/plugins/mllib/echocontroller/StreamApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.echocontroller 2 | 3 | import tech.mlsql.app.CustomController 4 | import tech.mlsql.common.utils.serder.json.JSONTool 5 | import tech.mlsql.runtime.AppRuntimeStore 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 7/11/2019 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility { 12 | override def run(args: Seq[String]): Unit = { 13 | AppRuntimeStore.store.registerController("echo", classOf[EchoController].getName) 14 | } 15 | 16 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 17 | } 18 | 19 | class EchoController extends CustomController { 20 | override def run(params: Map[String, String]): String = { 21 | JSONTool.toJsonStr(List(params("sql"))) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /install-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ALL_MODUELS="mlsql-shell mlsql-assert mlsql-mllib mlsql-excel connect-persist last-command run-script save-then-load stream-persist table-repartition" 4 | 5 | MODUELS=${1} 6 | 7 | if [[ "${MODUELS}" == "" ]];then 8 | MODUELS=${ALL_MODUELS} 9 | fi 10 | 11 | for spark_version in spark243 spark311 12 | do 13 | for module in ${MODUELS} 14 | do 15 | ./install.sh ${module} ${spark_version} 16 | done 17 | done 18 | 19 | 20 | # ./install.sh ds-hbase-2x 21 | # ./install.sh mlsql-bigdl -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | PROJECT=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins 2 | 3 | MOUDLE_NAME=$1 4 | VERSION="0.1.0-SNAPSHOT" 5 | V=${2:-3.0} 6 | MIDDLE="2.4_2.11" 7 | 8 | SPARK="spark311" 9 | 10 | if [[ "${V}" == "2.4" ]] 11 | then 12 | SPARK=spark243 13 | fi 14 | 15 | if [[ "${SPARK}" == "spark311" ]] 16 | then 17 | MIDDLE="3.0_2.12" 18 | fi 19 | 20 | echo ${MOUDLE_NAME} 21 | echo ${SPARK} 22 | echo ${MIDDLE} 23 | 24 | mlsql_plugin_tool build --module_name ${MOUDLE_NAME} --spark ${SPARK} 25 | mlsql_plugin_tool upload \ 26 | --module_name ${MOUDLE_NAME} \ 27 | --user ${STORE_USER} \ 28 | --password ${STORE_PASSWORD} \ 29 | --jar_path
${PROJECT}/${MOUDLE_NAME}/build/${MOUDLE_NAME}-${MIDDLE}-${VERSION}.jar 30 | -------------------------------------------------------------------------------- /last-command/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | last-command-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin et add - last-command-2.4 named lastCommand; 5 | ``` 6 | 7 | ## Help 8 | 9 | 10 | ```sql 11 | !show et LastCommand; 12 | ``` 13 | 14 | ## Usage 15 | 16 | ```sql 17 | !hdfs -ls /tmp/; 18 | !lastCommand named hdfsTmpTable; 19 | select * from hdfsTmpTable as output; 20 | ``` 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /last-command/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=last-command-2.4 2 | mainClass=tech.mlsql.plugins.et.LastCommand 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/last-command 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=last command 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | last-command-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /last-command/src/main/java/tech/mlsql/plugins/et/LastCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib._ 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.serder.json.JSONTool 10 | import tech.mlsql.version.VersionCompatibility 11 | 12 | 13 | class LastCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 14 | def this() = this(BaseParams.randomUID()) 15 | 16 | // 17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 18 | 19 | val context = ScriptSQLExec.context() 20 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 21 | 22 | //!last named table1; 23 | context.execListener.getLastSelectTable() match { 24 | case Some(tableName) => 25 | command match { 26 | case Array("named", newTableName) => 27 | val newDf = context.execListener.sparkSession.table(tableName) 28 | newDf.createOrReplaceTempView(newTableName) 29 | newDf 30 | } 31 | case None => throw new RuntimeException("no table found in previous command") 32 | } 33 | } 34 | 35 | 36 | override def supportedVersions: Seq[String] = { 37 | 
Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 38 | } 39 | 40 | 41 | override def doc: Doc = Doc(MarkDownDoc, 42 | s""" 43 | |When you want to get the result of a command and use it 44 | | in the next command (SQL), you can use the !last command. 45 | | 46 | |For example: 47 | | 48 | |``` 49 | |${codeExample.code} 50 | |``` 51 | """.stripMargin) 52 | 53 | 54 | override def codeExample: Code = Code(SQLCode, 55 | """ 56 | |!hdfs /tmp; 57 | |!last named hdfsTmpTable; 58 | |select * from hdfsTmpTable; 59 | """.stripMargin) 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 66 | } 67 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - "mlsql-analysis-toolkit-2.4"; 5 | ``` 6 | 7 | 8 | ## Usage 9 | 10 | To compute the median (the 0.5 quantile) of a field in a table: 11 | 12 | ```sql 13 | !approxQuantile time_temp birthday "0.5" valued time_quantile; 14 | select ${time_quantile} as quantile as output; 15 | ``` 16 | 17 | 18 | To create a table with an id column of a fixed size: 19 | 20 | ```sql 21 | !dataframe build range 100000 named table1; 22 | ``` 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-analysis-toolkit-2.4 2 | mainClass=tech.mlsql.plugins.analysis.AnalysisApp 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-analysis-toolkit 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=app 10 | desc=wow 11 | 12 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-analysis-toolkit-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/AnalysisApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import tech.mlsql.dsl.CommandCollection 4 | import tech.mlsql.ets.register.ETRegister 5 | import tech.mlsql.version.VersionCompatibility 6 | 7 | /** 8 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com) 9 | */ 10 | class AnalysisApp extends tech.mlsql.app.App
with VersionCompatibility { 11 | override def run(args: Seq[String]): Unit = { 12 | ETRegister.register("ApproxQuantile", classOf[ApproxQuantile].getName) 13 | CommandCollection.refreshCommandMapping(Map("approxQuantile" -> "ApproxQuantile")) 14 | 15 | ETRegister.register("DFTool", classOf[DFTool].getName) 16 | CommandCollection.refreshCommandMapping(Map("dataframe" -> "DFTool")) 17 | } 18 | 19 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 20 | } -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/ApproxQuantile.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib.SQLAlg 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import tech.mlsql.common.utils.serder.json.JSONTool 10 | import tech.mlsql.version.VersionCompatibility 11 | 12 | /** 13 | * 26/4/2020 WilliamZhu(allwefantasy@gmail.com) 14 | */ 15 | class ApproxQuantile(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 16 | def this() = this(BaseParams.randomUID()) 17 | 18 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 19 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 20 | 21 | def compute(table: String, field: String, quantile: String, error: String) = { 22 | df.sparkSession.table(table).stat.approxQuantile(field, Array(quantile.toDouble), error.toDouble) 23 | } 24 | 25 | var tableName: String = null 26 | 27 | val res = command match { 28 | case Array(table, field, quantile) => 29 | compute(table, field, quantile, "0").head 30 | 31 | case Array(table, field, quantile, "valued", value) => 32 | val f = compute(table, field, quantile, "0").head 33 | ScriptSQLExec.context().execListener.addEnv(value, f.toString) 34 | f 35 | case Array(table, field, quantile, "named", value) => 36 | tableName = value 37 | compute(table, field, quantile, "0").head 38 | 39 | case Array(table, field, quantile, error) => 40 | compute(table, field, quantile, error).head 41 | 42 | case Array(table, field, quantile, error, "valued", value) => 43 | val f = compute(table, field, quantile, error).head 44 | ScriptSQLExec.context().execListener.addEnv(value, f.toString) 45 | f 46 | case Array(table, field, quantile, error, "named", value) => 47 | tableName = value 48 | compute(table, field, quantile, error).head 49 | } 50 | 51 | import df.sparkSession.implicits._ 52 | val newdf = df.sparkSession.createDataset[Double](Seq(res)).toDF("value") 53 | if (tableName != null) { 54 | newdf.createOrReplaceTempView(tableName) 55 | } 56 | newdf 57 | 58 | } 59 | 60 | 61 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 62 | 63 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 64 | 65 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
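  // Command forms accepted by the pattern match in `train` above:
  //   !approxQuantile <table> <field> <quantile> [<relativeError>];
  //   append `valued <name>` to store the result in a script env variable, or `named <name>` to register the single-row result as a temp view.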
66 | 67 | override def supportedVersions: Seq[String] = { 68 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /mlsql-analysis-toolkit/src/main/java/tech/mlsql/plugins/analysis/DFTool.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.analysis 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.mmlib.SQLAlg 6 | import streaming.dsl.mmlib.algs.Functions 7 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 8 | import tech.mlsql.common.utils.serder.json.JSONTool 9 | import tech.mlsql.version.VersionCompatibility 10 | 11 | /** 12 | * 1/5/2020 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class DFTool(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 15 | def this() = this(BaseParams.randomUID()) 16 | 17 | /** 18 | * !dataframe build range 100 named table1; 19 | */ 20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 21 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 22 | 23 | val newdf = command match { 24 | case Array("build", "range", end, "named", table) => 25 | val temp = df.sparkSession.range(end.toLong).toDF() 26 | temp.createOrReplaceTempView(table) 27 | temp 28 | } 29 | newdf 30 | } 31 | 32 | 33 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 34 | 35 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 36 | 37 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
38 | 39 | override def supportedVersions: Seq[String] = { 40 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /mlsql-assert/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-assert-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-assert/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-assert-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-assert/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-shell 2 | 3 | This plugin provide assert in table. 4 | 5 | 6 | ## Install from store 7 | 8 | Execute following command in web console: 9 | 10 | ``` 11 | !plugin app add - "mlsql-assert-2.4"; 12 | ``` 13 | 14 | 15 | ## Install Manually 16 | 17 | Firstly, build shade jar in your terminal: 18 | 19 | ```shell 20 | pip install mlsql_plugin_tool 21 | mlsql_plugin_tool build --module_name mlsql-assert --spark spark243 22 | ``` 23 | 24 | then change start script of MLSQL Engine, 25 | 26 | Add Jar: 27 | 28 | ``` 29 | --jars YOUR_JAR_PATH 30 | ``` 31 | 32 | Register Class: 33 | 34 | ``` 35 | -streaming.plugin.clzznames tech.mlsql.plugins.shell.app.MLSQLShell 36 | ``` 37 | 38 | If there are more than one class, use comma to seperate them. 
For example: 39 | 40 | ``` 41 | -streaming.plugin.clzznames classA,classB,classC 42 | ``` 43 | 44 | ## Usage 45 | 46 | ```sql 47 | 48 | -- !plugin app remove "mlsql-assert-2.4"; 49 | -- !plugin app add - "mlsql-assert-2.4"; 50 | -- create test data 51 | set jsonStr=''' 52 | {"features":[5.1,3.5,1.4,0.2],"label":0.0}, 53 | {"features":[5.1,3.5,1.4,0.2],"label":1.0} 54 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 55 | {"features":[4.4,2.9,1.4,0.2],"label":0.0} 56 | {"features":[5.1,3.5,1.4,0.2],"label":1.0} 57 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 58 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 59 | {"features":[4.7,3.2,1.3,0.2],"label":1.0} 60 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 61 | {"features":[5.1,3.5,1.4,0.2],"label":0.0} 62 | '''; 63 | load jsonStr.`jsonStr` as data; 64 | select vec_dense(features) as features ,label as label from data 65 | as data1; 66 | 67 | -- use RandomForest 68 | train data1 as RandomForest.`/tmp/model` where 69 | 70 | -- once set true,every time you run this script, MLSQL will generate new directory for you model 71 | keepVersion="true" 72 | 73 | -- specicy the test dataset which will be used to feed evaluator to generate some metrics e.g. F1, Accurate 74 | and evaluateTable="data1" 75 | 76 | -- specify group 0 parameters 77 | and `fitParam.0.labelCol`="features" 78 | and `fitParam.0.featuresCol`="label" 79 | and `fitParam.0.maxDepth`="2" 80 | 81 | -- specify group 1 parameters 82 | and `fitParam.1.featuresCol`="features" 83 | and `fitParam.1.labelCol`="label" 84 | and `fitParam.1.maxDepth`="10" 85 | as model_result; 86 | 87 | select name,value from model_result where name="status" as result; 88 | -- make sure status of all models are success. 89 | !assert result ''':value=="success"''' "all model status should be success"; 90 | 91 | ``` 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /mlsql-assert/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-assert-3.0 2 | mainClass=tech.mlsql.plugins.assert.app.MLSQLAssert 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-assert 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-assert/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-assert-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-assert/src/main/java/tech/mlsql/plugins/assert/app/MLSQLAssert.scala: -------------------------------------------------------------------------------- 1 | 
package tech.mlsql.plugins.assert.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.assert.ets.{Assert, MLSQLThrow} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 4/6/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLAssert extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("Assert", classOf[Assert].getName) 15 | ETRegister.register("Throw", classOf[MLSQLThrow].getName) 16 | CommandCollection.refreshCommandMapping(Map("assert" -> 17 | """ 18 | |run command as Assert.`` where parameters='''{:all}''' 19 | |""".stripMargin)) 20 | CommandCollection.refreshCommandMapping(Map("throw" -> 21 | """ 22 | |run command as Throw.`` where msg='''{0}''' 23 | |""".stripMargin)) 24 | } 25 | 26 | 27 | override def supportedVersions: Seq[String] = { 28 | MLSQLAssert.versions 29 | } 30 | } 31 | 32 | object MLSQLAssert { 33 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 34 | } -------------------------------------------------------------------------------- /mlsql-assert/src/main/java/tech/mlsql/plugins/assert/ets/MLSQLThrow.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.assert.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.mlsql.session.MLSQLException 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib.algs.Functions 8 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 9 | import streaming.dsl.mmlib._ 10 | import tech.mlsql.dsl.auth.ETAuth 11 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 12 | import tech.mlsql.plugins.assert.app.MLSQLAssert 13 | import tech.mlsql.version.VersionCompatibility 14 | 15 | /** 16 | * 4/9/2021 WilliamZhu(allwefantasy@gmail.com) 17 | */ 18 | class MLSQLThrow(override val uid: String) extends SQLAlg 19 | with VersionCompatibility with Functions with WowParams with ETAuth { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | throw new RuntimeException(params("msg")) 24 | } 25 | 26 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 27 | 28 | override def skipPathPrefix: Boolean = true 29 | 30 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = throw new MLSQLException(s"${getClass.getName} not support register ") 31 | 32 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = throw new MLSQLException(s"${getClass.getName} not support register ") 33 | 34 | override def supportedVersions: Seq[String] = MLSQLAssert.versions 35 | 36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 37 | List() 38 | } 39 | 40 | override def modelType: ModelType = ProcessType 41 | 42 | override def doc: Doc = Doc(HtmlDoc, 43 | """ 44 | | 45 | | This ET is used to stop the execute of the script. 
46 | """.stripMargin) 47 | 48 | 49 | override def codeExample: Code = Code(SQLCode, 50 | """ 51 | | 52 | |!throw "exception msg"; 53 | |run command as Throw.`` where msg=""; 54 | """.stripMargin) 55 | } 56 | -------------------------------------------------------------------------------- /mlsql-bigdl/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-bigdl-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl 9 | mlsqlPluginType=app 10 | desc=bigdl 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-bigdl/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin app add - "mlsql-bigdl-2.4"; 5 | ``` 6 | 7 | ## Usage 8 | 9 | Check this [Doc](http://docs.mlsql.tech/zh/dl/load_image.html) 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /mlsql-bigdl/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-bigdl-3.0 2 | mainClass=tech.mlsql.plugins.bigdl.BigDLApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-bigdl 9 | mlsqlPluginType=app 10 | desc=bigdl 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/LogTrainSummary.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.intel.analytics.bigdl.visualization 20 | 21 | import org.apache.spark.internal.Logging 22 | import streaming.log.WowLog 23 | 24 | 25 | class LogTrainSummary(logDir: String, 26 | appName: String) extends TrainSummary(logDir, appName) with Logging with WowLog { 27 | 28 | override def addScalar(tag: String, value: Float, step: Long): LogTrainSummary.this.type = { 29 | // tag match { 30 | // case "Throughput" => 31 | // logInfo(format(s"global step: ${step} Throughput is ${value} records/second. 
")) 32 | // case "Loss" => 33 | // logInfo(format(s"global step: ${step} Loss is ${value}")) 34 | // case _ => 35 | // logInfo(format(s"global step: ${step} ${tag} is ${value}")) 36 | // } 37 | 38 | super.addScalar(tag, value, step) 39 | } 40 | } 41 | 42 | class LogValidateSummary(logDir: String, 43 | appName: String) extends ValidationSummary(logDir, appName) with Logging with WowLog { 44 | override def addScalar(tag: String, value: Float, step: Long): LogValidateSummary.this.type = { 45 | //logInfo(format(s"global step: ${step} ${tag} is ${value}")) 46 | super.addScalar(tag, value, step) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/com/intel/analytics/bigdl/visualization/WowFileWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.intel.analytics.bigdl.visualization 20 | 21 | import com.intel.analytics.bigdl.visualization.tensorboard.FileWriter 22 | 23 | 24 | class WowFileWriter(folder: String) extends FileWriter(folder) { 25 | 26 | } 27 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/BigDLApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.bigdl 2 | 3 | import tech.mlsql.ets.register.ETRegister 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * 5/4/2020 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | class BigDLApp extends tech.mlsql.app.App with VersionCompatibility { 10 | override def run(args: Seq[String]): Unit = { 11 | ETRegister.register("ImageLoaderExt", classOf[SQLImageLoaderExt].getName) 12 | ETRegister.register("MnistLoaderExt", classOf[SQLMnistLoaderExt].getName) 13 | ETRegister.register("BigDLClassifyExt", classOf[SQLBigDLClassifyExt].getName) 14 | ETRegister.register("LeNet5Ext", classOf[SQLLeNet5Ext].getName) 15 | } 16 | 17 | override def supportedVersions: Seq[String] = Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 18 | } 19 | 20 | -------------------------------------------------------------------------------- /mlsql-bigdl/src/main/java/tech/mlsql/plugins/bigdl/WowClassNLLCriterion.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.bigdl 2 | 3 | import com.intel.analytics.bigdl.nn.ClassNLLCriterion 4 | import com.intel.analytics.bigdl.tensor.Tensor 5 | import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric 6 | import streaming.dsl.mmlib.algs.bigdl.ClassWeightParamExtractor 7 | 8 | 9 | object WowClassNLLCriterion { 10 | 
def apply( 11 | paramsExtractor: ClassWeightParamExtractor 12 | )(implicit ev: TensorNumeric[Float]): ClassNLLCriterion[Float] = { 13 | val weights = paramsExtractor.weights.map(f => Tensor(f, Array(f.size))).getOrElse(null) 14 | new ClassNLLCriterion[Float](weights, 15 | paramsExtractor.sizeAverage.getOrElse(true), 16 | paramsExtractor.logProbAsInput.getOrElse(true), 17 | paramsExtractor.paddingValue.getOrElse(-1) 18 | ) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-canal/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-canal-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.canal.CanalApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=zml1206 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal 9 | mlsqlPluginType=app 10 | desc=mlsql-canal 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-canal/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-canal-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | shade 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-shade-plugin 25 | 3.2.0 26 | 27 | 28 | 29 | *:* 30 | 31 | META-INF/*.SF 32 | META-INF/*.DSA 33 | META-INF/*.RSA 34 | 35 | 36 | 37 | false 38 | 39 | 40 | org.apache.poi 41 | shadeio.poi 42 | 43 | 44 | com.norbitltd.spoiwo 45 | shadeio.spoiwo 46 | 47 | 48 | com.github.pjfanning 49 | shadeio.pjfanning 50 | 51 | 52 | org.apache.commons.compress 53 | shadeio.commons.compress 54 | 55 | 56 | 57 | 58 | 59 | 60 | package 61 | 62 | shade 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /mlsql-canal/README.md: -------------------------------------------------------------------------------- 1 | mlsql-canal 2 | 3 | Used in streaming, parse canal binlog, store it to delta lake, support ddl. 4 | Only support spark 3.X. 
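Each Kafka record the plugin consumes is expected to be a Canal flat message in JSON form; the fields mirror the `BinlogRecord` case class used by the sink. A minimal sketch with illustrative values only:

```json
{
  "data": [{"id": "1", "name": "foo"}],
  "database": "canal_test",
  "es": 1623400000000,
  "id": 5,
  "isDdl": false,
  "mysqlType": {"id": "int(11)", "name": "varchar(64)"},
  "old": [{"name": "bar"}],
  "pkNames": ["id"],
  "sql": "",
  "sqlType": {"id": 4, "name": 12},
  "table": "test",
  "ts": 1623400000123,
  "type": "UPDATE"
}
```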
5 | 6 | ## Install 7 | 8 | ``` 9 | !plugin ds add - "mlsql-canal-3.0"; 10 | ``` 11 | 12 | or install as app: 13 | 14 | ``` 15 | !plugin app add "tech.mlsql.plugins.canal.CanalApp" "mlsql-canal-3.0"; 16 | ``` 17 | 18 | 19 | ## Usage 20 | 21 | ```sql 22 | set streamName="binlog_to_delta"; 23 | 24 | load kafka.`binlog-canal_test` 25 | options `kafka.bootstrap.servers` = "***" 26 | and `maxOffsetsPerTrigger`="600000" 27 | as kafka_record; 28 | 29 | select cast(value as string) as value from kafka_record 30 | as kafka_value; 31 | 32 | save append kafka_value 33 | as custom.`` 34 | options mode = "Append" 35 | and duration = "20" 36 | and sourceTable = "kafka_value" 37 | and checkpointLocation = "checkpoint/binlog_to_delta" 38 | and code = ''' 39 | run kafka_value 40 | as BinlogToDelta.`` 41 | options dbTable = "canal_test.test"; 42 | '''; 43 | ``` -------------------------------------------------------------------------------- /mlsql-canal/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-canal-3.0 2 | mainClass=tech.mlsql.plugins.canal.CanalApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=zml1206 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-canal 9 | mlsqlPluginType=app 10 | desc=mlsql-canal 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-canal/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-canal-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | shade 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-shade-plugin 25 | 3.2.0 26 | 27 | 28 | 29 | *:* 30 | 31 | META-INF/*.SF 32 | META-INF/*.DSA 33 | META-INF/*.RSA 34 | 35 | 36 | 37 | false 38 | 39 | 40 | org.apache.poi 41 | shadeio.poi 42 | 43 | 44 | com.norbitltd.spoiwo 45 | shadeio.spoiwo 46 | 47 | 48 | com.github.pjfanning 49 | shadeio.pjfanning 50 | 51 | 52 | org.apache.commons.compress 53 | shadeio.commons.compress 54 | 55 | 56 | 57 | 58 | 59 | 60 | package 61 | 62 | shade 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/CanalApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal 2 | 3 | import tech.mlsql.ets.register.ETRegister 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * Created by zhuml on 2021/6/11. 
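 * Registers the BinlogToDelta ET at startup so streaming scripts (see the README usage above) can
 * sink Canal binlog records into Delta Lake.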
8 | */ 9 | class CanalApp extends tech.mlsql.app.App with VersionCompatibility { 10 | 11 | override def run(args: Seq[String]): Unit = { 12 | ETRegister.register("BinlogToDelta", "tech.mlsql.plugins.canal.ets.BinlogToDelta") 13 | } 14 | 15 | override def supportedVersions: Seq[String] = Seq("1.6.0-SNAPSHOT") 16 | 17 | } 18 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/ets/BinlogToDelta.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.mlsql.session.MLSQLException 6 | import org.apache.spark.sql.{DataFrame, SparkSession} 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.param.WowParams 9 | import tech.mlsql.common.utils.log.Logging 10 | import tech.mlsql.plugins.canal.sink.{BinlogWritter, DeltaSink} 11 | 12 | /** 13 | * Created by zhuml on 2021/6/11. 14 | */ 15 | class BinlogToDelta(override val uid: String) extends SQLAlg with WowParams with Logging { 16 | 17 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 18 | 19 | val spark = df.sparkSession 20 | 21 | params.get(dbTable.name) 22 | .map(m => set(dbTable, m)).getOrElse { 23 | throw new MLSQLException(s"${dbTable.name} is required") 24 | } 25 | params.get(maxTs.name) 26 | .map(m => set(maxTs, m)).getOrElse { 27 | set(maxTs, "0") 28 | } 29 | 30 | val sink = new DeltaSink(spark, $(dbTable)) 31 | new BinlogWritter(sink, df, $(maxTs).toLong).write 32 | 33 | spark.emptyDataFrame 34 | } 35 | 36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = { 37 | throw new RuntimeException(s"${ 38 | getClass.getName 39 | } not support load function.") 40 | } 41 | 42 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, 43 | String]): UserDefinedFunction = { 44 | throw new RuntimeException(s"${ 45 | getClass.getName 46 | } not support predict function.") 47 | } 48 | 49 | override def explainParams(sparkSession: SparkSession): DataFrame = { 50 | _explainParams(sparkSession) 51 | } 52 | 53 | final val dbTable: Param[String] = new Param[String](this, "dbTable", "db.table") 54 | final val maxTs: Param[String] = new Param[String](this, "maxTs", "delta table max ts") 55 | 56 | override def doc: Doc = Doc(MarkDownDoc, 57 | """ 58 | |BinlogToDelta CDC数据变更捕获解析同步delta模块 59 | | 60 | |```sql 61 | |run table as BinlogToDelta.`` 62 | |options daTable="a.b" 63 | | as t; 64 | |``` 65 | | 66 | """.stripMargin) 67 | 68 | override def modelType: ModelType = ProcessType 69 | 70 | def this() = this(WowParams.randomUID()) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/mysql/JdbcTypeParser.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.mysql 2 | 3 | import com.alibaba.druid.sql.ast.{SQLDataType, SQLDataTypeImpl} 4 | import org.apache.spark.sql.types.{DataType, DecimalType, StructField, StructType} 5 | 6 | /** 7 | * Created by zhuml on 2021/6/11. 
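 * Translates the MySQL column type strings carried in a Canal message (e.g. "decimal(10,2)",
 * "int unsigned") into Spark Catalyst DataTypes, reusing Spark's internal
 * JdbcUtils.getCatalystType through reflection.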
8 | */ 9 | object JdbcTypeParser { 10 | 11 | val UNSIGNED = """.*(unsigned)""".r 12 | 13 | // 判断是否为有符号数 14 | def isSigned(typeName: String) = { 15 | typeName.trim match { 16 | case UNSIGNED(unsigned) => false 17 | case _ => true 18 | } 19 | } 20 | 21 | val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r 22 | val FIXED_NUMERIC = """numeric\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r 23 | val FIXED_SCALE = """\w*\(\s*(\d+)\s*\)""".r 24 | 25 | 26 | // decimal/numeric 数据类型 具有precision固定精度(最大位数)和scale小数位数(点右侧的位数)的十进制数。 27 | def parsePrecisionScale(name: String) = { 28 | name match { 29 | case "decimal" | "numeric" => Array(DecimalType.SYSTEM_DEFAULT.precision, DecimalType.SYSTEM_DEFAULT.scale) 30 | case FIXED_DECIMAL(precision, scale) => Array(precision.toInt, scale.toInt) 31 | case FIXED_NUMERIC(precision, scale) => Array(precision.toInt, scale.toInt) 32 | case FIXED_SCALE(scale) => Array(scale.toInt, 0) 33 | case _ => Array(0, 0) 34 | } 35 | } 36 | 37 | def getMysqlStructType(sqlTypeMap: Map[String, Int], mysqlTypeMap: Map[String, String]): StructType = { 38 | 39 | val fields = mysqlTypeMap.map(k => { 40 | val sqlType = sqlTypeMap(k._1) 41 | val Array(precision, scale) = parsePrecisionScale(k._2) 42 | val signed = isSigned(k._2) 43 | val columnType = getCatalystTypePrivate(sqlType, precision, scale, signed).asInstanceOf[DataType] 44 | StructField(k._1, columnType) 45 | }).toArray 46 | new StructType(fields) 47 | } 48 | 49 | def getSqlTypeCode(name: String): Integer = { 50 | val _type = """\w*""".r.findFirstIn(name).getOrElse("").toUpperCase 51 | MysqlType.valueOf(_type).getVendorTypeNumber 52 | } 53 | 54 | def sqlTypeToDataType(sqlDataType: SQLDataType): DataType = { 55 | val name = sqlDataType.getName 56 | val Array(precision, scale) = parsePrecisionScale(name) 57 | val sqlType = getSqlTypeCode(name) 58 | getCatalystTypePrivate(sqlType, precision, scale, !sqlDataType.asInstanceOf[SQLDataTypeImpl].isUnsigned).asInstanceOf[DataType] 59 | } 60 | 61 | // JDBC type to Catalyst type 62 | lazy val getCatalystTypePrivate = { 63 | import scala.reflect.runtime.{universe => ru} 64 | val classMirror = ru.runtimeMirror(getClass.getClassLoader) 65 | val JdbcUtils = classMirror.staticModule("org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils") 66 | val methods = classMirror.reflectModule(JdbcUtils) 67 | val instanceMirror = classMirror.reflect(methods.instance) 68 | val method = methods.symbol.typeSignature.member(ru.TermName("getCatalystType")).asMethod 69 | 70 | instanceMirror.reflectMethod(method) 71 | } 72 | 73 | 74 | } 75 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogConstants.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | /** 4 | * Created by zhuml on 2021/6/11. 
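 * Internal column names (___ts___, ___delete___) added by the sink, plus the BinlogRecord case
 * class that mirrors a Canal JSON message.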
5 | */ 6 | object BinlogConstants { 7 | 8 | val TS_FIELD = "___ts___" 9 | val DELETE_FIELD = "___delete___" 10 | } 11 | 12 | case class BinlogRecord(data: Array[Map[String, String]], 13 | database: String, 14 | es: String, 15 | id: Long, 16 | isDdl: Boolean, 17 | mysqlType: Map[String, String], 18 | old: Array[Map[String, String]], 19 | pkNames: Array[String], 20 | sql: String, 21 | sqlType: Map[String, Int], 22 | table: String, 23 | ts: Long, 24 | `type`: String) 25 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/BinlogWritter.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import org.apache.spark.sql.DataFrame 4 | import tech.mlsql.plugins.canal.mysql.statement.DDLStatementParser 5 | import tech.mlsql.plugins.canal.util.JacksonUtil 6 | 7 | /** 8 | * Created by zhuml on 2021/6/11. 9 | */ 10 | class BinlogWritter(@transient sink: Sink, df: DataFrame, maxTs: Long) extends Serializable { 11 | 12 | val spark = df.sparkSession 13 | 14 | def write = { 15 | sink.addTsIfNotExsit 16 | val filterDF = filter() 17 | //segment merge by ddl 18 | val ddls = filterDF.filter(r => r.isDdl && Array("ALTER", "TRUNCATE").contains(r.`type`)).collect() 19 | val dmlDS = filterDF.filter(r => !r.isDdl && Array("INSERT", "UPDATE", "DELETE").contains(r.`type`.toUpperCase)) 20 | var tsMin = 0L 21 | var tsMax = 0L 22 | ddls.foreach(ddl => { 23 | val ddlParser = new DDLStatementParser(sink.tableLoad, ddl.sql) 24 | ddlParser.parseDF() 25 | if (ddlParser.isUpdate) { 26 | tsMax = ddl.ts 27 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin && r.ts < tsMax)) 28 | sink.updateSchema(ddlParser.df) 29 | tsMin = tsMax 30 | } 31 | }) 32 | sink.mergeData(dmlDS.filter(r => r.ts >= tsMin)) 33 | } 34 | 35 | def filter() = { 36 | import spark.implicits._ 37 | val table = sink.table 38 | df.map(r => JacksonUtil.fromJson(r.getString(0), classOf[BinlogRecord])) 39 | .filter(r => r.ts >= maxTs 40 | && s"${r.database}.${r.table}".equals(table)) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/DeltaSink.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import io.delta.tables.DeltaTable 4 | import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} 5 | import tech.mlsql.common.utils.path.PathFun 6 | import tech.mlsql.datalake.DataLake 7 | 8 | /** 9 | * Created by zhuml on 2021/6/11. 
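 * Delta Lake implementation of Sink: resolves the table path (via DataLake when enabled),
 * rewrites the schema on DDL changes, and merges deduplicated binlog rows into the Delta table,
 * deleting the rows whose ___delete___ flag is true.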
10 | */ 11 | class DeltaSink(spark: SparkSession, dbTable: String) extends Sink(dbTable: String) { 12 | 13 | val dataLake = new DataLake(spark) 14 | 15 | val finalPath = if (dataLake.isEnable) { 16 | dataLake.identifyToPath(dbTable) 17 | } else { 18 | PathFun(dbTable).add(dbTable).toPath 19 | } 20 | 21 | override def tableLoad() = spark.read.format("delta").load(finalPath) 22 | 23 | override def updateSchema(df: DataFrame): Unit = { 24 | df.write 25 | .format("delta") 26 | .mode("overwrite") 27 | .option("overwriteSchema", "true") 28 | .save(finalPath) 29 | } 30 | 31 | override def mergeData(ds: Dataset[BinlogRecord]): Unit = { 32 | val records = ds.take(1) 33 | if (records.length > 0) { 34 | val record = records(0) 35 | val schema = tableLoad.drop(BinlogConstants.TS_FIELD).schema 36 | val changesDF = duplicate(ds, schema) 37 | mergeToDelta(changesDF, record.pkNames, BinlogConstants.DELETE_FIELD) 38 | } 39 | } 40 | 41 | def mergeToDelta(df: DataFrame, pkNames: Array[String], deleteField: String): Unit = { 42 | val deltaTable = DeltaTable.forPath(spark, finalPath) 43 | val condition = pkNames.map(pk => s"s.${pk} = t.${pk}").mkString(" and ") 44 | deltaTable.as("t") 45 | .merge( 46 | df.as("s"), condition) 47 | .whenMatched(s"s.${deleteField} = true") 48 | .delete() 49 | .whenMatched().updateAll() 50 | .whenNotMatched(s"s.${deleteField} = false").insertAll() 51 | .execute() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/sink/Sink.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.sink 2 | 3 | import org.apache.spark.sql.functions._ 4 | import org.apache.spark.sql.types._ 5 | import org.apache.spark.sql.{DataFrame, Dataset, functions => F} 6 | import tech.mlsql.plugins.canal.util.JacksonUtil 7 | 8 | /** 9 | * Created by zhuml on 2021/6/11. 
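 * Base class for binlog sinks: ensures the ___ts___ column exists on the target table and
 * collapses Canal records per primary key (keeping only the latest change) before handing them
 * to the concrete sink for merging.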
10 | */ 11 | abstract class Sink(val table: String) { 12 | 13 | def tableLoad: DataFrame 14 | 15 | def updateSchema(df: DataFrame) 16 | 17 | def mergeData(ds: Dataset[BinlogRecord]) 18 | 19 | def addTsIfNotExsit = { 20 | { 21 | val df = tableLoad 22 | if (!df.schema.fieldNames.contains(BinlogConstants.TS_FIELD)) { 23 | updateSchema(df.withColumn(BinlogConstants.TS_FIELD, typedLit[Long](0))) 24 | } 25 | } 26 | } 27 | 28 | // duplicate binlog and parser data 29 | def duplicate(df: Dataset[BinlogRecord], 30 | schema: StructType): DataFrame = { 31 | import df.sparkSession.implicits._ 32 | val schemaMap = schema.fields.map(s => s.name -> s.dataType).toMap 33 | 34 | val f = F.udf((dataJson: String) => { 35 | val dataMap = JacksonUtil.fromJson(dataJson, classOf[Map[String, String]]) 36 | .map(data => { 37 | if (data._2 != null) { 38 | schemaMap.get(data._1) match { 39 | case Some(IntegerType) => (data._1, data._2.toInt) 40 | case Some(LongType) => (data._1, data._2.toLong) 41 | case Some(DoubleType) => (data._1, data._2.toDouble) 42 | case Some(FloatType) => (data._1, data._2.toFloat) 43 | case _ => data 44 | } 45 | } else { 46 | data 47 | } 48 | }) 49 | JacksonUtil.toJson(dataMap) 50 | }) 51 | 52 | df.flatMap(r => { 53 | r.data.map(data => { 54 | (r.pkNames.map(data.get(_)), (r.ts, r.`type`, JacksonUtil.toJson(data))) 55 | }) 56 | }).groupBy("_1").agg(max("_2").as("latest")) 57 | .withColumn(("data"), f(F.col("latest._3"))) 58 | .select(from_json($"data", schema).as("data"), $"latest._1".as(BinlogConstants.TS_FIELD), $"latest._2".as(BinlogConstants.DELETE_FIELD)) 59 | .selectExpr("data.*", s"${BinlogConstants.TS_FIELD}", s"if(${BinlogConstants.DELETE_FIELD}='DELETE',true,false) as ${BinlogConstants.DELETE_FIELD}") 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /mlsql-canal/src/main/scala/tech/mlsql/plugins/canal/util/JacksonUtil.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.canal.util 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 5 | 6 | import scala.util.control.NonFatal 7 | 8 | object JacksonUtil { 9 | 10 | private val _mapper = new ObjectMapper() 11 | _mapper.registerModule(DefaultScalaModule) 12 | 13 | def toJson[T](obj: T): String = { 14 | _mapper.writeValueAsString(obj) 15 | } 16 | 17 | def fromJson[T](json: String, `class`: Class[T]): T = { 18 | try { 19 | _mapper.readValue(json, `class`) 20 | } catch { 21 | case NonFatal(e) => 22 | null.asInstanceOf[T] 23 | } 24 | } 25 | 26 | def prettyPrint[T](obj: T): String = { 27 | _mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /mlsql-cli/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cli-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli 9 | mlsqlPluginType=app 10 | desc=mlsql-cli 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-cli/desc.plugin: 
-------------------------------------------------------------------------------- 1 | moduleName=mlsql-cli-3.0 2 | mainClass=tech.mlsql.plugins.cli.app.MLSQLCli 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cli 9 | mlsqlPluginType=app 10 | desc=mlsql-cli 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-cli/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cli-3.0_2.12 13 | 14 | 15 | 16 | shade 17 | 18 | 19 | 20 | org.apache.maven.plugins 21 | maven-shade-plugin 22 | 3.2.0 23 | 24 | 25 | 26 | *:* 27 | 28 | META-INF/*.SF 29 | META-INF/*.DSA 30 | META-INF/*.RSA 31 | 32 | 33 | 34 | false 35 | 36 | 37 | org.apache.poi 38 | shadeio.poi 39 | 40 | 41 | com.norbitltd.spoiwo 42 | shadeio.spoiwo 43 | 44 | 45 | com.github.pjfanning 46 | shadeio.pjfanning 47 | 48 | 49 | org.apache.commons.compress 50 | shadeio.commons.compress 51 | 52 | 53 | 54 | 55 | 56 | 57 | package 58 | 59 | shade 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | info.picocli 72 | picocli 73 | 4.0.1 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliCommands.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | /** 4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class CliCommands { 7 | public static final String DEFAULT = "default"; 8 | public static final String HELP = "help"; 9 | public static final String VERSION = "version"; 10 | public static final String RUN = "run"; 11 | public static final String HOME = "home"; 12 | } 13 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliException.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | public class CliException extends RuntimeException { 10 | private List detailedMessages = new ArrayList<>(); 11 | 12 | public List getDetailedMessages() { 13 | return detailedMessages; 14 | } 15 | 16 | void addMessage(String message) { 17 | detailedMessages.add(message); 18 | } 19 | 20 | public List getMessages() { 21 | return detailedMessages; 22 | } 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/CliExceptionUtils.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | /** 4 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 5 | */ 6 | public class CliExceptionUtils { 7 | public static CliException createUsageExceptionWithHelp(String errorMsg) { 8 | CliException launcherException = new CliException(); 9 | launcherException.addMessage("mlsql: " + errorMsg); 10 | launcherException.addMessage("Run 'mlsql help' for usage."); 11 | return launcherException; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- 
/mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCli.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import picocli.CommandLine; 4 | import tech.mlsql.core.version.MLSQLVersion; 5 | import tech.mlsql.core.version.VersionInfo; 6 | 7 | import java.io.PrintStream; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | /** 12 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | public class MLSQLCli { 15 | 16 | private static PrintStream errStream = System.err; 17 | private static PrintStream outStream = System.out; 18 | 19 | public static void main(String[] args) { 20 | 21 | } 22 | 23 | @CommandLine.Command(description = "Default Command.", name = "default") 24 | private static class DefaultCmd implements MLSQLCmd { 25 | 26 | @CommandLine.Option(names = {"--help", "-h", "?"}, hidden = true, description = "for more information") 27 | private boolean helpFlag; 28 | 29 | @CommandLine.Option(names = {"--version", "-v"}, hidden = true) 30 | private boolean versionFlag; 31 | 32 | @CommandLine.Parameters(arity = "0..1") 33 | private List argList = new ArrayList<>(); 34 | 35 | @Override 36 | public void execute() { 37 | if (versionFlag) { 38 | printVersionInfo(); 39 | return; 40 | } 41 | 42 | if (!argList.isEmpty()) { 43 | printUsageInfo(argList.get(0)); 44 | return; 45 | } 46 | 47 | printUsageInfo(CliCommands.HELP); 48 | } 49 | 50 | @Override 51 | public String getName() { 52 | return "default"; 53 | } 54 | 55 | @Override 56 | public void printLongDesc(StringBuilder out) { 57 | 58 | } 59 | 60 | @Override 61 | public void printUsage(StringBuilder out) { 62 | 63 | } 64 | 65 | @Override 66 | public void setParentCmdParser(CommandLine parentCmdParser) { 67 | } 68 | } 69 | 70 | private static void printUsageInfo(String commandName) { 71 | String usageInfo = MLSQLCmd.getCommandUsageInfo(commandName); 72 | errStream.println(usageInfo); 73 | } 74 | 75 | private static void printVersionInfo() { 76 | VersionInfo verison = MLSQLVersion.version(); 77 | String output = "MLSQL: " + verison.version() + "; Spark Core: None"; 78 | outStream.print(output); 79 | } 80 | } 81 | 82 | 83 | -------------------------------------------------------------------------------- /mlsql-cli/src/main/java/tech/mlsql/plugin/cli/app/MLSQLCmd.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugin.cli.app; 2 | 3 | import picocli.CommandLine; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.InputStreamReader; 9 | import java.nio.charset.StandardCharsets; 10 | 11 | public interface MLSQLCmd { 12 | 13 | 14 | void execute(); 15 | 16 | 17 | String getName(); 18 | 19 | 20 | void printLongDesc(StringBuilder out); 21 | 22 | 23 | void printUsage(StringBuilder out); 24 | 25 | 26 | void setParentCmdParser(CommandLine parentCmdParser); 27 | 28 | 29 | static String getCommandUsageInfo(String commandName) { 30 | if (commandName == null) { 31 | throw CliExceptionUtils.createUsageExceptionWithHelp("invalid command"); 32 | } 33 | 34 | String fileName = "cli-help/mlsql-" + commandName + ".help"; 35 | try { 36 | return readFileAsString(fileName); 37 | } catch (IOException e) { 38 | throw CliExceptionUtils.createUsageExceptionWithHelp("usage info not available for command: " + commandName); 39 | } 40 | } 41 | 42 | static String readFileAsString(String path) throws IOException { 43 | InputStream is = 
ClassLoader.getSystemResourceAsStream(path); 44 | InputStreamReader inputStreamREader = null; 45 | BufferedReader br = null; 46 | StringBuilder sb = new StringBuilder(); 47 | try { 48 | inputStreamREader = new InputStreamReader(is, StandardCharsets.UTF_8); 49 | br = new BufferedReader(inputStreamREader); 50 | String content = br.readLine(); 51 | if (content == null) { 52 | return sb.toString(); 53 | } 54 | 55 | sb.append(content); 56 | 57 | while ((content = br.readLine()) != null) { 58 | sb.append('\n').append(content); 59 | } 60 | } finally { 61 | if (inputStreamREader != null) { 62 | try { 63 | inputStreamREader.close(); 64 | } catch (IOException ignore) { 65 | } 66 | } 67 | if (br != null) { 68 | try { 69 | br.close(); 70 | } catch (IOException ignore) { 71 | } 72 | } 73 | } 74 | return sb.toString(); 75 | } 76 | } -------------------------------------------------------------------------------- /mlsql-cube/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cube-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-cube/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cube-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-cube/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-cube-3.0 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLCube 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-cube 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-cube/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-cube-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | 
org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-ds/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ds-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds 9 | mlsqlPluginType=app 10 | desc=ds 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ds/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ds-3.0 2 | mainClass=tech.mlsql.plugins.ds.app.MLSQLDs 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ds 9 | mlsqlPluginType=app 10 | desc=ds 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ds/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-ds-3.0_2.12 13 | 14 | 15 | com.databricks 16 | spark-xml_2.12 17 | 0.13.0 18 | 19 | 20 | 21 | 22 | 23 | 24 | shade 25 | 26 | 27 | 28 | org.apache.maven.plugins 29 | maven-shade-plugin 30 | 3.2.0 31 | 32 | 33 | 34 | *:* 35 | 36 | META-INF/*.SF 37 | META-INF/*.DSA 38 | META-INF/*.RSA 39 | 40 | 41 | 42 | false 43 | 44 | 45 | org.apache.poi 46 | shadeio.poi 47 | 48 | 49 | com.norbitltd.spoiwo 50 | shadeio.spoiwo 51 | 52 | 53 | com.github.pjfanning 54 | shadeio.pjfanning 55 | 56 | 57 | org.apache.commons.compress 58 | shadeio.commons.compress 59 | 60 | 61 | 62 | 63 | 64 | 65 | package 66 | 67 | shade 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLDs.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ds.app 2 | 3 | import streaming.core.datasource.MLSQLRegistry 4 | import tech.mlsql.common.utils.classloader.ClassLoaderTool 5 | import tech.mlsql.common.utils.log.Logging 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class MLSQLDs extends tech.mlsql.app.App with VersionCompatibility with Logging { 12 | override def run(args: Seq[String]): Unit = { 13 | registerDS(classOf[MLSQLXml].getName) 14 | } 15 | 16 | 17 | def registerDS(name: String) = { 18 | val dataSource = ClassLoaderTool.classForName(name).newInstance() 19 | if (dataSource.isInstanceOf[MLSQLRegistry]) { 20 | dataSource.asInstanceOf[MLSQLRegistry].register() 21 | } 22 | } 23 | 24 | override def supportedVersions: Seq[String] = { 25 | MLSQLDs.versions 26 | } 27 | } 28 | 29 | object MLSQLDs { 30 | val versions = Seq(">=2.1.0") 31 | } 
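Once this app plugin is installed, the `xml` data source registered above is available from MLSQL scripts. A minimal sketch, assuming a file uploaded to `/tmp/upload`; the path and the `rowTag` value are illustrative, and any other options are passed through to com.databricks.spark.xml:

```sql
load xml.`/tmp/upload/books.xml` where rowTag="book" as books;
select * from books as output;
```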
-------------------------------------------------------------------------------- /mlsql-ds/src/main/java/tech/mlsql/plugins/ds/app/MLSQLXml.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ds.app 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import streaming.core.datasource._ 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 29/9/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLXml(override val uid: String) 13 | extends MLSQLBaseFileSource 14 | with WowParams with VersionCompatibility { 15 | def this() = this(BaseParams.randomUID()) 16 | 17 | override def sourceInfo(config: DataAuthConfig): SourceInfo = { 18 | val context = ScriptSQLExec.contextGetOrForTest() 19 | val owner = config.config.get("owner").getOrElse(context.owner) 20 | SourceInfo(shortFormat, "", resourceRealPath(context.execListener, Option(owner), config.path)) 21 | } 22 | 23 | override def explainParams(spark: SparkSession) = { 24 | _explainParams(spark) 25 | } 26 | 27 | override def register(): Unit = { 28 | DataSourceRegistry.register(MLSQLDataSourceKey(fullFormat, MLSQLSparkDataSourceType), this) 29 | DataSourceRegistry.register(MLSQLDataSourceKey(shortFormat, MLSQLSparkDataSourceType), this) 30 | } 31 | 32 | override def fullFormat: String = "com.databricks.spark.xml" 33 | 34 | override def shortFormat: String = "xml" 35 | 36 | override def supportedVersions: Seq[String] = { 37 | MLSQLDs.versions 38 | } 39 | } -------------------------------------------------------------------------------- /mlsql-excel/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-excel-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel 9 | mlsqlPluginType=ds 10 | desc=excel 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-excel/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ``` 4 | !plugin ds add - "mlsql-excel-2.4"; 5 | ``` 6 | 7 | or install as app: 8 | 9 | ``` 10 | !plugin app add "tech.mlsql.plugins.ds.MLSQLApp" "mlsql-excel-2.4"; 11 | ``` 12 | 13 | 14 | ## Usage 15 | 16 | ```sql 17 | load excel.`/tmp/upload/example_en.xlsx` 18 | where useHeader="true" and 19 | maxRowsInMemory="100" 20 | and dataAddress="A1:C8" 21 | as data; 22 | 23 | select * from data as output; 24 | ``` 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /mlsql-excel/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-excel-3.0 2 | mainClass=tech.mlsql.plugins.ds.MLSQLExcel 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-excel 9 | mlsqlPluginType=ds 10 | desc=excel 11 | 12 | 13 | 14 | 15 | 16 | 
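The excel data source in this module can also write workbooks (see ExcelFileSaver below). A minimal sketch of saving a table, assuming the `data` table from the README example above; the output path is illustrative and `header` is the option the bundled DefaultSource requires:

```sql
save overwrite data as excel.`/tmp/output/example.xlsx` where header="true";
```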
-------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import org.apache.hadoop.fs.Path 4 | import org.apache.spark.sql.sources._ 5 | import org.apache.spark.sql.types.StructType 6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} 7 | 8 | class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider { 9 | 10 | /** Creates a new relation for retrieving data from an Excel file 11 | */ 12 | override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): ExcelRelation = 13 | createRelation(sqlContext, parameters, null) 14 | 15 | /** Creates a new relation for retrieving data from an Excel file 16 | */ 17 | override def createRelation( 18 | sqlContext: SQLContext, 19 | parameters: Map[String, String], 20 | schema: StructType 21 | ): ExcelRelation = { 22 | val wbReader = WorkbookReader(parameters, sqlContext.sparkContext.hadoopConfiguration) 23 | val dataLocator = DataLocator(parameters) 24 | ExcelRelation( 25 | header = checkParameter(parameters, "header").toBoolean, 26 | treatEmptyValuesAsNulls = parameters.get("treatEmptyValuesAsNulls").fold(false)(_.toBoolean), 27 | usePlainNumberFormat = parameters.get("usePlainNumberFormat").fold(false)(_.toBoolean), 28 | userSchema = Option(schema), 29 | inferSheetSchema = parameters.get("inferSchema").fold(false)(_.toBoolean), 30 | addColorColumns = parameters.get("addColorColumns").fold(false)(_.toBoolean), 31 | timestampFormat = parameters.get("timestampFormat"), 32 | excerptSize = parameters.get("excerptSize").fold(10)(_.toInt), 33 | dataLocator = dataLocator, 34 | workbookReader = wbReader 35 | )(sqlContext) 36 | } 37 | 38 | override def createRelation( 39 | sqlContext: SQLContext, 40 | mode: SaveMode, 41 | parameters: Map[String, String], 42 | data: DataFrame 43 | ): BaseRelation = { 44 | val path = checkParameter(parameters, "path") 45 | val header = checkParameter(parameters, "header").toBoolean 46 | val filesystemPath = new Path(path) 47 | val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) 48 | new ExcelFileSaver( 49 | fs, 50 | filesystemPath, 51 | data, 52 | saveMode = mode, 53 | header = header, 54 | dataLocator = DataLocator(parameters) 55 | ).save() 56 | 57 | createRelation(sqlContext, parameters, data.schema) 58 | } 59 | 60 | // Forces a Parameter to exist, otherwise an exception is thrown. 
61 | private def checkParameter(map: Map[String, String], param: String): String = { 62 | if (!map.contains(param)) { 63 | throw new IllegalArgumentException(s"Parameter ${'"'}$param${'"'} is missing in options.") 64 | } else { 65 | map.apply(param) 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/DefaultSource15.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import org.apache.spark.sql.sources.DataSourceRegister 4 | 5 | class DefaultSource15 extends DefaultSource with DataSourceRegister { 6 | override def shortName(): String = "excel" 7 | } 8 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/ExcelFileSaver.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import com.norbitltd.spoiwo.model._ 4 | import com.norbitltd.spoiwo.natures.streaming.xlsx.Model2XlsxConversions._ 5 | import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path} 6 | import org.apache.poi.xssf.usermodel.XSSFWorkbook 7 | import org.apache.spark.sql.{DataFrame, SaveMode} 8 | import java.io.BufferedOutputStream 9 | 10 | import org.apache.poi.xssf.streaming.SXSSFWorkbook 11 | 12 | import scala.collection.JavaConverters._ 13 | 14 | object ExcelFileSaver { 15 | final val DEFAULT_SHEET_NAME = "Sheet1" 16 | final val DEFAULT_DATE_FORMAT = "yy-m-d h:mm" 17 | final val DEFAULT_TIMESTAMP_FORMAT = "yyyy-mm-dd hh:mm:ss.000" 18 | } 19 | 20 | class ExcelFileSaver( 21 | fs: FileSystem, 22 | location: Path, 23 | dataFrame: DataFrame, 24 | saveMode: SaveMode, 25 | dataLocator: DataLocator, 26 | header: Boolean = true 27 | ) { 28 | def save(): Unit = { 29 | def sheet(workbook: SXSSFWorkbook) = { 30 | val headerRow = if (header) Some(dataFrame.schema.fields.map(_.name).toSeq) else None 31 | val dataRows = dataFrame 32 | .toLocalIterator() 33 | .asScala 34 | .map(_.toSeq) 35 | dataLocator.toSheet(headerRow, dataRows, workbook) 36 | } 37 | val fileAlreadyExists = fs.exists(location) 38 | def writeToWorkbook(workbook: SXSSFWorkbook): Unit = { 39 | Workbook(sheet(workbook)).writeToExisting(workbook) 40 | autoClose(new BufferedOutputStream(fs.create(location)))(workbook.write) 41 | } 42 | (fileAlreadyExists, saveMode) match { 43 | case (false, _) | (_, SaveMode.Overwrite) => 44 | if (fileAlreadyExists) { 45 | fs.delete(location, true) 46 | } 47 | writeToWorkbook(new SXSSFWorkbook()) 48 | case (true, SaveMode.ErrorIfExists) => 49 | sys.error(s"path $location already exists.") 50 | case (true, SaveMode.Ignore) => () 51 | case (true, SaveMode.Append) => 52 | val inputStream: FSDataInputStream = fs.open(location) 53 | val workbook = new SXSSFWorkbook(new XSSFWorkbook(inputStream)) 54 | inputStream.close() 55 | writeToWorkbook(workbook) 56 | } 57 | } 58 | 59 | def autoClose[A <: AutoCloseable, B](closeable: A)(fun: (A) => B): B = { 60 | try { 61 | fun(closeable) 62 | } finally { 63 | closeable.close() 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/PlainNumberFormat.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import java.math.BigDecimal 4 | import java.text.FieldPosition 5 | 
import java.text.Format 6 | import java.text.ParsePosition 7 | 8 | /** A format that formats a double as a plain string without rounding and scientific notation. 9 | * All other operations are unsupported. 10 | * @see [[org.apache.poi.ss.usermodel.ExcelGeneralNumberFormat]] and SSNFormat from 11 | * [[org.apache.poi.ss.usermodel.DataFormatter]] from Apache POI. 12 | */ 13 | object PlainNumberFormat extends Format { 14 | 15 | override def format(number: AnyRef, toAppendTo: StringBuffer, pos: FieldPosition): StringBuffer = 16 | toAppendTo.append(new BigDecimal(number.toString).toPlainString) 17 | 18 | override def parseObject(source: String, pos: ParsePosition): AnyRef = 19 | throw new UnsupportedOperationException() 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/Utils.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | import scala.util.{Success, Try} 3 | 4 | object Utils { 5 | implicit class RichTry[T](t: Try[T]) { 6 | def toEither: Either[Throwable, T] = t.transform(s => Success(Right(s)), f => Success(Left(f))).get 7 | } 8 | 9 | case class MapIncluding[K](keys: Seq[K], optionally: Seq[K] = Seq()) { 10 | def unapply[V](m: Map[K, V]): Option[(Seq[V], Seq[Option[V]])] = 11 | if (keys.forall(m.contains)) { 12 | Some((keys.map(m), optionally.map(m.get))) 13 | } else { 14 | None 15 | } 16 | } 17 | sealed trait MapRequirements[K] { 18 | type ResultType[V] 19 | def unapplySeq[V](m: Map[K, V]): Option[ResultType[V]] 20 | } 21 | case class RequiredKeys[K](keys: K*) extends MapRequirements[K] { 22 | type ResultType[V] = Seq[V] 23 | def unapplySeq[V](m: Map[K, V]): Option[Seq[V]] = 24 | if (keys.forall(m.contains)) { 25 | Some(keys.map(m)) 26 | } else { 27 | None 28 | } 29 | } 30 | case class OptionalKeys[K](keys: K*) extends MapRequirements[K] { 31 | type ResultType[V] = Seq[Option[V]] 32 | def unapplySeq[V](m: Map[K, V]): Option[Seq[Option[V]]] = Some(keys.map(m.get)) 33 | } 34 | case class MapWith[K]( 35 | requiredKeys: RequiredKeys[K] = RequiredKeys[K](), 36 | optionalKeys: OptionalKeys[K] = OptionalKeys[K]() 37 | ) { 38 | def unapply[V](m: Map[K, V]): Option[(requiredKeys.ResultType[V], optionalKeys.ResultType[V])] = 39 | for { 40 | req <- requiredKeys.unapplySeq(m) 41 | opt <- optionalKeys.unapplySeq(m) 42 | } yield (req, opt) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /mlsql-excel/src/main/java/com/crealytics/spark/excel/WorkbookReader.scala: -------------------------------------------------------------------------------- 1 | package com.crealytics.spark.excel 2 | 3 | import java.io.InputStream 4 | 5 | import com.crealytics.spark.excel.Utils.MapIncluding 6 | import com.github.pjfanning.xlsx.StreamingReader 7 | import org.apache.hadoop.conf.Configuration 8 | import org.apache.hadoop.fs.{FileSystem, Path} 9 | import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory} 10 | 11 | trait WorkbookReader { 12 | protected def openWorkbook(): Workbook 13 | def withWorkbook[T](f: Workbook => T): T = { 14 | val workbook = openWorkbook() 15 | val res = f(workbook) 16 | workbook.close() 17 | res 18 | } 19 | def sheetNames: Seq[String] = { 20 | withWorkbook(workbook => 21 | for (sheetIx <- (0 until workbook.getNumberOfSheets())) yield { 22 | workbook.getSheetAt(sheetIx).getSheetName() 23 | } 24 | ) 25 | } 26 | } 27 | 28 | object WorkbookReader { 29 | val 
WithLocationMaxRowsInMemoryAndPassword = 30 | MapIncluding(Seq("path"), optionally = Seq("maxRowsInMemory", "workbookPassword")) 31 | 32 | def apply(parameters: Map[String, String], hadoopConfiguration: Configuration): WorkbookReader = { 33 | def readFromHadoop(location: String) = { 34 | val path = new Path(location) 35 | FileSystem.get(path.toUri, hadoopConfiguration).open(path) 36 | } 37 | parameters match { 38 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(Some(maxRowsInMemory), passwordOption)) => 39 | new StreamingWorkbookReader(readFromHadoop(location), passwordOption, maxRowsInMemory.toInt) 40 | case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(None, passwordOption)) => 41 | new DefaultWorkbookReader(readFromHadoop(location), passwordOption) 42 | } 43 | } 44 | } 45 | class DefaultWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String]) 46 | extends WorkbookReader { 47 | protected def openWorkbook(): Workbook = 48 | workbookPassword 49 | .fold(WorkbookFactory.create(inputStreamProvider))(password => 50 | WorkbookFactory.create(inputStreamProvider, password) 51 | ) 52 | } 53 | 54 | class StreamingWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String], maxRowsInMem: Int) 55 | extends WorkbookReader { 56 | override protected def openWorkbook(): Workbook = { 57 | val builder = StreamingReader 58 | .builder() 59 | .rowCacheSize(maxRowsInMem) 60 | .bufferSize(4096) 61 | workbookPassword 62 | .fold(builder)(password => builder.password(password)) 63 | .open(inputStreamProvider) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /mlsql-ext-ets/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ext-ets-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets 9 | mlsqlPluginType=app 10 | desc=mlsql-ext-ets 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ext-ets/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ext-ets-3.0 2 | mainClass=tech.mlsql.plugins.ext.ets.app.MLSQLETApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ext-ets 9 | mlsqlPluginType=app 10 | desc=mlsql-ext-ets 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ext-ets/src/main/java/tech/mlsql/plugins/ext/ets/app/MLSQLETApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ext.ets.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.version.VersionCompatibility 5 | 6 | /** 7 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 8 | */ 9 | class MLSQLETApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 10 | override def run(args: Seq[String]): Unit = { 11 | 12 | } 13 | 14 | 15 | override def supportedVersions: Seq[String] = { 16 | MLSQLETApp.versions 17 | } 18 | } 19 | 20 | object MLSQLETApp { 21 
| val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 22 | } -------------------------------------------------------------------------------- /mlsql-ke/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ke-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke 9 | mlsqlPluginType=app 10 | desc=mlsql-ke 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-ke/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-ke-3.0 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLKE 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-ke 9 | mlsqlPluginType=app 10 | desc=mlsql-ke 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-ke/src/main/java/tech/mlsql/plugins/ke/app/MLSQLKE.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ke.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.ets.register.ETRegister 5 | import tech.mlsql.plugins.ke.ets.{KEAPISchedule, KEAutoModel, KEBuildSegment} 6 | import tech.mlsql.version.VersionCompatibility 7 | 8 | /** 9 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | class MLSQLKE extends tech.mlsql.app.App with VersionCompatibility with Logging { 12 | override def run(args: Seq[String]): Unit = { 13 | ETRegister.register("KeApi", classOf[KEAPISchedule].getName) 14 | ETRegister.register("KeAutoModel", classOf[KEAutoModel].getName) 15 | ETRegister.register("KeBuildSegment", classOf[KEBuildSegment].getName) 16 | } 17 | 18 | 19 | override def supportedVersions: Seq[String] = { 20 | MLSQLKE.versions 21 | } 22 | } 23 | 24 | object MLSQLKE { 25 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 26 | } 27 | -------------------------------------------------------------------------------- /mlsql-ke/src/main/java/tech/mlsql/plugins/ke/ets/KEBuildSegment.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.ke.ets 2 | 3 | import com.alibaba.fastjson.{JSON, JSONObject} 4 | import org.apache.spark.ml.util.Identifiable 5 | import org.apache.spark.sql.DataFrame 6 | import streaming.dsl.{ConnectMeta, DBMappingKey} 7 | import streaming.dsl.mmlib.algs.param.WowParams 8 | import tech.mlsql.common.utils.log.Logging 9 | 10 | class KEBuildSegment(override val uid: String) extends KEAPISchedule with WowParams with Logging { 11 | 12 | def this() = this(Identifiable.randomUID("tech.mlsql.plugins.ets.BuildSegment")) 13 | 14 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 15 | val jsonObj = new JSONObject 16 | val split = path.split("\\.") 17 | val connectName = split(0) 18 | jsonObj.put("project", split(1)) 19 | if (params.contains("start")) { 20 | jsonObj.put("start", params("start")) 21 | } 22 | if (params.contains("end")) { 23 | jsonObj.put("end", params("end")) 24 | } 25 | if 
(params.contains("sub_partition_values")) { 26 | jsonObj.put("sub_partition_values", JSON.parseArray(params("sub_partition_values"))) 27 | } 28 | if (params.contains("build_all_indexes")) { 29 | jsonObj.put("build_all_indexes", params("build_all_indexes").toBoolean) 30 | } 31 | if (params.contains("build_all_sub_partitions")) { 32 | jsonObj.put("build_all_sub_partitions", params("build_all_sub_partitions").toBoolean) 33 | } 34 | if (params.contains("priority")) { 35 | jsonObj.put("priority", params("priority").toInt) 36 | } 37 | var url = new String 38 | ConnectMeta.presentThenCall(DBMappingKey("ke", connectName), options => { 39 | url = "http://" + options("host") + ":" + options("port") + "/kylin/api/models/" + params("model") + "/segments" 40 | }) 41 | sendPostAPI(df, params, jsonObj, url, connectName) 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /mlsql-language-server/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-language-server-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server 9 | mlsqlPluginType=app 10 | desc=mlsql-language-server 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-language-server/build.sh: -------------------------------------------------------------------------------- 1 | SOURCE=/Users/allwefantasy/Volumes/Samsung_T5/allwefantasy/CSDNWorkSpace/mlsqlplugins/mlsql-language-server/build/ 2 | TARGET=/Users/allwefantasy/projects/mlsql/src/mlsql-lang/mlsql-app_2.4-2.1.0-SNAPSHOT/plugin 3 | #conda activate mlsql-plugin-tool 4 | mlsql_plugin_tool build --module_name mlsql-language-server --spark spark243 5 | scp ${SOURCE}/mlsql-language-server-2.4_2.11-0.1.0-SNAPSHOT.jar ${TARGET}/ 6 | -------------------------------------------------------------------------------- /mlsql-language-server/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-language-server-3.0 2 | mainClass=tech.mlsql.plugins.ke.app.MLSQLLanguageServer 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-language-server 9 | mlsqlPluginType=app 10 | desc=mlsql-language-server 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/AutoSuggestWrapper.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver 2 | 3 | import net.csdn.common.exception.RenderFinish 4 | import net.csdn.common.jline.ANSI.Renderer.RenderException 5 | import net.csdn.modules.http.DefaultRestRequest 6 | import net.csdn.modules.mock.MockRestResponse 7 | import streaming.rest.RestController 8 | import tech.mlsql.autosuggest.statement.SuggestItem 9 | import tech.mlsql.common.utils.log.Logging 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | 12 | import scala.collection.JavaConverters._ 13 | 14 | /** 15 | * 1/9/2021 
WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class AutoSuggestWrapper(params: java.util.Map[String, String]) extends Logging { 18 | def run() = { 19 | try { 20 | params.put("executeMode", "autoSuggest") 21 | logInfo(JSONTool.toJsonStr(params.asScala.toMap)) 22 | 23 | val restRequest = new DefaultRestRequest("POST", params) 24 | val restReponse = new MockRestResponse() 25 | val controller = new RestController() 26 | net.csdn.modules.http.RestController.enhanceApplicationController(controller, restRequest, restReponse) 27 | try { 28 | controller.script 29 | } catch { 30 | case _: RenderFinish => 31 | } 32 | val jsonStr = restReponse.content() 33 | JSONTool.parseJson[List[SuggestItem]](jsonStr).asJava 34 | } catch { 35 | case e: Exception => 36 | logInfo("Suggest fail", e) 37 | List[SuggestItem]().asJava 38 | } 39 | 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/LSContext.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver; 2 | 3 | import net.sf.json.JSONObject; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | /** 9 | * 2/9/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | public class LSContext { 12 | final public static Map initParams = new HashMap<>(); 13 | 14 | public static void parse(String jsonStr) { 15 | JSONObject obj = JSONObject.fromObject(jsonStr); 16 | for (Object key : obj.keySet()) { 17 | initParams.put(key.toString(), obj.getString(key.toString())); 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/MLSQLWorkspaceService.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver; 2 | 3 | import org.eclipse.lsp4j.DidChangeConfigurationParams; 4 | import org.eclipse.lsp4j.DidChangeWatchedFilesParams; 5 | import org.eclipse.lsp4j.services.WorkspaceService; 6 | 7 | /** 8 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 9 | */ 10 | public class MLSQLWorkspaceService implements WorkspaceService { 11 | @Override 12 | public void didChangeConfiguration(DidChangeConfigurationParams params) { 13 | 14 | } 15 | 16 | @Override 17 | public void didChangeWatchedFiles(DidChangeWatchedFilesParams params) { 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/Message.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package tech.mlsql.plugins.langserver.commons.client; 17 | /** 18 | * {@link Message} Parsed log message sent to client. 
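 * Instances are effectively immutable apart from {@code id}; the eight constructor arguments are,
 * in order: id, direction, headers, httpMethod, path, contentType, payload and headerType.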
19 | * 20 | */ 21 | public class Message { 22 | private String id; 23 | private String direction; 24 | private String headers; 25 | private String httpMethod; 26 | private String path; 27 | private String contentType; 28 | private String payload; 29 | private String headerType; 30 | 31 | public Message(String id, String direction, String headers, String httpMethod, String path, String contentType, 32 | String payload, String headerType) { 33 | this.id = id; 34 | this.direction = direction; 35 | this.headers = headers; 36 | this.httpMethod = httpMethod; 37 | this.path = path; 38 | this.contentType = contentType; 39 | this.payload = payload; 40 | this.headerType = headerType; 41 | } 42 | 43 | public void setId(String id) { 44 | this.id = id; 45 | } 46 | 47 | public String getId() { 48 | return id; 49 | } 50 | 51 | public String getDirection() { 52 | return direction; 53 | } 54 | 55 | public String getHeaders() { 56 | return headers; 57 | } 58 | 59 | public String getHttpMethod() { 60 | return httpMethod; 61 | } 62 | 63 | public String getPath() { 64 | return path; 65 | } 66 | 67 | public String getContentType() { 68 | return contentType; 69 | } 70 | 71 | public String getPayload() { 72 | return payload; 73 | } 74 | 75 | public String getHeaderType() { 76 | return headerType; 77 | } 78 | } 79 | 80 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/commons/client/TraceRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, WSO2 Inc. (http://wso2.com) All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package tech.mlsql.plugins.langserver.commons.client; 17 | 18 | import com.google.gson.JsonObject; 19 | 20 | import java.util.UUID; 21 | 22 | /** 23 | * Model class for trace log. 
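 * The {@code record} JsonObject passed to the constructor is expected to carry the
 * java.util.logging-style fields millis, sequenceNumber, loggerName, sourceClassName,
 * sourceMethodName and threadID; each value is copied into this model as a string and
 * {@code id} is assigned a fresh UUID.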
24 | */ 25 | public class TraceRecord { 26 | private Message message; 27 | private String rawMessage; 28 | private String id; 29 | private String millis; 30 | private String sequence; 31 | private String logger; 32 | private String sourceClass; 33 | private String sourceMethod; 34 | private String thread; 35 | 36 | public TraceRecord(Message message, JsonObject record, String rawMessage) { 37 | this.message = message; 38 | this.rawMessage = rawMessage; 39 | this.id = UUID.randomUUID().toString(); 40 | this.millis = record.get("millis").getAsString(); 41 | this.sequence = record.get("sequenceNumber").getAsString(); 42 | this.logger = record.get("loggerName").getAsString(); 43 | this.sourceClass = record.get("sourceClassName").getAsString(); 44 | this.sourceMethod = record.get("sourceMethodName").getAsString(); 45 | this.thread = record.get("threadID").getAsString(); 46 | } 47 | 48 | public String getRawMessage() { 49 | return rawMessage; 50 | } 51 | 52 | public String getLogger() { 53 | return logger; 54 | } 55 | 56 | public String getSequence() { 57 | return sequence; 58 | } 59 | 60 | public String getThread() { 61 | return thread; 62 | } 63 | 64 | public String getMillis() { 65 | return millis; 66 | } 67 | 68 | public String getSourceMethod() { 69 | return sourceMethod; 70 | } 71 | 72 | public String getSourceClass() { 73 | return sourceClass; 74 | } 75 | 76 | public Message getMessage() { 77 | return message; 78 | } 79 | 80 | public String getId() { 81 | return id; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/Launcher.java: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver.launchers.stdio; 2 | 3 | import org.eclipse.lsp4j.services.LanguageClient; 4 | import tech.mlsql.plugins.langserver.MLSQLLanguageServer; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.PrintWriter; 9 | import java.util.concurrent.ExecutionException; 10 | 11 | 12 | /** 13 | * 25/8/2021 WilliamZhu(allwefantasy@gmail.com) 14 | */ 15 | public class Launcher { 16 | public static void main(String[] args) throws InterruptedException, ExecutionException { 17 | 18 | MLSQLLanguageServer server = new MLSQLLanguageServer(); 19 | 20 | boolean lspInspectorTrace = false; 21 | 22 | 23 | org.eclipse.lsp4j.jsonrpc.Launcher launcher = null; 24 | 25 | if (lspInspectorTrace) { 26 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, exitOnClose(System.in), System.out, 27 | true, new PrintWriter(System.err)); 28 | } else { 29 | launcher = org.eclipse.lsp4j.jsonrpc.Launcher.createLauncher(server, LanguageClient.class, System.in, System.out); 30 | } 31 | 32 | 33 | LanguageClient client = launcher.getRemoteProxy(); 34 | server.connect(client); 35 | launcher.startListening().get(); 36 | 37 | } 38 | 39 | private static InputStream exitOnClose(InputStream delegate) { 40 | return new InputStream() { 41 | @Override 42 | public int read() throws IOException { 43 | return exitIfNegative(delegate.read()); 44 | } 45 | 46 | int exitIfNegative(int result) { 47 | if (result < 0) { 48 | System.err.println("Input stream has closed. 
Exiting..."); 49 | System.exit(0); 50 | } 51 | return result; 52 | } 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /mlsql-language-server/src/main/java/tech/mlsql/plugins/langserver/launchers/stdio/MLSQLDesktopApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.langserver.launchers.stdio 2 | 3 | import streaming.core.StreamingApp 4 | import tech.mlsql.common.utils.path.PathFun 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | 8 | /** 9 | * 26/8/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | object MLSQLDesktopApp { 12 | def main(args: Array[String]): Unit = { 13 | val defaultMap = arrayToMap(Array( 14 | "-streaming.master", "local[*]", 15 | "-streaming.name", "MLSQL-desktop", 16 | "-streaming.rest", "true", 17 | "-streaming.thrift", "false", 18 | "-streaming.platform", "spark", 19 | "-streaming.spark.service", "true", 20 | "-streaming.job.cancel", "true", 21 | "-streaming.datalake.path", PathFun.joinPath(".","data"), 22 | "-streaming.driver.port", "9003", 23 | "-streaming.plugin.clzznames", "tech.mlsql.plugins.ds.MLSQLExcelApp,tech.mlsql.plugins.shell.app.MLSQLShell,tech.mlsql.plugins.assert.app.MLSQLAssert" 24 | )) 25 | val extraMap = arrayToMap(args) 26 | StreamingApp.main( mapToArray(defaultMap ++ extraMap)) 27 | } 28 | 29 | def arrayToMap(args: Array[String]): Map[String, String] = { 30 | val res = scala.collection.mutable.HashMap[String, String]() 31 | var i = 0; 32 | while (i < args.length) { 33 | res += (args(i) -> args(i + 1)) 34 | i += 2 35 | } 36 | res.toMap 37 | } 38 | 39 | def mapToArray(args:Map[String,String]):Array[String] = { 40 | args.flatMap{item=> 41 | val (key,value) = item 42 | Array(key,value) 43 | } .toArray 44 | } 45 | } 46 | 47 | class MLSQLDesktopApp 48 | -------------------------------------------------------------------------------- /mlsql-mllib/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-mllib-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib 9 | mlsqlPluginType=app 10 | desc=mllib 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-mllib/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-mllib-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- 
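The MLSQLDesktopApp object shown a few files above builds its argument list as `defaultMap ++ extraMap`, so any pair passed on the command line overrides the built-in default with the same key. A hedged sketch (the port value is purely illustrative, not from the original source):

```scala
// Overrides the default "-streaming.driver.port" of 9003 baked into defaultMap.
MLSQLDesktopApp.main(Array("-streaming.driver.port", "9004"))
```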
/mlsql-mllib/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-mllib 2 | 3 | This plugin provides ET wrappers for spark-mllib. 4 | 5 | ## Install from store 6 | 7 | Execute the following command in the web console: 8 | 9 | ``` 10 | !plugin app add "tech.mlsql.plugins.mllib.app.MLSQLMllib" "mlsql-mllib-2.4"; 11 | ``` 12 | 13 | Check installation: 14 | 15 | ``` 16 | !show et/ClassificationEvaluator; 17 | !show et/RegressionEvaluator; 18 | ``` 19 | 20 | ## Install Manually 21 | 22 | First, build the shaded jar in your terminal: 23 | 24 | ```shell 25 | pip install mlsql_plugin_tool 26 | mlsql_plugin_tool build --module_name mlsql-mllib --spark spark243 27 | ``` 28 | 29 | Then change the start script of MLSQL Engine. 30 | 31 | Add Jar: 32 | 33 | ``` 34 | --jars YOUR_JAR_PATH 35 | ``` 36 | 37 | Register Class: 38 | 39 | ``` 40 | -streaming.plugin.clzznames tech.mlsql.plugins.mllib.app.MLSQLMllib 41 | ``` 42 | 43 | If there is more than one class, use commas to separate them. For example: 44 | 45 | ``` 46 | -streaming.plugin.clzznames classA,classB,classC 47 | ``` 48 | 49 | ## Usage 50 | 51 | Classification: 52 | 53 | ```sql 54 | predict data as RandomForest.`/tmp/model` as predicted_table; 55 | run predicted_table as ClassificationEvaluator.``; 56 | ``` 57 | 58 | Regression: 59 | 60 | ```sql 61 | predict data as LinearRegressionExt.`/tmp/model` as predicted_table; 62 | run predicted_table as RegressionEvaluator.``; 63 | ``` 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /mlsql-mllib/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-mllib-3.0 2 | mainClass=tech.mlsql.plugins.mllib.app.MLSQLMllib 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-mllib 9 | mlsqlPluginType=app 10 | desc=mllib 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-mllib/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-mllib-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/app/MLSQLMllib.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.mllib.ets._ 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class
MLSQLMllib extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("ClassificationEvaluator", classOf[ClassificationEvaluator].getName) 15 | ETRegister.register("RegressionEvaluator", classOf[RegressionEvaluator].getName) 16 | ETRegister.register("AutoMLExt", classOf[AutoMLExt].getName) 17 | ETRegister.register("SampleDatasetExt", classOf[SampleDatasetExt].getName) 18 | ETRegister.register("TakeRandomSampleExt", classOf[TakeRandomSampleExt].getName) 19 | ETRegister.register("ColumnsExt", classOf[ColumnsExt].getName) 20 | 21 | // !columns drop fields from tableName; 22 | CommandCollection.refreshCommandMapping(Map("columns" -> 23 | """ 24 | |run {3} as ColumnsExt.`` where action="{0}" and fields="{1}" 25 | |""".stripMargin)) 26 | 27 | } 28 | 29 | 30 | override def supportedVersions: Seq[String] = { 31 | MLSQLMllib.versions 32 | } 33 | } 34 | 35 | object MLSQLMllib { 36 | val versions = Seq(">=2.0.0", "2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 37 | } -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ClassificationEvaluator.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 31/5/2021 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class ClassificationEvaluator(override val uid: String) extends SQLAlg 20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification { 21 | def this() = this(BaseParams.randomUID()) 22 | 23 | /** 24 | * run table as ClassificationEvaluator.`` where labelCol="label"; 25 | */ 26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 27 | batchPredict(df, path, params) 28 | } 29 | 30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 31 | val items = multiclassClassificationEvaluate(df, (evaluator) => { 32 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label")) 33 | evaluator.setPredictionCol("prediction") 34 | }) 35 | import df.sparkSession.implicits._ 36 | df.sparkSession.createDataset[MetricValue](items).toDF() 37 | } 38 | 39 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 40 | 41 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
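  // A hedged sketch of driving this ET directly from Scala in a test (the table name is invented,
  // not from the original source): the input only needs the label column (configurable via labelCol,
  // default "label") and a "prediction" column, and the result contains one row per metric
  // (f1, weightedPrecision, weightedRecall, accuracy).
  //
  //   val predicted = spark.table("predicted_table")
  //   val metrics = new ClassificationEvaluator().train(predicted, "", Map("labelCol" -> "label"))
  //   metrics.show()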
42 | 43 | override def supportedVersions: Seq[String] = { 44 | MLSQLMllib.versions 45 | } 46 | 47 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 48 | List() 49 | } 50 | 51 | override def modelType: ModelType = AlgType 52 | 53 | override def doc: Doc = Doc(HtmlDoc, 54 | """ 55 | |Compute f1|weightedPrecision|weightedRecall|accuracy for predicted table. 56 | """.stripMargin) 57 | 58 | 59 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr + 60 | """ 61 | |predict data as RandomForest.`/tmp/model` as predicted_table; 62 | |run predicted_table as ClassificationEvaluator.``; 63 | """.stripMargin) 64 | 65 | override def explainParams(sparkSession: SparkSession): DataFrame = { 66 | _explainParams(sparkSession) 67 | } 68 | 69 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label") 70 | 71 | } 72 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/ColumnsExt.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.{Param, StringArrayParam} 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 7 | import streaming.dsl.mmlib.algs.param.BaseParams 8 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions} 9 | import streaming.dsl.mmlib._ 10 | 11 | /** 12 | * 10/10/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class ColumnsExt(override val uid: String) extends SQLAlg 15 | with Functions 16 | with MllibFunctions 17 | with BaseClassification 18 | with PluginBaseETAuth { 19 | 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val _action = params.getOrElse(action.name, $(action).toString) 24 | val _fields = params.getOrElse(fields.name, $(fields).mkString(",")).split(",") 25 | val dfName = params("__dfname__") 26 | if (_fields.length == 0) return df 27 | _action match { 28 | case "drop" | "remove" => 29 | val newdf = df.drop(_fields: _*) 30 | newdf.createOrReplaceTempView(dfName) 31 | newdf 32 | } 33 | } 34 | 35 | 36 | override def skipOriginalDFName: Boolean = false 37 | 38 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 39 | train(df, path, params) 40 | } 41 | 42 | override def modelType: ModelType = ProcessType 43 | 44 | override def doc: Doc = Doc(MarkDownDoc, 45 | """ 46 | | 47 | |""".stripMargin) 48 | 49 | override def codeExample: Code = Code(SQLCode, 50 | """ 51 | |select 1 as a,2 as b as mockTable; 52 | |!columns drop a from mockTable; 53 | |select * from mockTable as output; 54 | |""".stripMargin) 55 | 56 | 57 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 58 | 59 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
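  // For context: MLSQLMllib maps this ET onto the "!columns" command, so a statement such as
  //   !columns drop a,b from mockTable;
  // is rewritten through the registered {0}/{1}/{3} template into roughly
  //   run mockTable as ColumnsExt.`` where action="drop" and fields="a,b"
  // and the result of the drop replaces the temp view named by params("__dfname__").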
60 | 61 | override def etName: String = "__columns_operator__" 62 | 63 | final val action: Param[String] = 64 | new Param[String](this, name = "action", doc = "") 65 | setDefault(action, "drop") 66 | 67 | final val fields: StringArrayParam = 68 | new StringArrayParam(this, name = "fields", doc = "") 69 | setDefault(fields, Array[String]()) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/PluginBaseETAuth.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import streaming.dsl.ScriptSQLExec 4 | import streaming.dsl.auth._ 5 | import tech.mlsql.dsl.auth.ETAuth 6 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 7 | 8 | /** 9 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com) 10 | */ 11 | trait PluginBaseETAuth extends ETAuth { 12 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 13 | val vtable = MLSQLTable( 14 | Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 15 | Option(etName), 16 | OperateType.SELECT, 17 | Option("select"), 18 | TableType.SYSTEM) 19 | 20 | val context = ScriptSQLExec.contextGetOrForTest() 21 | context.execListener.getTableAuth match { 22 | case Some(tableAuth) => 23 | tableAuth.auth(List(vtable)) 24 | case None => 25 | List(TableAuthResult(granted = true, "")) 26 | } 27 | } 28 | 29 | def etName: String 30 | } 31 | -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/RegressionEvaluator.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import streaming.dsl.mmlib.algs.{CodeExampleText, Functions, MetricValue} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.mllib.app.MLSQLMllib 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 1/6/2021 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class RegressionEvaluator(override val uid: String) extends SQLAlg 20 | with VersionCompatibility with Functions with WowParams with ETAuth with BaseClassification { 21 | def this() = this(BaseParams.randomUID()) 22 | 23 | /** 24 | * run table as RegressionEvaluator.`` where labelCol="label"; 25 | */ 26 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 27 | batchPredict(df, path, params) 28 | } 29 | 30 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 31 | val items = "mse|rmse|r2|mae".split("\\|").map { metricName => 32 | val evaluator = new org.apache.spark.ml.evaluation.RegressionEvaluator().setMetricName(metricName) 33 | evaluator.setLabelCol(params.getOrElse(labelCol.name, "label")) 34 | evaluator.setPredictionCol("prediction") 35 | MetricValue(metricName, evaluator.evaluate(df)) 36 | }.toList 37 | 38 | import df.sparkSession.implicits._ 39 | df.sparkSession.createDataset[MetricValue](items).toDF() 40 | } 41 | 
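  // As with ClassificationEvaluator, the incoming table only needs the label column (configurable
  // via labelCol, default "label") and a "prediction" column; batchPredict above then emits one
  // MetricValue row for each of mse, rmse, r2 and mae.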
42 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 43 | 44 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 45 | 46 | override def supportedVersions: Seq[String] = { 47 | MLSQLMllib.versions 48 | } 49 | 50 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 51 | List() 52 | } 53 | 54 | override def modelType: ModelType = AlgType 55 | 56 | override def doc: Doc = Doc(HtmlDoc, 57 | """ 58 | |Compute mse|rmse|r2|mae for predicted table. 59 | """.stripMargin) 60 | 61 | 62 | override def codeExample: Code = Code(SQLCode, CodeExampleText.jsonStr + 63 | """ 64 | |predict data as LinearRegressionExt.`/tmp/model` as predicted_table; 65 | |run predicted_table as RegressionEvaluator.``; 66 | """.stripMargin) 67 | 68 | override def explainParams(sparkSession: SparkSession): DataFrame = { 69 | _explainParams(sparkSession) 70 | } 71 | 72 | final val labelCol: Param[String] = new Param[String](this, "labelCol", "default: label") 73 | 74 | } -------------------------------------------------------------------------------- /mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/TakeRandomSampleExt.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.mllib.ets 2 | 3 | import org.apache.spark.ml.param.Param 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import streaming.dsl.mmlib._ 7 | import streaming.dsl.mmlib.algs.classfication.BaseClassification 8 | import streaming.dsl.mmlib.algs.param.BaseParams 9 | import streaming.dsl.mmlib.algs.{Functions, MllibFunctions} 10 | 11 | /** 12 | * 27/9/2021 WilliamZhu(allwefantasy@gmail.com) 13 | */ 14 | class TakeRandomSampleExt(override val uid: String) extends SQLAlg 15 | with Functions 16 | with MllibFunctions 17 | with BaseClassification 18 | with PluginBaseETAuth { 19 | 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val _size = params.getOrElse(size.name, $(size).toString).toLong 24 | val _fraction = params.getOrElse(fraction.name, $(fraction).toString).toDouble 25 | 26 | val newdf = (_fraction, _size) match { 27 | case (-1, -1) => 28 | df 29 | case (-1, s) => 30 | val count = df.count() 31 | df.sample(Math.min(s * 1.0 / count + 0.2, 1.0)).limit(s.toInt) 32 | case (f, -1) => 33 | df.sample(f) 34 | 35 | case (f, s) => 36 | df.sample(Math.min(f + 0.1, 1.0)).limit(s.toInt) 37 | } 38 | if (_fraction != -1) { 39 | df.sample(_fraction) 40 | } 41 | 42 | newdf 43 | } 44 | 45 | 46 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 47 | train(df, path, params) 48 | } 49 | 50 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 51 | 52 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
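  // Worked example of the (fraction, size) branches in train above (the numbers are illustrative):
  // with size=1000, fraction left at its default of -1 and a 10,000 row table, the sample fraction
  // becomes min(1000/10000 + 0.2, 1.0) = 0.3 and the result is then limited to 1,000 rows. Note that
  // the trailing `if (_fraction != -1) { df.sample(_fraction) }` produces a value that is discarded;
  // newdf is what gets returned.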
53 | 54 | override def etName: String = "__take_random_sample_operator__" 55 | 56 | override def modelType: ModelType = ProcessType 57 | 58 | override def doc: Doc = Doc(MarkDownDoc, 59 | """ 60 | | 61 | |""".stripMargin) 62 | 63 | override def codeExample: Code = Code(SQLCode, 64 | """ 65 | | 66 | | 67 | |""".stripMargin) 68 | 69 | final val fraction: Param[Double] = new Param[Double](this, name = "fraction", doc = "") 70 | setDefault(fraction, -1.0D) 71 | 72 | final val size: Param[Long] = new Param[Long](this, "size", "") 73 | setDefault(size, -1L) 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /mlsql-shell/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-shell-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mlsql-shell/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-shell-{{spark_binary_version}}_{{scala_binary_version}} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-shell/README.md: -------------------------------------------------------------------------------- 1 | # mlsql-shell 2 | 3 | This plugin provides the ability to execute shell commands on the MLSQL Engine driver side. 4 | 5 | ![](http://store.mlsql.tech/upload_images/6d09a596-cb0a-495c-9a95-6bbcc63be9ab.png) 6 | 7 | ## Install from store 8 | 9 | Execute the following command in the web console: 10 | 11 | ``` 12 | !plugin app add - "mlsql-shell-2.4"; 13 | ``` 14 | 15 | Check installation: 16 | 17 | ``` 18 | !sh pip install pyjava; 19 | ``` 20 | 21 | 22 | ## Install Manually 23 | 24 | First, build the shaded jar in your terminal: 25 | 26 | ```shell 27 | pip install mlsql_plugin_tool 28 | mlsql_plugin_tool build --module_name mlsql-shell --spark spark243 29 | ``` 30 | 31 | Then change the start script of MLSQL Engine. 32 | 33 | Add Jar: 34 | 35 | ``` 36 | --jars YOUR_JAR_PATH 37 | ``` 38 | 39 | Register Class: 40 | 41 | ``` 42 | -streaming.plugin.clzznames tech.mlsql.plugins.shell.app.MLSQLShell 43 | ``` 44 | 45 | If there is more than one class, use commas to separate them.
For example: 46 | 47 | ``` 48 | -streaming.plugin.clzznames classA,classB,classC 49 | ``` 50 | 51 | ## Usage 52 | 53 | ```sql 54 | !sh pip install pyjava; 55 | !sh echo "yes"; 56 | !sh wget "https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell"; 57 | ``` 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /mlsql-shell/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=mlsql-shell-3.0 2 | mainClass=tech.mlsql.plugins.shell.app.MLSQLShell 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/mlsql-shell 9 | mlsqlPluginType=app 10 | desc=mlsql-shell 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /mlsql-shell/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | mlsql-shell-3.0_2.12 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | shade 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-shade-plugin 26 | 3.2.0 27 | 28 | 29 | 30 | *:* 31 | 32 | META-INF/*.SF 33 | META-INF/*.DSA 34 | META-INF/*.RSA 35 | 36 | 37 | 38 | false 39 | 40 | 41 | org.apache.poi 42 | shadeio.poi 43 | 44 | 45 | com.norbitltd.spoiwo 46 | shadeio.spoiwo 47 | 48 | 49 | com.github.pjfanning 50 | shadeio.pjfanning 51 | 52 | 53 | org.apache.commons.compress 54 | shadeio.commons.compress 55 | 56 | 57 | 58 | 59 | 60 | 61 | package 62 | 63 | shade 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/app/MLSQLShell.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.app 2 | 3 | import tech.mlsql.common.utils.log.Logging 4 | import tech.mlsql.dsl.CommandCollection 5 | import tech.mlsql.ets.register.ETRegister 6 | import tech.mlsql.plugins.shell.ets.{CopyFromLocal, ShellExecute} 7 | import tech.mlsql.version.VersionCompatibility 8 | 9 | /** 10 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 11 | */ 12 | class MLSQLShell extends tech.mlsql.app.App with VersionCompatibility with Logging { 13 | override def run(args: Seq[String]): Unit = { 14 | ETRegister.register("ShellExecute", classOf[ShellExecute].getName) 15 | CommandCollection.refreshCommandMapping(Map("sh" -> 16 | """ 17 | |run command as ShellExecute.`` where parameters='''{:all}''' 18 | |""".stripMargin)) 19 | 20 | ETRegister.register("CopyFromLocal", classOf[CopyFromLocal].getName) 21 | CommandCollection.refreshCommandMapping(Map("copyFromLocal" -> 22 | """ 23 | |run command as CopyFromLocal.`{1}` where src="{0}" 24 | |""".stripMargin)) 25 | } 26 | 27 | 28 | override def supportedVersions: Seq[String] = { 29 | MLSQLShell.versions 30 | } 31 | } 32 | 33 | object MLSQLShell { 34 | val versions = Seq("2.1.0", "2.1.0-SNAPSHOT", "2.0.0", "2.0.1") 35 | } 36 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/CopyFromLocal.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import 
org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth._ 7 | import streaming.dsl.mmlib.SQLAlg 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.plugins.shell.app.MLSQLShell 14 | import tech.mlsql.tool.HDFSOperatorV2 15 | import tech.mlsql.version.VersionCompatibility 16 | 17 | /** 18 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 19 | */ 20 | class CopyFromLocal(override val uid: String) extends SQLAlg 21 | with VersionCompatibility with Functions with WowParams with ETAuth { 22 | def this() = this(BaseParams.randomUID()) 23 | 24 | /** 25 | * !copyFromLocal src dst; 26 | */ 27 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 28 | 29 | HDFSOperatorV2.copyToHDFS(params("src"), path, false, false) 30 | import df.sparkSession.implicits._ 31 | df.sparkSession.createDataset[String](Seq().toSeq).toDF("content") 32 | } 33 | 34 | override def skipPathPrefix: Boolean = false 35 | 36 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 37 | 38 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 39 | 40 | override def supportedVersions: Seq[String] = MLSQLShell.versions 41 | 42 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 43 | val vtable = MLSQLTable( 44 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 45 | table = Option("__copy_from_local__"), 46 | operateType = OperateType.EMPTY, 47 | sourceType = Option("_mlsql_"), 48 | tableType = TableType.SYSTEM) 49 | 50 | val context = ScriptSQLExec.contextGetOrForTest() 51 | context.execListener.getTableAuth match { 52 | case Some(tableAuth) => 53 | tableAuth.auth(List(vtable)) 54 | case None => List(TableAuthResult(true, "")) 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /mlsql-shell/src/main/java/tech/mlsql/plugins/shell/ets/ShellExecute.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.shell.ets 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth._ 7 | import streaming.dsl.mmlib.SQLAlg 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.common.utils.shell.ShellCommand 12 | import tech.mlsql.dsl.auth.ETAuth 13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 14 | import tech.mlsql.plugins.shell.app.MLSQLShell 15 | import tech.mlsql.version.VersionCompatibility 16 | 17 | import scala.collection.mutable.ArrayBuffer 18 | 19 | /** 20 | * 2/6/2021 WilliamZhu(allwefantasy@gmail.com) 21 | */ 22 | class ShellExecute(override val uid: String) extends SQLAlg 23 | with VersionCompatibility with Functions with WowParams with ETAuth { 24 | def this() = this(BaseParams.randomUID()) 25 | 26 | /** 27 | * !sh pip install pyjava; 28 | */ 29 | override def train(df: DataFrame, path: String, params: Map[String, String]): 
DataFrame = { 30 | val args = JSONTool.parseJson[List[String]](params("parameters")) 31 | import df.sparkSession.implicits._ 32 | 33 | args.head match { 34 | case "script" => 35 | val res = ShellCommand.exec(args.last) 36 | df.sparkSession.createDataset[String](Seq(res)).toDF("content") 37 | case _ => 38 | 39 | val process = os.proc(args).spawn() 40 | val result = ArrayBuffer[String]() 41 | 42 | var errLine = process.stderr.readLine() 43 | 44 | while (errLine != null) { 45 | logInfo(format(errLine)) 46 | result.append(errLine) 47 | errLine = process.stderr.readLine() 48 | } 49 | 50 | 51 | var line = process.stdout.readLine() 52 | while (line != null) { 53 | logInfo(format(line)) 54 | result.append(line) 55 | line = process.stdout.readLine() 56 | } 57 | 58 | df.sparkSession.createDataset[String](result.toSeq).toDF("content") 59 | } 60 | 61 | 62 | } 63 | 64 | override def skipPathPrefix: Boolean = false 65 | 66 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 67 | 68 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 69 | 70 | override def supportedVersions: Seq[String] = MLSQLShell.versions 71 | 72 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 73 | val vtable = MLSQLTable( 74 | db = Option(DB_DEFAULT.MLSQL_SYSTEM.toString), 75 | table = Option("__shell_execute__"), 76 | operateType = OperateType.EMPTY, 77 | sourceType = Option("_mlsql_"), 78 | tableType = TableType.SYSTEM) 79 | 80 | val context = ScriptSQLExec.contextGetOrForTest() 81 | context.execListener.getTableAuth match { 82 | case Some(tableAuth) => 83 | tableAuth.auth(List(vtable)) 84 | case None => List(TableAuthResult(true, "")) 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /run-script/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | run-script-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /run-script/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "run-script-2.4" named runScript; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set code1=''' 11 | select 1 as a as b; 12 | '''; 13 | !runScript '''${code1}''' named output; 14 | ``` -------------------------------------------------------------------------------- /run-script/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=run-script-2.4 2 | mainClass=tech.mlsql.plugins.et.RunScript 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/run-script 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /run-script/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | 
run-script-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /run-script/src/main/java/tech/mlsql/plugins/et/RunScript.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | import streaming.dsl.ScriptSQLExec 6 | import streaming.dsl.auth.TableAuthResult 7 | import streaming.dsl.mmlib._ 8 | import streaming.dsl.mmlib.algs.Functions 9 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.ets.ScriptRunner 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | 17 | class RunScript(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 18 | def this() = this(BaseParams.randomUID()) 19 | 20 | // 21 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 22 | 23 | val context = ScriptSQLExec.context() 24 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 25 | val sparkOpt = Option(df.sparkSession) 26 | command match { 27 | case Array(script, "named", tableName) => 28 | var jobRes: DataFrame = ScriptRunner.rubSubJob( 29 | script, 30 | (_df: DataFrame) => {}, 31 | sparkOpt, 32 | true, 33 | true).get 34 | jobRes.createOrReplaceTempView(tableName) 35 | jobRes 36 | case _ => throw new RuntimeException("try !runScript code named table1") 37 | } 38 | 39 | } 40 | 41 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 42 | List() 43 | } 44 | 45 | override def supportedVersions: Seq[String] = { 46 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 47 | } 48 | 49 | 50 | override def doc: Doc = Doc(MarkDownDoc, 51 | s""" 52 | |When you want to run a piece of MLSQL script and use its result 53 | |in the next command(SQL), you can use the !runScript command. 54 | | 55 | |For example: 56 | | 57 | |``` 58 | |${codeExample.code} 59 | |``` 60 | """.stripMargin) 61 | 62 | 63 | override def codeExample: Code = Code(SQLCode, 64 | """ 65 | |set code1='''select 1 as a as b;'''; 66 | |!runScript '''${code1}''' named output; 67 | |select * from output; 68 | """.stripMargin) 69 | 70 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 71 | 72 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 73 | 74 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 
75 | 76 | 77 | } 78 | -------------------------------------------------------------------------------- /save-then-load/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | save-then-load-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /save-then-load/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "save-then-load-2.4" named saveThenLoad; 5 | ``` 6 | 7 | ## Usage 8 | 9 | This plugin will save the table into delta table and load it again. 10 | 11 | ```sql 12 | !saveThenLoad tableName; 13 | select * from tableName as output; 14 | ``` -------------------------------------------------------------------------------- /save-then-load/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=save-then-load-2.4 2 | mainClass=tech.mlsql.plugins.et.SaveThenLoad 3 | version=0.1.0-SNAPSHOT 4 | author=allwefantasy 5 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 6 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/save-then-load 7 | scala_version=2.11 8 | spark_version=2.4 9 | mlsqlPluginType=et 10 | desc=wow 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /save-then-load/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | save-then-load-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /save-then-load/src/main/java/tech/mlsql/plugins/et/SaveThenLoad.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import org.apache.spark.sql.expressions.UserDefinedFunction 4 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} 5 | import streaming.core.datasource.impl.MLSQLDelta 6 | import streaming.core.datasource.{DataSinkConfig, DataSourceConfig} 7 | import streaming.dsl.auth.TableAuthResult 8 | import streaming.dsl.mmlib._ 9 | import streaming.dsl.mmlib.algs.Functions 10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 11 | import tech.mlsql.common.utils.serder.json.JSONTool 12 | import tech.mlsql.dsl.auth.ETAuth 13 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 13/1/2020 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class SaveThenLoad(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val command = JSONTool.parseJson[List[String]](params("parameters")).toArray 24 | val session = df.sparkSession 25 | command match { 26 | case Array(tableName) => 27 | val ds = new MLSQLDelta() 28 | ds.save(session.table(tableName).write, DataSinkConfig(s"__tmp__.${tableName}", Map(), SaveMode.Overwrite, Option(df))) 29 | val newDF = ds.load(session.read, 
DataSourceConfig(s"__tmp__.${tableName}", Map(), Option(df))) 30 | newDF.createOrReplaceTempView(tableName) 31 | newDF 32 | case _ => throw new RuntimeException("!saveThenLoad tableName;") 33 | } 34 | } 35 | 36 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 37 | List() 38 | } 39 | 40 | override def supportedVersions: Seq[String] = { 41 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 42 | } 43 | 44 | 45 | override def doc: Doc = Doc(MarkDownDoc, 46 | s""" 47 | | 48 | |``` 49 | |${codeExample.code} 50 | |``` 51 | """.stripMargin) 52 | 53 | 54 | override def codeExample: Code = Code(SQLCode, 55 | """ 56 | |example 57 | """.stripMargin) 58 | 59 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 60 | 61 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 62 | 63 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /stream-persist/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=stream-persist-app-{{spark_binary_version}} 2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup 9 | mlsqlPluginType=app 10 | desc=wow 11 | 12 | -------------------------------------------------------------------------------- /stream-persist/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | stream-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /stream-persist/README.md: -------------------------------------------------------------------------------- 1 | ## Install command: 2 | 3 | ``` 4 | !plugin app add - "stream-persist-app-2.4"; 5 | ``` 6 | 7 | The first plugin is APP plugin and the second is ET plugin. 8 | 9 | 10 | ## Usage 11 | 12 | Use ET Plugin to persist stream job. 13 | 14 | ```sql 15 | !streamPersist persist streamExample; 16 | 17 | !streamPersist remove streamExample; 18 | 19 | !streamPersist list; 20 | ``` 21 | 22 | And then once MLSQL Engine is restarted, and the stream job streamExample will be 23 | boosted at the startup of MLSQL. 
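A minimal end-to-end sketch (hedged; `streamExample` is an illustrative job name, and the stream must already be running in the current engine before it can be persisted, since the command looks the job up in the job manager):

```sql
-- persist a currently running stream job so it is replayed after an engine restart
!streamPersist persist streamExample;
-- confirm it was recorded in the __mlsql__.streams table
!streamPersist list;
```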
24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /stream-persist/db.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `w_streams` ( 2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 3 | `name` varchar(256) DEFAULT NULL, 4 | `content` text, 5 | `owner` varchar(256) DEFAULT NULL, 6 | `home` varchar(256) DEFAULT NULL, 7 | PRIMARY KEY (`id`) 8 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -------------------------------------------------------------------------------- /stream-persist/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=stream-persist-app-3.0 2 | mainClass=tech.mlsq.streambootstrapatstartup.StreamApp 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/stream-boostrap-at-startup 9 | mlsqlPluginType=app 10 | desc=wow 11 | -------------------------------------------------------------------------------- /stream-persist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | stream-persist-${spark.binary.version}_${scala.binary.version} 13 | 14 | -------------------------------------------------------------------------------- /stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamApp.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsq.streambootstrapatstartup 2 | 3 | import _root_.streaming.core.strategy.platform.{PlatformManager, SparkRuntime} 4 | import _root_.streaming.dsl.{MLSQLExecuteContext, ScriptSQLExec, ScriptSQLExecListener} 5 | import org.apache.spark.sql.SparkSession 6 | import tech.mlsql.common.utils.log.Logging 7 | import tech.mlsql.dsl.CommandCollection 8 | import tech.mlsql.ets.ScriptRunner 9 | import tech.mlsql.ets.register.ETRegister 10 | import tech.mlsql.job.{JobManager, MLSQLJobType} 11 | import tech.mlsql.store.DBStore 12 | import tech.mlsql.version.VersionCompatibility 13 | 14 | /** 15 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com) 16 | */ 17 | class StreamApp extends tech.mlsql.app.App with VersionCompatibility with Logging { 18 | 19 | 20 | override def run(args: Seq[String]): Unit = { 21 | val root = runtime.sparkSession 22 | import root.implicits._ 23 | 24 | ETRegister.register("StreamPersistCommand", classOf[StreamPersistCommand].getName) 25 | CommandCollection.refreshCommandMapping(Map("streamPersist" -> "StreamPersistCommand")) 26 | 27 | val thread = new Thread("start MLSQL stream") { 28 | override def run(): Unit = { 29 | while (!PlatformManager.RUNTIME_IS_READY.get()) { 30 | Thread.sleep(3000) 31 | logInfo("Waiting MLSQL runtime ready to start streams.") 32 | } 33 | logInfo("Starting to start streams.") 34 | val streams = DBStore.store.tryReadTable(root, StreamAppConfig.TABLE, () => root.createDataset[Stream](Seq()).toDF()) 35 | streams.as[Stream].collect().foreach { stream => 36 | val session = getSessionByOwner(stream.owner) 37 | val job = JobManager.getJobInfo(stream.owner, stream.name, MLSQLJobType.STREAM, stream.content, -1) 38 | setUpScriptSQLExecListener(stream.owner, session, job.groupId, stream.home) 39 | ScriptRunner.runJob(stream.content, job, (df) => { 40 | 41 | }) 42 | } 43 | } 44 | } 45 | thread.start() 46 
| 47 | } 48 | 49 | def setUpScriptSQLExecListener(owner: String, sparkSession: SparkSession, groupId: String, home: String) = { 50 | val context = new ScriptSQLExecListener(sparkSession, "", Map[String, String](owner -> home)) 51 | ScriptSQLExec.setContext(new MLSQLExecuteContext(context, owner, context.pathPrefix(None), groupId, Map())) 52 | context.addEnv("SKIP_AUTH", "true") 53 | context.addEnv("HOME", context.pathPrefix(None)) 54 | context.addEnv("OWNER", owner) 55 | context 56 | } 57 | 58 | def getSessionByOwner(owner: String) = { 59 | runtime.getSession(owner) 60 | } 61 | 62 | def runtime = { 63 | PlatformManager.getRuntime.asInstanceOf[SparkRuntime] 64 | } 65 | 66 | override def supportedVersions: Seq[String] = { 67 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 68 | } 69 | } 70 | 71 | object StreamAppConfig { 72 | val TABLE = "__mlsql__.streams" 73 | } 74 | 75 | case class Stream(name: String, content: String, owner: String, home: String) 76 | 77 | object StreamApp { 78 | } 79 | -------------------------------------------------------------------------------- /stream-persist/src/main/java/tech/mlsq/streambootstrapatstartup/StreamPersistCommand.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsq.streambootstrapatstartup 2 | 3 | import _root_.streaming.dsl.ScriptSQLExec 4 | import _root_.streaming.dsl.mmlib.SQLAlg 5 | import _root_.streaming.dsl.mmlib.algs.Functions 6 | import _root_.streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 7 | import org.apache.spark.sql.expressions.UserDefinedFunction 8 | import org.apache.spark.sql.{DataFrame, SparkSession} 9 | import org.apache.spark.sql.mlsql.session.MLSQLException 10 | import tech.mlsql.common.utils.serder.json.JSONTool 11 | import tech.mlsql.datalake.DataLake 12 | import tech.mlsql.job.JobManager 13 | import tech.mlsql.store.DBStore 14 | import tech.mlsql.version.VersionCompatibility 15 | 16 | /** 17 | * 2019-09-20 WilliamZhu(allwefantasy@gmail.com) 18 | */ 19 | class StreamPersistCommand(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams { 20 | def this() = this(BaseParams.randomUID()) 21 | 22 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 23 | val spark = df.sparkSession 24 | val dataLake = new DataLake(spark) 25 | require(dataLake.isEnable, "data lake should be enabled.") 26 | import spark.implicits._ 27 | 28 | val command = JSONTool.parseJson[List[String]](params("parameters")) 29 | command match { 30 | case Seq("persist", streamName) => 31 | JobManager.getJobInfo.filter(f => f._2.jobName == streamName).map(f => f._2).headOption match { 32 | case Some(item) => 33 | val data = spark.createDataset(Seq(Stream(streamName, item.jobContent, item.owner, ScriptSQLExec.context().home))) 34 | DBStore.store.saveTable(spark, data.toDF(), StreamAppConfig.TABLE, Option("name"), false) 35 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 36 | case None => throw new MLSQLException(s"not stream ${streamName} exists") 37 | } 38 | case Seq("remove", streamName) => 39 | DBStore.store.saveTable(spark, spark.createDataset[Stream](Seq(Stream(streamName, null, null, null))).toDF(), StreamAppConfig.TABLE, Option("name"), true) 40 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 41 | 42 | case Seq("list") => 43 | DBStore.store.readTable(spark, StreamAppConfig.TABLE) 44 | } 45 | 46 | } 47 | 48 | 49 | override def supportedVersions: Seq[String] = { 50 | Seq("1.5.0-SNAPSHOT", 
"1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 51 | } 52 | 53 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 54 | 55 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 56 | 57 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 58 | } 59 | -------------------------------------------------------------------------------- /table-repartition/.repo/desc.template.plugin: -------------------------------------------------------------------------------- 1 | moduleName=table-repartition-{{spark_binary_version}} 2 | mainClass=tech.mlsql.plugins.et.TableRepartition 3 | scala_version={{scala_binary_version}} 4 | spark_version={{spark_binary_version}} 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition 9 | mlsqlPluginType=et 10 | desc=wow -------------------------------------------------------------------------------- /table-repartition/.repo/pom.template.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-{{spark_binary_version}}_{{scala_binary_version}} 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | table-repartition-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /table-repartition/README.md: -------------------------------------------------------------------------------- 1 | ## Install 2 | 3 | ```sql 4 | !plugin et add - "table-repartition-2.4"; 5 | ``` 6 | 7 | ## Usage 8 | 9 | ```sql 10 | set rawText=''' 11 | {"id":9,"content":"Spark好的语言1","label":0.0} 12 | {"id":10,"content":"MLSQL是一个好的语言7","label":0.0} 13 | {"id":13,"content":"MLSQL是一个好的语言7","label":0.0} 14 | '''; 15 | 16 | load jsonStr.`rawText` as orginal_text_corpus; 17 | 18 | select id,content,label from orginal_text_corpus as orginal_text_corpus1; 19 | run orginal_text_corpus1 as TableRepartition.`` where partitionNum="2" and partitionType="range" and partitionCols="id" 20 | as newtable; 21 | ``` -------------------------------------------------------------------------------- /table-repartition/desc.plugin: -------------------------------------------------------------------------------- 1 | moduleName=table-repartition-3.0 2 | mainClass=tech.mlsql.plugins.et.TableRepartition 3 | scala_version=2.12 4 | spark_version=3.0 5 | version=0.1.0-SNAPSHOT 6 | author=allwefantasy 7 | mlsqlVersions="1.5.0,1.5.0-SNAPSHOT,1.6.0,1.6.0-SNAPSHOT" 8 | githubUrl=https://github.com/allwefantasy/mlsql-plugins/tree/master/table-repartition 9 | mlsqlPluginType=et 10 | desc=wow -------------------------------------------------------------------------------- /table-repartition/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | mlsql-plugins-3.0_2.12 7 | tech.mlsql 8 | 0.1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | table-repartition-${spark.binary.version}_${scala.binary.version} 13 | 14 | 15 | -------------------------------------------------------------------------------- /table-repartition/src/main/java/tech/mlsql/plugins/et/TableRepartition.scala: -------------------------------------------------------------------------------- 1 | package tech.mlsql.plugins.et 2 | 3 | import 
org.apache.spark.ml.param.{IntParam, Param} 4 | import org.apache.spark.sql.expressions.UserDefinedFunction 5 | import org.apache.spark.sql.mlsql.session.MLSQLException 6 | import org.apache.spark.sql.{DataFrame, SparkSession, functions => F} 7 | import streaming.dsl.auth.TableAuthResult 8 | import streaming.dsl.mmlib._ 9 | import streaming.dsl.mmlib.algs.Functions 10 | import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} 11 | import tech.mlsql.dsl.auth.ETAuth 12 | import tech.mlsql.dsl.auth.dsl.mmlib.ETMethod.ETMethod 13 | import tech.mlsql.version.VersionCompatibility 14 | 15 | 16 | class TableRepartition(override val uid: String) extends SQLAlg with VersionCompatibility with Functions with WowParams with ETAuth { 17 | def this() = this(BaseParams.randomUID()) 18 | 19 | // 20 | override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { 21 | 22 | params.get(partitionNum.name).map { item => 23 | set(partitionNum, item.toInt) 24 | item 25 | }.getOrElse { 26 | throw new MLSQLException(s"${partitionNum.name} is required") 27 | } 28 | 29 | params.get(partitionType.name).map { item => 30 | set(partitionType, item) 31 | item 32 | }.getOrElse { 33 | set(partitionType, "hash") 34 | } 35 | 36 | params.get(partitionCols.name).map { item => 37 | set(partitionCols, item) 38 | item 39 | }.getOrElse { 40 | set(partitionCols, "") 41 | } 42 | 43 | $(partitionType) match { 44 | case "range" => 45 | 46 | require(params.contains(partitionCols.name), "At least one partition-by expression must be specified.") 47 | df.repartitionByRange($(partitionNum), $(partitionCols).split(",").map(name => F.col(name)): _*) 48 | 49 | case _ => 50 | df.repartition($(partitionNum)) 51 | } 52 | 53 | 54 | } 55 | 56 | override def auth(etMethod: ETMethod, path: String, params: Map[String, String]): List[TableAuthResult] = { 57 | List() 58 | } 59 | 60 | override def supportedVersions: Seq[String] = { 61 | Seq("1.5.0-SNAPSHOT", "1.5.0", "1.6.0-SNAPSHOT", "1.6.0") 62 | } 63 | 64 | 65 | override def doc: Doc = Doc(MarkDownDoc, 66 | s""" 67 | | 68 | """.stripMargin) 69 | 70 | 71 | override def codeExample: Code = Code(SQLCode, 72 | """ 73 | | 74 | """.stripMargin) 75 | 76 | override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) 77 | 78 | override def load(sparkSession: SparkSession, path: String, params: Map[String, String]): Any = ??? 79 | 80 | override def predict(sparkSession: SparkSession, _model: Any, name: String, params: Map[String, String]): UserDefinedFunction = ??? 81 | 82 | final val partitionNum: IntParam = new IntParam(this, "partitionNum", 83 | "") 84 | final val partitionType: Param[String] = new Param[String](this, "partitionType", 85 | "") 86 | 87 | final val partitionCols: Param[String] = new Param[String](this, "partitionCols", 88 | "") 89 | 90 | override def explainParams(sparkSession: SparkSession): DataFrame = _explainParams(sparkSession) 91 | 92 | } 93 | --------------------------------------------------------------------------------
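As a usage footnote to TableRepartition above (a sketch reusing the table names from its README; nothing here is part of the plugin itself): when `partitionType` is omitted the ET falls back to hash partitioning, so only `partitionNum` is required.

```sql
-- hash repartition (the default when partitionType is not set)
run orginal_text_corpus1 as TableRepartition.`` where partitionNum="2" as newtable;
```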