├── README.md
├── .adr.json
├── main.go
├── docs
│   ├── adr
│   │   ├── README.md
│   │   ├── 0001-data-lifecycle-design.md
│   │   └── 0002-thinking-in-rule-engine.md
│   ├── refs
│   │   ├── flink.md
│   │   ├── geo-dsl.md
│   │   └── big-data-task.md
│   └── README.md
├── .gitignore
├── stories
│   └── X-8WXLbWg-data-model-from-lifecycle.feature
├── examples
│   └── log.data
├── LICENSE
└── language
    └── Data.g4

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# data

Data as Code

--------------------------------------------------------------------------------
/.adr.json:
--------------------------------------------------------------------------------
{"language":"en","path":"docs/adr/","prefix":"","digits":4}

--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
package main

import (
	"log"
	"os"
)

func main() {
	file, err := os.Open("file.go") // For read access.
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	log.Println(file.Name())
}

--------------------------------------------------------------------------------
/docs/adr/README.md:
--------------------------------------------------------------------------------
# Architecture Decision Records

* [1. data-lifecycle-design](0001-data-lifecycle-design.md)
* [2. thinking-in-rule-engine](0002-thinking-in-rule-engine.md)

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, build with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out
.idea

--------------------------------------------------------------------------------
/docs/refs/flink.md:
--------------------------------------------------------------------------------
# Flink

## Internal DSL

### DataStream

```
.map()
.keyBy()
.aggregate()
.process()
.addSink()
```
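What makes the DataStream API an internal DSL is that every stage returns the stream type, so stages compose as one fluent chain. A minimal Go sketch of the same chaining style — a toy `Stream` type invented here for illustration, not Flink's API:

```go
package main

import "fmt"

// Stream is a toy, in-memory pipeline type. Each stage returns
// *Stream so calls chain in the fluent, internal-DSL style that
// Flink's DataStream API uses.
type Stream struct {
	items []int
}

// From builds a stream from literal values.
func From(items ...int) *Stream { return &Stream{items: items} }

// Map applies f to every element, yielding a new stream.
func (s *Stream) Map(f func(int) int) *Stream {
	out := make([]int, len(s.items))
	for i, v := range s.items {
		out[i] = f(v)
	}
	return &Stream{items: out}
}

// AddSink consumes the stream, ending the chain.
func (s *Stream) AddSink(f func(int)) {
	for _, v := range s.items {
		f(v)
	}
}

func main() {
	From(1, 2, 3).
		Map(func(v int) int { return v * 2 }).
		AddSink(func(v int) { fmt.Println(v) })
}
```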
## External DSL

1. Build Env
2. Build Stream Graph

```
SourceData
Characteristic
CharacteristicGroup
Event
```

--------------------------------------------------------------------------------
/stories/X-8WXLbWg-data-model-from-lifecycle.feature:
--------------------------------------------------------------------------------
# id: X-8WXLbWg
# startDate: 2019-11-26T05:27:26Z
# endDate: 2019-11-26T05:27:26Z
# priority:
# status:
# author:
# title: data model from lifecycle
# language: en
@math
Feature: data model from lifecycle

  Scenario:
    Given
    When
    And
    Then

--------------------------------------------------------------------------------
/examples/log.data:
--------------------------------------------------------------------------------
// basic type: number, string, array

type date {
    attributes: {
        format: ""
    }
}

type Icon {
    parameters: [
        iconName: attributes.iconName
    ]
    attributes: {
        iconName: string
    }
}

enum FileMode {

}

type File {
    createTime: date
    attributes: {
        name: string
        path: string
        size: int
        mode: Enum[FileMode]
        isDir: bool
        icon: Icon("file")
    }
}
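The `File` declaration in `examples/log.data` above is, in effect, a schema. To make that concrete, here is a rough sketch of how it could map onto Go types — the struct shapes and the `time.Time` mapping for `date` are assumptions for illustration, not generated output:

```go
package main

import "time"

// FileMode mirrors the (still empty) `enum FileMode` declaration.
type FileMode int

// Icon mirrors `type Icon`, whose single attribute is the icon name.
type Icon struct {
	IconName string // e.g. "file"
}

// File mirrors `type File` from log.data. `date` is mapped to
// time.Time here as an assumption, since the DSL leaves the
// format attribute empty.
type File struct {
	CreateTime time.Time
	Name       string
	Path       string
	Size       int
	Mode       FileMode
	IsDir      bool
	Icon       Icon
}

func main() {
	f := File{
		Name: "log.data",
		Path: "examples/log.data",
		Icon: Icon{IconName: "file"},
	}
	_ = f
}
```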
--------------------------------------------------------------------------------
/docs/adr/0001-data-lifecycle-design.md:
--------------------------------------------------------------------------------
# 1. data lifecycle design

Date: 2019-11-26

## Status

2019-11-26 proposed

## Context

Context here...

## Decision

Decision here...

```
sd: { "guid": "ff58025f-6854-4195-9f75-3a3058dd8dcf",
      "typeName": "hive_storagedesc"
    }
partitionKeys: null
aliases: null
columns: [ { "guid": "65e2204f-6a23-4130-934a-9679af6a211f",
             "typeName": "hive_column" },
           { "guid": "d726de70-faca-46fb-9c99-cf04f6b579a6",
             "typeName": "hive_column" },
           ...
         ]
```

## Consequences

Consequences here...

--------------------------------------------------------------------------------
/docs/adr/0002-thinking-in-rule-engine.md:
--------------------------------------------------------------------------------
# 2. thinking in rule engine

Date: 2019-11-26

## Status

2019-11-26 proposed

## Context

A rule engine is part of data processing; this ADR thinks through adding a mini version of one.

A simple example can be seen at: https://github.com/nikunjy/rules/blob/master/JsonQuery.g4

Business Rules Management System: [https://www.drools.org/](https://www.drools.org/)

```
eq|==: equals to
ne|!=: not equals to
lt|<: less than
gt|>: greater than
le|<=: less than or equal to
ge|>=: greater than or equal to
co: contains
sw: starts with
ew: ends with
in: in a list
pr: present
not: not of a logical expression
```
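To gauge how small a mini rule engine can be, here is a hedged Go sketch that evaluates a few of the operators above against a flat record; the `Rule` shape and function names are illustrative assumptions, not a committed design:

```go
package main

import (
	"fmt"
	"strings"
)

// Rule is one comparison: <field> <op> <value>.
type Rule struct {
	Field, Op, Value string
}

// Eval applies a subset of the operators listed above to a record.
func Eval(r Rule, record map[string]string) bool {
	v, present := record[r.Field]
	switch r.Op {
	case "eq":
		return v == r.Value
	case "ne":
		return v != r.Value
	case "co":
		return strings.Contains(v, r.Value)
	case "sw":
		return strings.HasPrefix(v, r.Value)
	case "ew":
		return strings.HasSuffix(v, r.Value)
	case "pr":
		return present
	default:
		return false
	}
}

func main() {
	record := map[string]string{"name": "log.data", "path": "examples/log.data"}
	fmt.Println(Eval(Rule{"name", "ew", ".data"}, record)) // true
	fmt.Println(Eval(Rule{"path", "sw", "docs/"}, record)) // false
}
```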
## Decision

Decision here...

## Consequences

Consequences here...

--------------------------------------------------------------------------------
/docs/refs/geo-dsl.md:
--------------------------------------------------------------------------------
Source: http://repositorio.pucrs.br/dspace/bitstream/10923/13813/2/A_High_Level_DSL_for_Geospatial_Visualizations_with_Multi_core_Parallelism_Support.pdf

```
visualization: markedmap;
settings {
    latitude: field 12;
    longitude: field 11;
    marker-text: "Camera:" field 6 image(field 15);
    page-title: "Photos by Camera Brand";
    size: full;
}
data {
    file: "yfcc100m_dataset_all";
    structure {
        delimiter: tab;
        end-register: newline;
        date-format: "YYYY-MM-DD";
    }
    filter: field 4 is between date "2014-01-01" and date "2014-02-01";
    classification {
        class("Canon"): field 6 contains "Canon";
        class("Sony"): field 6 contains "Sony";
        class("Nikon"): field 6 contains "Nikon";
        class("Panasonic"): field 6 contains "Panasonic";
        class("Apple"): field 6 contains "Apple";
        class("FUJI"): field 6 contains "FUJI";
    }
}
```

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Phodal Huang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/language/Data.g4:
--------------------------------------------------------------------------------
grammar Data;

compilationUnit
    : importDeclaration* typeDeclaration* EOF
    ;

importDeclaration
    : IMPORT IDENTIFIER
    ;

typeDeclaration
    : operationDeclaration
    | ruleDeclaration
    | typeTypeDeclaration
    | customTypeDeclaration
    ;

customTypeDeclaration
    : 'type' typeName '{' typeAttribute* '}'
    ;

typeName: IDENTIFIER;

typeAttribute
    : 'name' ':'
    ;

//

typeTypeDeclaration
    : TYPETYPE IDENTIFIER '{' attribute* '}'
    ;

attribute: IDENTIFIER ':' predefinedType;

predefinedType: IDENTIFIER;

TYPETYPE
    : 'string'
    | 'number'
    | 'array'
    ;

operationDeclaration
    : OPERATION
    ;

OPERATION: 'operation';

//

ruleDeclaration: RULE IDENTIFIER TRANSFORM sourceModel TO targetModel;

sourceModel: IDENTIFIER;
targetModel: IDENTIFIER;

RULE: 'rule';
TRANSFORM: 'transform';
TO: 'to';

IMPORT: 'import';
IDENTIFIER: Letter LetterOrDigit*;

// Whitespace and comments

WS: [ \t\r\n\u000C]+ -> channel(HIDDEN);
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);

fragment LetterOrDigit
    : Letter
    | [0-9]
    ;

fragment Letter
    : [a-zA-Z$_] // these are the "java letters" below 0x7F
    | ~[\u0000-\u007F\uD800-\uDBFF] // covers all characters above 0x7F which are not a surrogate
    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pair encodings for U+10000 to U+10FFFF
    ;
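A grammar alone does nothing until a parser is generated and driven. Assuming `Data.g4` has been compiled with ANTLR's Go target (`antlr4 -Dlanguage=Go -o parser Data.g4`) into a hypothetical `parser` package, a minimal driver could look like the sketch below; `NewDataLexer`/`NewDataParser` follow ANTLR's generated-code naming convention, and the module path is an assumption:

```go
package main

import (
	"fmt"

	"github.com/antlr/antlr4/runtime/Go/antlr"

	"data/language/parser" // hypothetical module path for the generated code
)

func main() {
	// A tiny input that exercises ruleDeclaration.
	input := antlr.NewInputStream("rule FileToTicket transform File to Ticket")
	lexer := parser.NewDataLexer(input)
	tokens := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
	p := parser.NewDataParser(tokens)

	// Parse from the start rule and print the tree in LISP form.
	tree := p.CompilationUnit()
	fmt.Println(tree.ToStringTree(nil, p))
}
```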
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
# Design

## Atlas Type System

- Types
- Entities
- Attributes

Data Lifecycle

- createTime
- containers // hadoop, spark
- all type definitions
- filter key
- comment
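Read as a whole, the lifecycle list above is the minimum metadata every dataset would carry. A speculative Go sketch of that record — field names are illustrative assumptions, not a settled schema:

```go
package main

import "time"

// DataLifecycle is a speculative shape for the lifecycle metadata
// listed above; the field names are assumptions for illustration.
type DataLifecycle struct {
	CreateTime time.Time
	Containers []string          // e.g. "hadoop", "spark"
	Types      map[string]string // all type definitions, keyed by name
	FilterKey  string
	Comment    string
}

func main() {
	d := DataLifecycle{
		CreateTime: time.Now(),
		Containers: []string{"hadoop", "spark"},
	}
	_ = d
}
```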
## Domain-Specific Meta-Modelling Languages

[Domain-Specific Meta-Modelling Languages](http://miso.es/pubs/DSMM.pdf)

DSL Examples:

```dsl
Syntax DSPM_MM for ProcessModel [".pm_mm"] {
    model template Process Syntax@1 for ProcessModel
        "process" ^Id "{"
            (_:TaskTemplate | _:PerformerTemplate | _:SeqTemplate)*
        "}";

    node template TaskTemplate@1 for Task
        ("final")? "task" ^Id #name
        with "final" set final = true;

    node template PerformerTemplate@1 for Performer
        "performer" #name
        with name is id;

    node template SeqTemplate@1 for Seq
        "seq" ^Id ":" #src "->" #tar
        with src redefinedBy from
             tar redefinedBy to;
}
```

```Process
process SoftwareProcess {
    task Analysis "requirements and analysis"
    task Design "high and low level design"
    task Coding "coding and unit testing"

    performer Analyst
    performer Designer
    performer Programmer

    seq a2d: Analysis -> Design
    seq d2c: Design -> Coding
}
```

```rule
@metamodel(name=ProcessModel,domain=source)
@metamodel(name=PMS,domain=target)
@model(potency=0)
rule Task2Ticket transform task : Source!Task to ticket : Target!Ticket
{
    ticket.name := task.^name;
    ticket.description := task.name;
    ticket.priority := task.duration - 1;

    for (ref in task.references("perfBy"))
        ticket.assignedTo ::= task.value(ref);
}

@lazy
rule Performer2User transform per : Source!Performer to usr : Target!User
{
    usr.name := per.name;
}
```

```operation
operation Analysis createOutps() {
    var req := new RequirementsDoc();
    self.output := req;
}

operation Design createOutps() {
    var dd := new DesignDoc();
    self.output := dd;
}
```

--------------------------------------------------------------------------------
/docs/refs/big-data-task.md:
--------------------------------------------------------------------------------
# Big Data

https://www.researchgate.net/profile/Sergey_Kovalchuk2/publication/291343973_Dynamic_Domain-Specific_Language_for_BigData_Tasks'_Description/links/56a0fcbd08ae24f62701e2d8.pdf

Patterns:

![Big Data Patterns](images/big-data-pattern.png)

The dynamic DSL being developed within the knowledge-based approach incorporates the following artifacts:

- **Domain-specific semantics**. A set of domain-specific objects used to describe the structure of the investigated system. This part of the knowledge enables interconnection with WF and simulation procedures on levels L4-5.
- **Data formats**. To support integration and a high-level definition of the task, semantically marked descriptions of the data formats and of the atomic analysis procedures should be provided.
- **Data aggregation patterns**. To interpret an imperative description of procedures within the high-level task with MapReduce code generation, the data aggregation and statistical analysis procedures should be available.
- **Cloud infrastructure knowledge**. Should be provided to support a) further processing of results by the other parts of a composite application, and b) calling local software described in the same way as regular cloud-computing services.

In short:

1. Domain-specific semantics
2. Data formats
3. Data aggregation patterns
4. Cloud infrastructure knowledge

```
require BSHFile, projects, inControlPoints, points, searchedhirlam;

script CycloneSearch runs BDDSL (
    dir = "/data/path", tag = "ERA"
)
{:
    select Cyclone where (
        trajectory_path = "(65.67, 8.19), (65.97, 23.45), (60.97, 31.45)"
        accuracy = "(5, 5)"
    )
    out Cyclone.Parameters() as outs
:};

step CycloneStatisticalAnalysis runs scilab (
    input_folder = CycloneSearch.Result.outs["data/out"],
    script_name = "SCI_stat.sce"
);

script FloodSimulation runs BDDSL (
    dir = "/data/path", tag = "HIRLAM",
    config = CycloneStatisticalAnalysis.Result.outs["output.dat"],
    points = points, projects = projects, BSHFile = BSHFile,
    searchedpattern = searchedhirlam, inControlPoints = inControlPoints
)
{:
    select Forecast where (
        pressure_field = searchedpattern
    )
    simulate package cyclonegenerator with parameters (
        inConfigFile = configFile
        forecastField = Forecast.field
    )
    simulate VSO FloodSim from cyclonegenerator.Result [points, projects,
        BSHFile, inControlPoints] to FloodSimulation
    out FloodSim.Result as out
:};

step FloodStatisticalAnalysis runs scilab (
    input_folder = FloodSimulation.Result.outs["/out/"],
    script_name = "FLOOD_stat.sce"
);
```
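The BDDSL script above is essentially a small dataflow: each `step`/`script` consumes named outputs of an earlier one (e.g. `CycloneSearch.Result.outs["data/out"]`). A toy Go sketch of that dependency wiring, with all names hypothetical and no relation to the paper's implementation:

```go
package main

import (
	"fmt"
	"strings"
)

// Step is a named task; its inputs name outputs of earlier steps
// as "producer.key" references, echoing the DSL's
// CycloneSearch.Result.outs[...] style.
type Step struct {
	Name   string
	Inputs map[string]string // input name -> "producer.key"
	Run    func(in map[string]string) map[string]string
}

// RunPipeline executes steps in declaration order, resolving each
// input reference against the outputs of already-finished steps.
func RunPipeline(steps []Step) map[string]map[string]string {
	done := map[string]map[string]string{}
	for _, s := range steps {
		in := map[string]string{}
		for name, ref := range s.Inputs {
			parts := strings.SplitN(ref, ".", 2)
			in[name] = done[parts[0]][parts[1]]
		}
		done[s.Name] = s.Run(in)
	}
	return done
}

func main() {
	pipeline := []Step{
		{
			Name: "CycloneSearch",
			Run: func(map[string]string) map[string]string {
				return map[string]string{"outs": "data/out"}
			},
		},
		{
			Name:   "CycloneStatisticalAnalysis",
			Inputs: map[string]string{"input_folder": "CycloneSearch.outs"},
			Run: func(in map[string]string) map[string]string {
				return map[string]string{"result": "stats(" + in["input_folder"] + ")"}
			},
		},
	}
	fmt.Println(RunPipeline(pipeline)["CycloneStatisticalAnalysis"]["result"])
}
```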