├── LICENCE ├── README.md ├── cmdparser.go ├── doc.go ├── example_source_test.go ├── examples └── httpsource │ ├── Dockerfile │ ├── apisource │ └── apisource.go │ └── main.go ├── go.mod ├── inferschema.go ├── protocol.go ├── safewriter.go ├── schema └── schema.go ├── source.go ├── sourceRunner.go └── trackers.go /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 bitstrapped 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Airbyte - Golang SDK/CDK 2 | 3 | This package aims to help developers build connectors (sources/destinations) really fast in Go. 4 | The focus of this package is developer efficiency. 
It focuses on letting developers focus more on connector business logic instead of airbyte protocol knowledge. 5 | 6 | ## Installation 7 | 8 | ``` 9 | go get github.com/bitstrapped/airbyte 10 | ``` 11 | 12 | ## Docs 13 | 14 | - View the godocs here: https://pkg.go.dev/github.com/bitstrapped/airbyte 15 | 16 | 17 | ## Usage 18 | 19 | ### By Example 20 | 21 | 1. The fastest way to get started is to look at the full example in `examples/httpsource` or the Example in the godoc 22 | 23 | 24 | ### Detailed Usage 25 | 26 | 1. Define a source by implementing the `Source` interface. 27 | 28 | ```go 29 | // Source is the only interface you need to define to create your source! 30 | type Source interface { 31 | // Spec returns the input "form" spec needed for your source 32 | Spec(logTracker LogTracker) (*ConnectorSpecification, error) 33 | // Check verifies the source - usually verify creds/connection etc. 34 | Check(srcCfgPath string, logTracker LogTracker) error 35 | // Discover returns the schema of the data you want to sync 36 | Discover(srcConfigPath string, logTracker LogTracker) (*Catalog, error) 37 | // Read will read the actual data from your source and use tracker.Record(), tracker.State() and tracker.Log() to sync data with airbyte/destinations 38 | // MessageTracker is thread-safe and so it is completely fine to spin off goroutines to sync your data (just don't forget your waitgroups :)) 39 | // returning an error from this will cancel the sync and returning a nil from this will successfully end the sync 40 | Read(sourceCfgPath string, prevStatePath string, configuredCat *ConfiguredCatalog, 41 | tracker MessageTracker) error 42 | } 43 | ``` 44 | 45 | 2. Inside of main, pass your source into the sourcerunner 46 | 47 | ```go 48 | func main() { 49 | fsrc := filesource.NewFileSource("foobar.txt") 50 | runner := airbyte.NewSourceRunner(fsrc, os.Stdout) 51 | err := runner.Start() 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | } 56 | ``` 57 | 58 | 59 | 3.
// argValueAt returns the value that follows flag on the command line, where
// flag itself is expected at position idx of args.
//
// It returns an error (instead of panicking with an index-out-of-range) when
// args is too short to contain the flag or its value, or when a different
// token is found at idx.
func argValueAt(args []string, idx int, flag string) (string, error) {
	if idx >= len(args) || args[idx] != flag {
		return "", fmt.Errorf("expect %s", flag)
	}
	if idx+1 >= len(args) {
		return "", fmt.Errorf("expect a value after %s", flag)
	}
	return args[idx+1], nil
}

// getSourceConfigPath returns the path passed after "--config", which must be
// the third command line argument (os.Args[2]).
func getSourceConfigPath() (string, error) {
	return argValueAt(os.Args, 2, "--config")
}

// getStatePath returns the path passed after "--state" (os.Args[6]).
// The state flag is optional: when the command line has no seventh argument
// an empty path and a nil error are returned.
func getStatePath() (string, error) {
	if len(os.Args) <= 6 {
		return "", nil
	}
	return argValueAt(os.Args, 6, "--state")
}

// getCatalogPath returns the path passed after "--catalog", which must be the
// fifth command line argument (os.Args[4]).
func getCatalogPath() (string, error) {
	return argValueAt(os.Args, 4, "--catalog")
}
// UnmarshalFromPath reads the file at path and JSON-decodes its contents into v.
// It is typically used to load the connector State saved by a previous run, or
// the SourceConfig handed to the connector.
//
// A read failure is returned as-is; otherwise the result of json.Unmarshal is
// returned.
//
// Example usage
//	type CustomState struct {
//		Timestamp int    `json:"timestamp"`
//		Foobar    string `json:"foobar"`
//	}
//
//	func (s *CustomSource) Read(stPath string, ...) error {
//		var cs CustomState
//		err = airbyte.UnmarshalFromPath(stPath, &cs)
//		if err != nil {
//			// handle error
//		}
//		// cs is populated
//	}
//
func UnmarshalFromPath(path string, v interface{}) error {
	raw, readErr := ioutil.ReadFile(path)
	if readErr == nil {
		return json.Unmarshal(raw, v)
	}
	return readErr
}
SupportsDBT: true, 41 | SupportedDestinationSyncModes: []airbyte.DestinationSyncMode{ 42 | airbyte.DestinationSyncModeOverwrite, 43 | }, 44 | ConnectionSpecification: airbyte.ConnectionSpecification{ 45 | Title: "Example HTTP Source", 46 | Description: "This is an example http source for the docs's", 47 | Type: "object", 48 | Required: []airbyte.PropertyName{"apiKey"}, 49 | Properties: airbyte.Properties{ 50 | Properties: map[airbyte.PropertyName]airbyte.PropertySpec{ 51 | "apiKey": { 52 | Description: "api key to access http source, valid uuid", 53 | Examples: []string{"xxxx-xxxx-xxxx-xxxx"}, 54 | PropertyType: airbyte.PropertyType{ 55 | Type: []airbyte.PropType{ 56 | airbyte.String, 57 | }, 58 | }, 59 | }, 60 | }, 61 | }, 62 | }, 63 | }, nil 64 | } 65 | 66 | func (h HTTPSource) Check(srcCfgPath string, logTracker airbyte.LogTracker) error { 67 | logTracker.Log(airbyte.LogLevelDebug, "validating api connection") 68 | var srcCfg HTTPConfig 69 | err := airbyte.UnmarshalFromPath(srcCfgPath, &srcCfg) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | resp, err := http.Get(fmt.Sprintf("%s/ping?key=%s", h.baseURL, srcCfg.APIKey)) 75 | if err != nil { 76 | return err 77 | } 78 | 79 | if resp.StatusCode != http.StatusOK { 80 | return errors.New("Invalid status") 81 | } 82 | 83 | return nil 84 | } 85 | 86 | func (h HTTPSource) Discover(srcCfgPath string, logTracker airbyte.LogTracker) (*airbyte.Catalog, error) { 87 | var srcCfg HTTPConfig 88 | err := airbyte.UnmarshalFromPath(srcCfgPath, &srcCfg) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | return &airbyte.Catalog{Streams: []airbyte.Stream{{ 94 | Name: "users", 95 | JSONSchema: airbyte.Properties{ 96 | Properties: map[airbyte.PropertyName]airbyte.PropertySpec{ 97 | "userid": { 98 | PropertyType: airbyte.PropertyType{ 99 | Type: []airbyte.PropType{airbyte.Integer, airbyte.Null}, 100 | AirbyteType: airbyte.BigInteger}, 101 | Description: "user ID - see the big int", 102 | }, 103 | "name": { 104 | 
PropertyType: airbyte.PropertyType{ 105 | Type: []airbyte.PropType{airbyte.String, airbyte.Null}, 106 | }, 107 | Description: "user name", 108 | }, 109 | }, 110 | }, 111 | SupportedSyncModes: []airbyte.SyncMode{ 112 | airbyte.SyncModeFullRefresh, 113 | }, 114 | SourceDefinedCursor: false, 115 | Namespace: "bitstrapped", 116 | }, 117 | { 118 | Name: "payments", 119 | JSONSchema: airbyte.InferSchemaFromStruct(Payment{}, logTracker), 120 | SupportedSyncModes: []airbyte.SyncMode{ 121 | airbyte.SyncModeFullRefresh, 122 | }, 123 | SourceDefinedCursor: false, 124 | Namespace: "bitstrapped", 125 | }, 126 | }}, nil 127 | } 128 | 129 | type User struct { 130 | UserID int64 `json:"userid"` 131 | Name string `json:"name"` 132 | } 133 | 134 | type Payment struct { 135 | UserID int64 `json:"userid"` 136 | PaymentAmount int64 `json:"paymentAmount"` 137 | } 138 | 139 | func (h HTTPSource) Read(sourceCfgPath string, prevStatePath string, configuredCat *airbyte.ConfiguredCatalog, 140 | tracker airbyte.MessageTracker) error { 141 | tracker.Log(airbyte.LogLevelInfo, "Running read") 142 | var src HTTPConfig 143 | err := airbyte.UnmarshalFromPath(sourceCfgPath, &src) 144 | if err != nil { 145 | return err 146 | } 147 | 148 | // see if there is a last sync 149 | var st LastSyncTime 150 | airbyte.UnmarshalFromPath(sourceCfgPath, &st) 151 | if st.Timestamp <= 0 { 152 | st.Timestamp = -1 153 | } 154 | 155 | for _, stream := range configuredCat.Streams { 156 | if stream.Stream.Name == "users" { 157 | var u []User 158 | uri := fmt.Sprintf("https://api.bistrapped.com/users?apiKey=%s", src.APIKey) 159 | resp, err := http.Get(uri) 160 | if err != nil { 161 | return err 162 | } 163 | err = json.NewDecoder(resp.Body).Decode(&u) 164 | if err != nil { 165 | return err 166 | } 167 | 168 | for _, ur := range u { 169 | err := tracker.Record(ur, stream.Stream.Name, stream.Stream.Namespace) 170 | if err != nil { 171 | return err 172 | } 173 | } 174 | } 175 | 176 | if stream.Stream.Name == "payments" { 
177 | var p []Payment 178 | uri := fmt.Sprintf("%s/payments?apiKey=%s", h.baseURL, src.APIKey) 179 | resp, err := http.Get(uri) 180 | if err != nil { 181 | return err 182 | } 183 | err = json.NewDecoder(resp.Body).Decode(&p) 184 | if err != nil { 185 | return err 186 | } 187 | 188 | for _, py := range p { 189 | err := tracker.Record(py, stream.Stream.Name, stream.Stream.Namespace) 190 | if err != nil { 191 | return err 192 | } 193 | } 194 | } 195 | } 196 | 197 | tracker.State(&LastSyncTime{ 198 | Timestamp: time.Now().UnixMilli(), 199 | }) 200 | return nil 201 | } 202 | 203 | func Example() { 204 | hsrc := NewHTTPSource("https://api.bitstrapped.com") 205 | runner := airbyte.NewSourceRunner(hsrc, os.Stdout) 206 | err := runner.Start() 207 | if err != nil { 208 | log.Fatal(err) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /examples/httpsource/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.17-buster as build 2 | WORKDIR /base 3 | ADD . /base/ 4 | RUN go build -o /base/app . 
5 | ENTRYPOINT ["/base/app"] -------------------------------------------------------------------------------- /examples/httpsource/apisource/apisource.go: -------------------------------------------------------------------------------- 1 | package apisource 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "time" 9 | 10 | "github.com/bitstrapped/airbyte" 11 | ) 12 | 13 | type APISource struct { 14 | baseURL string 15 | } 16 | 17 | type LastSyncTime struct { 18 | Timestamp int64 `json:"timestamp"` 19 | } 20 | 21 | type HTTPConfig struct { 22 | APIKey string `json:"apiKey"` 23 | } 24 | 25 | func NewAPISource(baseURL string) airbyte.Source { 26 | return APISource{ 27 | baseURL: baseURL, 28 | } 29 | } 30 | 31 | func (h APISource) Spec(logTracker airbyte.LogTracker) (*airbyte.ConnectorSpecification, error) { 32 | if err := logTracker.Log(airbyte.LogLevelInfo, "Running Spec"); err != nil { 33 | return nil, err 34 | } 35 | return &airbyte.ConnectorSpecification{ 36 | DocumentationURL: "https://bitstrapped.com", 37 | ChangeLogURL: "https://bitstrapped.com", 38 | SupportsIncremental: false, 39 | SupportsNormalization: true, 40 | SupportsDBT: true, 41 | SupportedDestinationSyncModes: []airbyte.DestinationSyncMode{ 42 | airbyte.DestinationSyncModeOverwrite, 43 | }, 44 | ConnectionSpecification: airbyte.ConnectionSpecification{ 45 | Title: "Example HTTP Source", 46 | Description: "This is an example http source for the docs's", 47 | Type: "object", 48 | Required: []airbyte.PropertyName{"apiKey"}, 49 | Properties: airbyte.Properties{ 50 | Properties: map[airbyte.PropertyName]airbyte.PropertySpec{ 51 | "apiKey": { 52 | Description: "api key to access http source, valid uuid", 53 | Examples: []string{"xxxx-xxxx-xxxx-xxxx"}, 54 | PropertyType: airbyte.PropertyType{ 55 | Type: []airbyte.PropType{ 56 | airbyte.String, 57 | }, 58 | }, 59 | }, 60 | }, 61 | }, 62 | }, 63 | }, nil 64 | } 65 | 66 | func (h APISource) Check(srcCfgPath string, logTracker 
airbyte.LogTracker) error { 67 | if err := logTracker.Log(airbyte.LogLevelDebug, "validating api connection"); err != nil { 68 | return err 69 | } 70 | var srcCfg HTTPConfig 71 | err := airbyte.UnmarshalFromPath(srcCfgPath, &srcCfg) 72 | if err != nil { 73 | return err 74 | } 75 | 76 | resp, err := http.Get(fmt.Sprintf("%s/ping?key=%s", h.baseURL, srcCfg.APIKey)) 77 | if err != nil { 78 | return err 79 | } 80 | 81 | if resp.StatusCode != http.StatusOK { 82 | return errors.New("invalid status") 83 | } 84 | 85 | return resp.Body.Close() 86 | } 87 | 88 | func (h APISource) Discover(srcCfgPath string, logTracker airbyte.LogTracker) (*airbyte.Catalog, error) { 89 | var srcCfg HTTPConfig 90 | err := airbyte.UnmarshalFromPath(srcCfgPath, &srcCfg) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | return &airbyte.Catalog{Streams: []airbyte.Stream{{ 96 | Name: "users", 97 | JSONSchema: airbyte.Properties{ 98 | Properties: map[airbyte.PropertyName]airbyte.PropertySpec{ 99 | "userid": { 100 | PropertyType: airbyte.PropertyType{ 101 | Type: []airbyte.PropType{airbyte.Integer, airbyte.Null}, 102 | AirbyteType: airbyte.BigInteger}, 103 | Description: "user ID - see the big int", 104 | }, 105 | "name": { 106 | PropertyType: airbyte.PropertyType{ 107 | Type: []airbyte.PropType{airbyte.String, airbyte.Null}, 108 | }, 109 | Description: "user name", 110 | }, 111 | }, 112 | }, 113 | SupportedSyncModes: []airbyte.SyncMode{ 114 | airbyte.SyncModeFullRefresh, 115 | }, 116 | SourceDefinedCursor: false, 117 | Namespace: "bitstrapped", 118 | }, 119 | { 120 | Name: "payments", 121 | JSONSchema: airbyte.Properties{ 122 | Properties: map[airbyte.PropertyName]airbyte.PropertySpec{ 123 | "userid": { 124 | PropertyType: airbyte.PropertyType{ 125 | Type: []airbyte.PropType{airbyte.Integer, airbyte.Null}, 126 | AirbyteType: airbyte.BigInteger}, 127 | Description: "user ID - see the big int", 128 | }, 129 | "paymentAmount": { 130 | PropertyType: airbyte.PropertyType{ 131 | Type: 
[]airbyte.PropType{airbyte.Integer, airbyte.Null}, 132 | }, 133 | Description: "payment amount", 134 | }, 135 | }, 136 | }, 137 | SupportedSyncModes: []airbyte.SyncMode{ 138 | airbyte.SyncModeFullRefresh, 139 | }, 140 | SourceDefinedCursor: false, 141 | Namespace: "bitstrapped", 142 | }, 143 | }}, nil 144 | } 145 | 146 | type User struct { 147 | UserID int64 `json:"userid"` 148 | Name string `json:"name"` 149 | } 150 | 151 | type Payment struct { 152 | UserID int64 `json:"userid"` 153 | PaymentAmount int64 `json:"paymentAmount"` 154 | } 155 | 156 | func (h APISource) Read(sourceCfgPath string, prevStatePath string, configuredCat *airbyte.ConfiguredCatalog, 157 | tracker airbyte.MessageTracker) error { 158 | if err := tracker.Log(airbyte.LogLevelInfo, "Running read"); err != nil { 159 | return err 160 | } 161 | var src HTTPConfig 162 | err := airbyte.UnmarshalFromPath(sourceCfgPath, &src) 163 | if err != nil { 164 | return err 165 | } 166 | 167 | // see if there is a last sync 168 | var st LastSyncTime 169 | _ = airbyte.UnmarshalFromPath(sourceCfgPath, &st) 170 | if st.Timestamp <= 0 { 171 | st.Timestamp = -1 172 | } 173 | 174 | for _, stream := range configuredCat.Streams { 175 | if stream.Stream.Name == "users" { 176 | var u []User 177 | uri := fmt.Sprintf("https://api.bistrapped.com/users?apiKey=%s", src.APIKey) 178 | if err := httpGet(uri, &u); err != nil { 179 | return err 180 | } 181 | 182 | for _, ur := range u { 183 | err := tracker.Record(ur, stream.Stream.Name, stream.Stream.Namespace) 184 | if err != nil { 185 | return err 186 | } 187 | } 188 | } 189 | 190 | if stream.Stream.Name == "payments" { 191 | var p []Payment 192 | uri := fmt.Sprintf("%s/payments?apiKey=%s", h.baseURL, src.APIKey) 193 | if err := httpGet(uri, &p); err != nil { 194 | return err 195 | } 196 | 197 | for _, py := range p { 198 | err := tracker.Record(py, stream.Stream.Name, stream.Stream.Namespace) 199 | if err != nil { 200 | return err 201 | } 202 | } 203 | } 204 | } 205 | 206 | 
return tracker.State(&LastSyncTime{ 207 | Timestamp: time.Now().UnixMilli(), 208 | }) 209 | } 210 | 211 | func httpGet(uri string, v interface{}) error { 212 | resp, err := http.Get(uri) 213 | if err != nil { 214 | return err 215 | } 216 | defer resp.Body.Close() 217 | return json.NewDecoder(resp.Body).Decode(v) 218 | } 219 | -------------------------------------------------------------------------------- /examples/httpsource/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "github.com/bitstrapped/airbyte" 8 | "github.com/bitstrapped/airbyte/examples/httpsource/apisource" 9 | ) 10 | 11 | func main() { 12 | hsrc := apisource.NewAPISource("https://api.bitstrapped.com") 13 | runner := airbyte.NewSourceRunner(hsrc, os.Stdout) 14 | err := runner.Start() 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bitstrapped/airbyte 2 | 3 | go 1.17 4 | -------------------------------------------------------------------------------- /inferschema.go: -------------------------------------------------------------------------------- 1 | package airbyte 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/bitstrapped/airbyte/schema" 7 | "reflect" 8 | ) 9 | 10 | // Infer schema translates golang structs to JSONSchema format 11 | func InferSchemaFromStruct(i interface{}, logTracker LogTracker) Properties { 12 | var prop Properties 13 | 14 | s, err := schema.Generate(reflect.TypeOf(i)) 15 | if err != nil { 16 | logTracker.Log(LogLevelError, fmt.Sprintf("generate schema error: %v", err)) 17 | return prop 18 | } 19 | 20 | b, err := json.Marshal(s) 21 | if err != nil { 22 | logTracker.Log(LogLevelError, fmt.Sprintf("json marshal schema error: %v", err)) 23 | return prop 24 | } 25 | 
26 | err = json.Unmarshal(b, &prop) 27 | if err != nil { 28 | logTracker.Log(LogLevelError, fmt.Sprintf("unmarshal schema to propspec error: %v", err)) 29 | return prop 30 | } 31 | 32 | return prop 33 | } 34 | -------------------------------------------------------------------------------- /protocol.go: -------------------------------------------------------------------------------- 1 | package airbyte 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "io" 7 | "time" 8 | ) 9 | 10 | // Should conform to https://github.com/airbytehq/airbyte/blob/master/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml 11 | 12 | type cmd string 13 | 14 | const ( 15 | cmdSpec cmd = "spec" 16 | cmdCheck cmd = "check" 17 | cmdDiscover cmd = "discover" 18 | cmdRead cmd = "read" 19 | ) 20 | 21 | type msgType string 22 | 23 | const ( 24 | msgTypeRecord msgType = "RECORD" 25 | msgTypeState msgType = "STATE" 26 | msgTypeLog msgType = "LOG" 27 | msgTypeConnectionStat msgType = "CONNECTION_STATUS" 28 | msgTypeCatalog msgType = "CATALOG" 29 | msgTypeSpec msgType = "SPEC" 30 | ) 31 | 32 | var errInvalidTypePayload = errors.New("message type and payload are invalid") 33 | 34 | type message struct { 35 | Type msgType `json:"type"` 36 | *record `json:"record,omitempty"` 37 | *state `json:"state,omitempty"` 38 | *logMessage `json:"log,omitempty"` 39 | *ConnectorSpecification `json:"spec,omitempty"` 40 | *connectionStatus `json:"connectionStatus,omitempty"` 41 | *Catalog `json:"catalog,omitempty"` 42 | } 43 | 44 | // message MarshalJSON is a custom marshaller which validates the messageType with the sub-struct 45 | func (m *message) MarshalJSON() ([]byte, error) { 46 | switch m.Type { 47 | case msgTypeRecord: 48 | if m.record == nil || 49 | m.state != nil || 50 | m.logMessage != nil || 51 | m.connectionStatus != nil || 52 | m.Catalog != nil { 53 | return nil, errInvalidTypePayload 54 | } 55 | case msgTypeState: 56 | if m.state == nil || 57 | m.record != nil || 58 | 
m.logMessage != nil || 59 | m.connectionStatus != nil || 60 | m.Catalog != nil { 61 | return nil, errInvalidTypePayload 62 | } 63 | case msgTypeLog: 64 | if m.logMessage == nil || 65 | m.record != nil || 66 | m.state != nil || 67 | m.connectionStatus != nil || 68 | m.Catalog != nil { 69 | return nil, errInvalidTypePayload 70 | } 71 | } 72 | 73 | type m2 message 74 | return json.Marshal(m2(*m)) 75 | } 76 | 77 | // write emits data outbound from your src/destination to airbyte workers 78 | func write(w io.Writer, m *message) error { 79 | return json.NewEncoder(w).Encode(m) 80 | } 81 | 82 | // record defines a record as per airbyte - a "data point" 83 | type record struct { 84 | EmittedAt int64 `json:"emitted_at"` 85 | Namespace string `json:"namespace"` 86 | Data interface{} `json:"data"` 87 | Stream string `json:"stream"` 88 | } 89 | 90 | // state is used to store data between syncs - useful for incremental syncs and state storage 91 | type state struct { 92 | Data interface{} `json:"data"` 93 | } 94 | 95 | // LogLevel defines the log levels that can be emitted with airbyte logs 96 | type LogLevel string 97 | 98 | const ( 99 | LogLevelFatal LogLevel = "FATAL" 100 | LogLevelError LogLevel = "ERROR" 101 | LogLevelWarn LogLevel = "WARN" 102 | LogLevelInfo LogLevel = "INFO" 103 | LogLevelDebug LogLevel = "DEBUG" 104 | LogLevelTrace LogLevel = "TRACE" 105 | ) 106 | 107 | type logMessage struct { 108 | Level LogLevel `json:"level"` 109 | Message string `json:"message"` 110 | } 111 | 112 | type checkStatus string 113 | 114 | const ( 115 | checkStatusSuccess checkStatus = "SUCCEEDED" 116 | checkStatusFailed checkStatus = "FAILED" 117 | ) 118 | 119 | type connectionStatus struct { 120 | Status checkStatus `json:"status"` 121 | } 122 | 123 | // Catalog defines the complete available schema you can sync with a source 124 | // This should not be mistaken with ConfiguredCatalog which is the "selected" schema you want to sync 125 | type Catalog struct { 126 | Streams []Stream 
// DestinationSyncMode represents how the destination should interpret your data
type DestinationSyncMode string

// The destination sync modes are constants (like the SyncMode values) so that
// no package can reassign them at runtime.
const (
	// DestinationSyncModeAppend is used for the destination to know it needs to append data
	DestinationSyncModeAppend DestinationSyncMode = "append"
	// DestinationSyncModeOverwrite is used to indicate the destination should overwrite data
	DestinationSyncModeOverwrite DestinationSyncMode = "overwrite"
)
// FormatType is used to define data type formats supported by airbyte where needed (usually for strings formatted as dates). See more here: https://docs.airbyte.com/understanding-airbyte/supported-data-types
type FormatType string

const (
	// Date is the JSON-Schema "date" format.
	Date FormatType = "date"
	// DateTime is the JSON-Schema "date-time" format. The format name is
	// hyphenated; "datetime" is not a format name defined by JSON Schema.
	DateTime FormatType = "date-time"
)
// safeWriter wraps an io.Writer and serialises calls to Write with a mutex.
type safeWriter struct {
	w  io.Writer
	mu sync.Mutex
}

// newSafeWriter returns an io.Writer that forwards every Write to w while
// holding a lock, so only one Write reaches w at a time.
func newSafeWriter(w io.Writer) io.Writer {
	guarded := new(safeWriter)
	guarded.w = w
	return guarded
}

// Write locks the writer, forwards p to the wrapped io.Writer and returns its
// result unchanged. The lock is released when the call returns.
func (sw *safeWriter) Write(p []byte) (int, error) {
	sw.mu.Lock()
	defer sw.mu.Unlock()

	written, err := sw.w.Write(p)
	return written, err
}
2 | //Edits are marked with #Edit 3 | //For unedited software see https://github.com/danielgtaylor/huma v1.5.4 4 | //Following copyright notice applies to the 'schema' file. 5 | 6 | //Copyright 2020 Daniel G. Taylor 7 | // 8 | //Permission is hereby granted, free of charge, to any person obtaining a copy of this software 9 | //and associated documentation files (the "Software"), to deal in the Software without restriction, 10 | //including without limitation the rights to use, copy, modify, merge, publish, distribute, 11 | //sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 12 | //furnished to do so, subject to the following conditions: 13 | // 14 | //The above copyright notice and this permission notice shall be included in all copies or substantial 15 | //portions of the Software. 16 | // 17 | //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 18 | //NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | //NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | //WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | // Package schema implements OpenAPI 3 compatible JSON Schema which can be 24 | // generated from structs. 25 | 26 | package schema 27 | 28 | import ( 29 | "encoding/json" 30 | "errors" 31 | "fmt" 32 | "net" 33 | "net/url" 34 | "reflect" 35 | "regexp" 36 | "strconv" 37 | "strings" 38 | "time" 39 | ) 40 | 41 | // ErrSchemaInvalid is sent when there is a problem building the schema. 42 | var ErrSchemaInvalid = errors.New("schema is invalid") 43 | 44 | // Mode defines whether the schema is being generated for read or 45 | // write mode. Read-only fields are dropped when in write mode, for example. 
46 | type Mode int 47 | 48 | const ( 49 | // ModeAll is for general purpose use and includes all fields. 50 | ModeAll Mode = iota 51 | // ModeRead is for HTTP HEAD & GET and will hide write-only fields. 52 | ModeRead 53 | // ModeWrite is for HTTP POST, PUT, PATCH, DELETE and will hide 54 | // read-only fields. 55 | ModeWrite 56 | ) 57 | 58 | // JSON Schema type constants 59 | const ( 60 | TypeBoolean = "boolean" 61 | TypeInteger = "integer" 62 | TypeNumber = "number" 63 | TypeString = "string" 64 | TypeArray = "array" 65 | TypeObject = "object" 66 | ) 67 | 68 | var ( 69 | timeType = reflect.TypeOf(time.Time{}) 70 | ipType = reflect.TypeOf(net.IP{}) 71 | uriType = reflect.TypeOf(url.URL{}) 72 | byteSliceType = reflect.TypeOf([]byte(nil)) 73 | ) 74 | 75 | // I returns a pointer to the given int. Useful helper function for pointer 76 | // schema validators like MaxLength or MinItems. 77 | func I(value uint64) *uint64 { 78 | return &value 79 | } 80 | 81 | // F returns a pointer to the given float64. Useful helper function for pointer 82 | // schema validators like Maximum or Minimum. 83 | func F(value float64) *float64 { 84 | return &value 85 | } 86 | 87 | // getTagValue returns a value of the schema's type for the given tag string. 88 | // Uses JSON parsing if the schema is not a string. 89 | func getTagValue(s *Schema, t reflect.Type, value string) (interface{}, error) { 90 | // Special case: strings don't need quotes. 91 | if s.Type[0] == TypeString { 92 | return value, nil 93 | } 94 | 95 | // Special case: array of strings with comma-separated values and no quotes. 
96 | if s.Type[0] == TypeArray && s.Items != nil && s.Items.Type[0] == TypeString && len(value) > 0 && value[0] != '[' { 97 | values := []string{} 98 | for _, s := range strings.Split(value, ",") { 99 | values = append(values, strings.TrimSpace(s)) 100 | } 101 | return values, nil 102 | } 103 | 104 | var v interface{} 105 | if err := json.Unmarshal([]byte(value), &v); err != nil { 106 | return nil, err 107 | } 108 | 109 | vv := reflect.ValueOf(v) 110 | tv := reflect.TypeOf(v) 111 | if v != nil && tv != t { 112 | if tv.Kind() == reflect.Slice { 113 | // Slices can't be cast due to the different layouts. Instead, we make a 114 | // new instance of the destination slice, and convert each value in 115 | // the original to the new type. 116 | tmp := reflect.MakeSlice(t, 0, vv.Len()) 117 | for i := 0; i < vv.Len(); i++ { 118 | if !vv.Index(i).Elem().Type().ConvertibleTo(t.Elem()) { 119 | return nil, fmt.Errorf("unable to convert %v to %v: %w", vv.Index(i).Interface(), t.Elem(), ErrSchemaInvalid) 120 | } 121 | 122 | tmp = reflect.Append(tmp, vv.Index(i).Elem().Convert(t.Elem())) 123 | } 124 | v = tmp.Interface() 125 | } else if !tv.ConvertibleTo(t) { 126 | return nil, fmt.Errorf("unable to convert %v to %v: %w", tv, t, ErrSchemaInvalid) 127 | } 128 | 129 | v = reflect.ValueOf(v).Convert(t).Interface() 130 | } 131 | 132 | return v, nil 133 | } 134 | 135 | // Schema represents a JSON Schema which can be generated from Go structs 136 | type Schema struct { 137 | Type []string `json:"type,omitempty"` // #Edit from: Type string `json:"type,omitempty"` 138 | Description string `json:"description,omitempty"` 139 | Items *Schema `json:"items,omitempty"` 140 | Properties map[string]*Schema `json:"properties,omitempty"` 141 | AdditionalProperties interface{} `json:"additionalProperties,omitempty"` 142 | PatternProperties map[string]*Schema `json:"patternProperties,omitempty"` 143 | Required []string `json:"required,omitempty"` 144 | Format string `json:"format,omitempty"` 145 | 
Enum []interface{} `json:"enum,omitempty"` 146 | Default interface{} `json:"default,omitempty"` 147 | Example interface{} `json:"example,omitempty"` 148 | Minimum *float64 `json:"minimum,omitempty"` 149 | ExclusiveMinimum *bool `json:"exclusiveMinimum,omitempty"` 150 | Maximum *float64 `json:"maximum,omitempty"` 151 | ExclusiveMaximum *bool `json:"exclusiveMaximum,omitempty"` 152 | MultipleOf float64 `json:"multipleOf,omitempty"` 153 | MinLength *uint64 `json:"minLength,omitempty"` 154 | MaxLength *uint64 `json:"maxLength,omitempty"` 155 | Pattern string `json:"pattern,omitempty"` 156 | MinItems *uint64 `json:"minItems,omitempty"` 157 | MaxItems *uint64 `json:"maxItems,omitempty"` 158 | UniqueItems bool `json:"uniqueItems,omitempty"` 159 | MinProperties *uint64 `json:"minProperties,omitempty"` 160 | MaxProperties *uint64 `json:"maxProperties,omitempty"` 161 | AllOf []*Schema `json:"allOf,omitempty"` 162 | AnyOf []*Schema `json:"anyOf,omitempty"` 163 | OneOf []*Schema `json:"oneOf,omitempty"` 164 | Not *Schema `json:"not,omitempty"` 165 | Nullable bool `json:"nullable,omitempty"` 166 | ReadOnly bool `json:"readOnly,omitempty"` 167 | WriteOnly bool `json:"writeOnly,omitempty"` 168 | Deprecated bool `json:"deprecated,omitempty"` 169 | ContentEncoding string `json:"contentEncoding,omitempty"` 170 | Ref string `json:"$ref,omitempty"` 171 | } 172 | 173 | // HasValidation returns true if at least one validator is set on the schema. 174 | // This excludes the schema's type but includes most other fields and can be 175 | // used to trigger additional slow validation steps when needed. 
176 | func (s *Schema) HasValidation() bool { 177 | if s.Items != nil || len(s.Properties) > 0 || s.AdditionalProperties != nil || len(s.PatternProperties) > 0 || len(s.Required) > 0 || len(s.Enum) > 0 || s.Minimum != nil || s.ExclusiveMinimum != nil || s.Maximum != nil || s.ExclusiveMaximum != nil || s.MultipleOf != 0 || s.MinLength != nil || s.MaxLength != nil || s.Pattern != "" || s.MinItems != nil || s.MaxItems != nil || s.UniqueItems || s.MinProperties != nil || s.MaxProperties != nil || len(s.AllOf) > 0 || len(s.AnyOf) > 0 || len(s.OneOf) > 0 || s.Not != nil || s.Ref != "" { 178 | return true 179 | } 180 | 181 | return false 182 | } 183 | 184 | // RemoveProperty removes a property by name from the schema, making sure to 185 | // also remove it from the required property set if present. 186 | func (s *Schema) RemoveProperty(name string) { 187 | delete(s.Properties, name) 188 | for i := range s.Required { 189 | if s.Required[i] == name { 190 | s.Required[i] = s.Required[len(s.Required)-1] 191 | s.Required = s.Required[:len(s.Required)-1] 192 | break 193 | } 194 | } 195 | } 196 | 197 | // Generate creates a JSON schema for a Go type. Struct field tags 198 | // can be used to provide additional metadata such as descriptions and 199 | // validation. 200 | func Generate(t reflect.Type) (*Schema, error) { 201 | return GenerateWithMode(t, ModeAll, nil) 202 | } 203 | 204 | // getFields performs a breadth-first search for all fields including embedded 205 | // ones. It may return multiple fields with the same name, the first of which 206 | // represents the outer-most declaration. 
207 | func getFields(typ reflect.Type) []reflect.StructField { 208 | fields := make([]reflect.StructField, 0, typ.NumField()) 209 | embedded := []reflect.StructField{} 210 | 211 | for i := 0; i < typ.NumField(); i++ { 212 | f := typ.Field(i) 213 | if f.Anonymous { 214 | embedded = append(embedded, f) 215 | continue 216 | } 217 | 218 | fields = append(fields, f) 219 | } 220 | 221 | for _, f := range embedded { 222 | newTyp := f.Type 223 | if newTyp.Kind() == reflect.Ptr { 224 | newTyp = newTyp.Elem() 225 | } 226 | if newTyp.Kind() == reflect.Struct { 227 | fields = append(fields, getFields(newTyp)...) 228 | } 229 | } 230 | 231 | return fields 232 | } 233 | 234 | // GenerateFromField generates a schema for a single struct field. It returns 235 | // the computed field name, whether it is optional, its schema, and any error 236 | // which may have occurred. 237 | func GenerateFromField(f reflect.StructField, mode Mode) (string, bool, *Schema, error) { 238 | jsonTags := strings.Split(f.Tag.Get("json"), ",") 239 | name := strings.ToLower(f.Name) 240 | if len(jsonTags) > 0 && jsonTags[0] != "" { 241 | name = jsonTags[0] 242 | } 243 | 244 | if name == "-" { 245 | // Skip deliberately filtered out items 246 | return name, false, nil, nil 247 | } 248 | 249 | s, err := GenerateWithMode(f.Type, mode, nil) 250 | if err != nil { 251 | return name, false, nil, err 252 | } 253 | 254 | if tag, ok := f.Tag.Lookup("description"); ok { 255 | s.Description = tag 256 | } 257 | 258 | if tag, ok := f.Tag.Lookup("doc"); ok { 259 | s.Description = tag 260 | } 261 | 262 | if tag, ok := f.Tag.Lookup("format"); ok { 263 | s.Format = tag 264 | } 265 | 266 | if tag, ok := f.Tag.Lookup("enum"); ok { 267 | s.Enum = []interface{}{} 268 | 269 | enumType := f.Type 270 | enumSchema := s 271 | if s.Type[0] == TypeArray { // #Edit from: if s.Type == TypeArray { 272 | // Enum values should be the type of the array elements, not the 273 | // array itself! 
274 | enumType = f.Type.Elem() 275 | enumSchema = s.Items 276 | } 277 | 278 | for _, v := range strings.Split(tag, ",") { 279 | parsed, err := getTagValue(enumSchema, enumType, v) 280 | if err != nil { 281 | return name, false, nil, err 282 | } 283 | 284 | enumSchema.Enum = append(enumSchema.Enum, parsed) 285 | } 286 | } 287 | 288 | if tag, ok := f.Tag.Lookup("default"); ok { 289 | v, err := getTagValue(s, f.Type, tag) 290 | if err != nil { 291 | return name, false, nil, err 292 | } 293 | 294 | s.Default = v 295 | } 296 | 297 | if tag, ok := f.Tag.Lookup("example"); ok { 298 | v, err := getTagValue(s, f.Type, tag) 299 | if err != nil { 300 | return name, false, nil, err 301 | } 302 | 303 | s.Example = v 304 | } 305 | 306 | if tag, ok := f.Tag.Lookup("minimum"); ok { 307 | min, err := strconv.ParseFloat(tag, 64) 308 | if err != nil { 309 | return name, false, nil, err 310 | } 311 | s.Minimum = &min 312 | } 313 | 314 | if tag, ok := f.Tag.Lookup("exclusiveMinimum"); ok { 315 | min, err := strconv.ParseFloat(tag, 64) 316 | if err != nil { 317 | return name, false, nil, err 318 | } 319 | s.Minimum = &min 320 | t := true 321 | s.ExclusiveMinimum = &t 322 | } 323 | 324 | if tag, ok := f.Tag.Lookup("maximum"); ok { 325 | max, err := strconv.ParseFloat(tag, 64) 326 | if err != nil { 327 | return name, false, nil, err 328 | } 329 | s.Maximum = &max 330 | } 331 | 332 | if tag, ok := f.Tag.Lookup("exclusiveMaximum"); ok { 333 | max, err := strconv.ParseFloat(tag, 64) 334 | if err != nil { 335 | return name, false, nil, err 336 | } 337 | s.Maximum = &max 338 | t := true 339 | s.ExclusiveMaximum = &t 340 | } 341 | 342 | if tag, ok := f.Tag.Lookup("multipleOf"); ok { 343 | mof, err := strconv.ParseFloat(tag, 64) 344 | if err != nil { 345 | return name, false, nil, err 346 | } 347 | s.MultipleOf = mof 348 | } 349 | 350 | if tag, ok := f.Tag.Lookup("minLength"); ok { 351 | min, err := strconv.ParseUint(tag, 10, 64) 352 | if err != nil { 353 | return name, false, nil, err 354 | } 
355 | s.MinLength = &min 356 | } 357 | 358 | if tag, ok := f.Tag.Lookup("maxLength"); ok { 359 | max, err := strconv.ParseUint(tag, 10, 64) 360 | if err != nil { 361 | return name, false, nil, err 362 | } 363 | s.MaxLength = &max 364 | } 365 | 366 | if tag, ok := f.Tag.Lookup("pattern"); ok { 367 | s.Pattern = tag 368 | 369 | if _, err := regexp.Compile(s.Pattern); err != nil { 370 | return name, false, nil, err 371 | } 372 | } 373 | 374 | if tag, ok := f.Tag.Lookup("minItems"); ok { 375 | min, err := strconv.ParseUint(tag, 10, 64) 376 | if err != nil { 377 | return name, false, nil, err 378 | } 379 | s.MinItems = &min 380 | } 381 | 382 | if tag, ok := f.Tag.Lookup("maxItems"); ok { 383 | max, err := strconv.ParseUint(tag, 10, 64) 384 | if err != nil { 385 | return name, false, nil, err 386 | } 387 | s.MaxItems = &max 388 | } 389 | 390 | if tag, ok := f.Tag.Lookup("uniqueItems"); ok { 391 | if !(tag == "true" || tag == "false") { 392 | return name, false, nil, fmt.Errorf("%s uniqueItems: boolean should be true or false: %w", f.Name, ErrSchemaInvalid) 393 | } 394 | s.UniqueItems = tag == "true" 395 | } 396 | 397 | if tag, ok := f.Tag.Lookup("minProperties"); ok { 398 | min, err := strconv.ParseUint(tag, 10, 64) 399 | if err != nil { 400 | return name, false, nil, err 401 | } 402 | s.MinProperties = &min 403 | } 404 | 405 | if tag, ok := f.Tag.Lookup("maxProperties"); ok { 406 | max, err := strconv.ParseUint(tag, 10, 64) 407 | if err != nil { 408 | return name, false, nil, err 409 | } 410 | s.MaxProperties = &max 411 | } 412 | 413 | if tag, ok := f.Tag.Lookup("nullable"); ok { 414 | if !(tag == "true" || tag == "false") { 415 | return name, false, nil, fmt.Errorf("%s nullable: boolean should be true or false but got %s: %w", f.Name, tag, ErrSchemaInvalid) 416 | } 417 | s.Nullable = tag == "true" 418 | } 419 | 420 | if tag, ok := f.Tag.Lookup("readOnly"); ok { 421 | if !(tag == "true" || tag == "false") { 422 | return name, false, nil, fmt.Errorf("%s readOnly: boolean 
should be true or false: %w", f.Name, ErrSchemaInvalid) 423 | } 424 | s.ReadOnly = tag == "true" 425 | } 426 | 427 | if tag, ok := f.Tag.Lookup("writeOnly"); ok { 428 | if !(tag == "true" || tag == "false") { 429 | return name, false, nil, fmt.Errorf("%s writeOnly: boolean should be true or false: %w", f.Name, ErrSchemaInvalid) 430 | } 431 | s.WriteOnly = tag == "true" 432 | } 433 | 434 | if tag, ok := f.Tag.Lookup("deprecated"); ok { 435 | if !(tag == "true" || tag == "false") { 436 | return name, false, nil, fmt.Errorf("%s deprecated: boolean should be true or false: %w", f.Name, ErrSchemaInvalid) 437 | } 438 | s.Deprecated = tag == "true" 439 | } 440 | 441 | optional := false 442 | for _, tag := range jsonTags[1:] { 443 | if tag == "omitempty" { 444 | optional = true 445 | } 446 | } 447 | 448 | return name, optional, s, nil 449 | } 450 | 451 | // GenerateWithMode creates a JSON schema for a Go type. Struct field 452 | // tags can be used to provide additional metadata such as descriptions and 453 | // validation. The mode can be all, read, or write. In read or write mode 454 | // any field that is marked as the opposite will be excluded, e.g. a 455 | // write-only field would not be included in read mode. If a schema is given 456 | // as input, add to it, otherwise creates a new schema. 457 | func GenerateWithMode(t reflect.Type, mode Mode, schema *Schema) (*Schema, error) { 458 | if schema == nil { 459 | schema = &Schema{} 460 | } 461 | 462 | if t == ipType { 463 | // Special case: IP address. 464 | return &Schema{Type: []string{TypeString, "null"}, Format: "ipv4"}, nil // #Edit from: return &Schema{Type: TypeString, Format: "ipv4"}, nil 465 | } 466 | 467 | switch t.Kind() { 468 | case reflect.Struct: 469 | // Handle special cases. 
470 | switch t { 471 | case timeType: 472 | return &Schema{Type: []string{TypeString, "null"}, Format: "date-time"}, nil // #Edit from: return &Schema{Type: TypeString, Format: "date-time"}, nil 473 | case uriType: 474 | return &Schema{Type: []string{TypeString, "null"}, Format: "uri"}, nil // #Edit from: return &Schema{Type: TypeString, Format: "uri"}, nil 475 | } 476 | 477 | properties := make(map[string]*Schema) 478 | required := make([]string, 0) 479 | schema.Type = []string{TypeObject, "null"} // #Edit from: schema.Type = TypeObject 480 | schema.AdditionalProperties = false 481 | 482 | for _, f := range getFields(t) { 483 | name, optional, s, err := GenerateFromField(f, mode) 484 | if err != nil { 485 | return nil, err 486 | } 487 | 488 | if s == nil { 489 | // Skip deliberately filtered out items 490 | continue 491 | } 492 | 493 | if _, ok := properties[name]; ok { 494 | // Item already exists, ignore it since we process embedded fields 495 | // after top-level ones. 496 | continue 497 | } 498 | 499 | if s.ReadOnly && mode == ModeWrite { 500 | continue 501 | } 502 | 503 | if s.WriteOnly && mode == ModeRead { 504 | continue 505 | } 506 | 507 | properties[name] = s 508 | 509 | if !optional { 510 | required = append(required, name) 511 | } 512 | } 513 | 514 | if len(properties) > 0 { 515 | schema.Properties = properties 516 | } 517 | 518 | if len(required) > 0 { 519 | schema.Required = required 520 | } 521 | 522 | return schema, nil // #Edit new line 523 | 524 | case reflect.Map: 525 | schema.Type = []string{TypeObject, "null"} // #Edit from: schema.Type = TypeObject 526 | s, err := GenerateWithMode(t.Elem(), mode, nil) 527 | if err != nil { 528 | return nil, err 529 | } 530 | schema.AdditionalProperties = s 531 | case reflect.Slice, reflect.Array: 532 | if t.Elem().Kind() == reflect.Uint8 { 533 | // Special case: `[]byte` should be a Base-64 string. 
534 | schema.Type = []string{TypeString, "null"} // #Edit from: schema.Type = TypeString 535 | } else { 536 | schema.Type = []string{TypeArray, "null"} // #Edit from: schema.Type = TypeArray 537 | s, err := GenerateWithMode(t.Elem(), mode, nil) 538 | if err != nil { 539 | return nil, err 540 | } 541 | schema.Items = s 542 | } 543 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32: 544 | schema.Type = []string{TypeInteger, "null"} // #Edit from: schema.Type = TypeInteger 545 | schema.Format = "int32" 546 | case reflect.Int64: 547 | schema.Type = []string{TypeInteger, ""} // #Edit from: schema.Type = TypeInteger 548 | schema.Format = "int64" 549 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32: 550 | // Unsigned integers can't be negative. 551 | schema.Type = []string{TypeInteger, "null"} // #Edit from: schema.Type = TypeInteger 552 | schema.Format = "int32" 553 | schema.Minimum = F(0.0) 554 | case reflect.Uint64: 555 | schema.Type = []string{TypeInteger, "null"} // #Edit from: schema.Type = TypeInteger 556 | schema.Format = "int64" 557 | schema.Minimum = F(0.0) 558 | case reflect.Float32: 559 | schema.Type = []string{TypeNumber, "null"} // #Edit from: schema.Type = TypeInteger 560 | schema.Format = "float" 561 | case reflect.Float64: 562 | schema.Type = []string{TypeNumber, "null"} // #Edit from: schema.Type = TypeInteger 563 | schema.Format = "double" 564 | case reflect.Bool: 565 | schema.Type = []string{TypeBoolean, "null"} // #Edit from: schema.Type = TypeInteger 566 | case reflect.String: 567 | schema.Type = []string{TypeString, "null"} // #Edit from: schema.Type = TypeInteger 568 | case reflect.Ptr: 569 | return GenerateWithMode(t.Elem(), mode, schema) 570 | case reflect.Interface: 571 | // Interfaces can be any type. 572 | case reflect.Uintptr, reflect.UnsafePointer, reflect.Func: 573 | // Ignored... 
574 | default: 575 | return nil, fmt.Errorf("unsupported type %s from %s", t.Kind(), t) 576 | } 577 | 578 | return schema, nil 579 | } 580 | -------------------------------------------------------------------------------- /source.go: -------------------------------------------------------------------------------- 1 | package airbyte 2 | 3 | // Source is the only interface you need to define to create your source! 4 | type Source interface { 5 | // Spec returns the input "form" spec needed for your source 6 | Spec(logTracker LogTracker) (*ConnectorSpecification, error) 7 | // Check verifies the source - usually verify creds/connection etc. 8 | Check(srcCfgPath string, logTracker LogTracker) error 9 | // Discover returns the schema of the data you want to sync 10 | Discover(srcConfigPath string, logTracker LogTracker) (*Catalog, error) 11 | // Read will read the actual data from your source and use tracker.Record(), tracker.State() and tracker.Log() to sync data with airbyte/destinations 12 | // MessageTracker is thread-safe and so it is completely find to spin off goroutines to sync your data (just don't forget your waitgroups :)) 13 | // returning an error from this will cancel the sync and returning a nil from this will successfully end the sync 14 | Read(sourceCfgPath string, prevStatePath string, configuredCat *ConfiguredCatalog, 15 | tracker MessageTracker) error 16 | } 17 | -------------------------------------------------------------------------------- /sourceRunner.go: -------------------------------------------------------------------------------- 1 | package airbyte 2 | 3 | import ( 4 | "io" 5 | "log" 6 | "os" 7 | ) 8 | 9 | // SourceRunner acts as an "orchestrator" of sorts to run your source for you 10 | type SourceRunner struct { 11 | w io.Writer 12 | src Source 13 | msgTracker MessageTracker 14 | } 15 | 16 | // NewSourceRunner takes your defined Source and plugs it in with the rest of airbyte 17 | func NewSourceRunner(src Source, w io.Writer) 
SourceRunner { 18 | w = newSafeWriter(w) 19 | msgTracker := MessageTracker{ 20 | Record: newRecordWriter(w), 21 | State: newStateWriter(w), 22 | Log: newLogWriter(w), 23 | } 24 | 25 | return SourceRunner{ 26 | w: w, 27 | src: src, 28 | msgTracker: msgTracker, 29 | } 30 | } 31 | 32 | // Start starts your source 33 | // Example usage would look like this in your main.go 34 | // func() main { 35 | // src := newCoolSource() 36 | // runner := airbyte.NewSourceRunner(src) 37 | // err := runner.Start() 38 | // if err != nil { 39 | // log.Fatal(err) 40 | // } 41 | // } 42 | // Yes, it really is that easy! 43 | func (sr SourceRunner) Start() error { 44 | switch cmd(os.Args[1]) { 45 | case cmdSpec: 46 | spec, err := sr.src.Spec(LogTracker{ 47 | Log: sr.msgTracker.Log, 48 | }) 49 | if err != nil { 50 | sr.msgTracker.Log(LogLevelError, "failed"+err.Error()) 51 | return err 52 | } 53 | return write(sr.w, &message{ 54 | Type: msgTypeSpec, 55 | ConnectorSpecification: spec, 56 | }) 57 | 58 | case cmdCheck: 59 | inP, err := getSourceConfigPath() 60 | if err != nil { 61 | return err 62 | } 63 | err = sr.src.Check(inP, LogTracker{ 64 | Log: sr.msgTracker.Log, 65 | }) 66 | if err != nil { 67 | log.Println(err) 68 | return write(sr.w, &message{ 69 | Type: msgTypeConnectionStat, 70 | connectionStatus: &connectionStatus{ 71 | Status: checkStatusFailed, 72 | }, 73 | }) 74 | } 75 | 76 | return write(sr.w, &message{ 77 | Type: msgTypeConnectionStat, 78 | connectionStatus: &connectionStatus{ 79 | Status: checkStatusSuccess, 80 | }, 81 | }) 82 | 83 | case cmdDiscover: 84 | inP, err := getSourceConfigPath() 85 | if err != nil { 86 | return err 87 | } 88 | ct, err := sr.src.Discover(inP, LogTracker{ 89 | Log: sr.msgTracker.Log}, 90 | ) 91 | if err != nil { 92 | return err 93 | } 94 | return write(sr.w, &message{ 95 | Type: msgTypeCatalog, 96 | Catalog: ct, 97 | }) 98 | 99 | case cmdRead: 100 | var incat ConfiguredCatalog 101 | p, err := getCatalogPath() 102 | if err != nil { 103 | return err 
104 | } 105 | 106 | err = UnmarshalFromPath(p, &incat) 107 | if err != nil { 108 | return err 109 | } 110 | 111 | srp, err := getSourceConfigPath() 112 | if err != nil { 113 | return err 114 | } 115 | 116 | stp, err := getStatePath() 117 | if err != nil { 118 | return err 119 | } 120 | 121 | err = sr.src.Read(srp, stp, &incat, sr.msgTracker) 122 | if err != nil { 123 | log.Println("failed") 124 | return err 125 | } 126 | 127 | } 128 | 129 | return nil 130 | } 131 | -------------------------------------------------------------------------------- /trackers.go: -------------------------------------------------------------------------------- 1 | package airbyte 2 | 3 | // MessageTracker is used to encap State tracking, Record tracking and Log tracking 4 | // It's thread safe 5 | type MessageTracker struct { 6 | // State will save an arbitrary JSON blob to airbyte state 7 | State StateWriter 8 | // Record will emit a record (data point) out to airbyte to sync with appropriate timestamps 9 | Record RecordWriter 10 | // Log logs out to airbyte 11 | Log LogWriter 12 | } 13 | 14 | // LogTracker is a single struct which holds a tracker which can be used for logs 15 | type LogTracker struct { 16 | Log LogWriter 17 | } 18 | --------------------------------------------------------------------------------