├── docs └── .gitkeep ├── datahub ├── .gitkeep ├── version.go ├── doc.go ├── parameter.go ├── utils_test.go ├── resource.go ├── subscription.go ├── util │ ├── format.go │ └── wrapper.go ├── json_helper.go ├── client_config.go ├── utils.go ├── config.go ├── shard.go ├── compress_test.go ├── pbmodel │ └── datahub.proto ├── async_producer_test.go ├── avro_helper.go ├── topic_test.go ├── account.go ├── record_test.go ├── topic.go ├── avro_helper_test.go ├── compress.go ├── data_serializer_test.go ├── schemaclient.go ├── batch_serializer_test.go ├── data_serializer.go ├── producer.go ├── batch_serializer.go ├── record.go ├── restclient.go ├── rw_api_test.go └── datahub.go ├── .gitignore ├── examples ├── api │ ├── constant.go │ ├── shard.go │ ├── project.go │ ├── subscription.go │ ├── offset.go │ ├── topic.go │ └── connector.go ├── datahubcmd │ ├── project.go │ ├── maincmd.go │ ├── record.go │ └── topic.go ├── producer │ └── producer.go ├── async_producer │ └── async_producer.go └── consume │ └── consumption.go ├── go.mod ├── test └── e2e │ ├── parameter.go │ └── batch_test.go ├── README.md └── LICENSE /docs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datahub/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /datahub/version.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | const ( 4 | DATAHUB_SDK_VERSION = "1.1.0" 5 | DATAHUB_CLIENT_VERSION = "1.2" 6 | ) 7 | -------------------------------------------------------------------------------- /datahub/doc.go: -------------------------------------------------------------------------------- 1 | // Package datahub is the official Aliyun DataHub SDK for the Go programming language. 2 | // 3 | // The DataHub SDK for Go provides APIs and utilities that developers can use to 4 | // build Go applications that use Aliyun services, such as Aliyun MaxCompute. 
5 | // 6 | // pack 7 | package datahub 8 | -------------------------------------------------------------------------------- /examples/api/constant.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 5 | ) 6 | 7 | const ( 8 | accessId = "" 9 | accessKey = "" 10 | endpoint = "" 11 | projectName = "" 12 | topicName = "" 13 | blobTopicName = "" 14 | subId = "" 15 | connectorId = "" 16 | 17 | spiltShardId = "" 18 | mergeShardId = "" 19 | mergeAdjacentShardId = "" 20 | 21 | odpsEndpoint = "" 22 | odpsProject = "" 23 | odpsTable = "" 24 | odpsAccessId = "" 25 | odpsAccessKey = "" 26 | ) 27 | 28 | var dh datahub.DataHubApi 29 | -------------------------------------------------------------------------------- /datahub/parameter.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | type CreateTopicParameter struct { 4 | ShardCount int 5 | LifeCycle int 6 | Comment string 7 | RecordType RecordType 8 | RecordSchema *RecordSchema 9 | ExpandMode ExpandMode 10 | } 11 | 12 | type UpdateTopicParameter struct { 13 | LifeCycle int 14 | Comment string 15 | } 16 | 17 | type CreateConnectorParameter struct { 18 | SinkStartTime int64 19 | ConnectorType ConnectorType 20 | ColumnFields []string 21 | ColumnNameMap map[string]string 22 | Config interface{} 23 | } 24 | 25 | type UpdateConnectorParameter struct { 26 | ColumnFields []string 27 | ColumnNameMap map[string]string 28 | Config interface{} 29 | } 30 | -------------------------------------------------------------------------------- /datahub/utils_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestCalculateHashCode(t *testing.T) { 10 | res, err := calculateHashCode("aaa") 11 | assert.Nil(t, err) 12 | assert.Equal(t, res, uint32(876991330)) 13 | 14 | res, err = calculateHashCode("test") 15 | assert.Nil(t, err) 16 | assert.Equal(t, res, uint32(2949673445)) 17 | } 18 | 19 | func TestCalculateMD5(t *testing.T) { 20 | res, err := calculateMD5("aaa") 21 | assert.Nil(t, err) 22 | assert.Equal(t, res, "47bce5c74f589f4867dbd57e9ca9f808") 23 | 24 | res, err = calculateMD5("test") 25 | assert.Nil(t, err) 26 | assert.Equal(t, res, "098f6bcd4621d373cade4e832627b4f6") 27 | } 28 | -------------------------------------------------------------------------------- /datahub/resource.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | const ( 4 | projectsPath = "/projects" 5 | projectPath = "/projects/%s" 6 | topicsPath = "/projects/%s/topics" 7 | topicPath = "/projects/%s/topics/%s" 8 | shardsPath = "/projects/%s/topics/%s/shards" 9 | shardPath = "/projects/%s/topics/%s/shards/%s" 10 | 11 | connectorsPath = "/projects/%s/topics/%s/connectors" 12 | connectorPath = "/projects/%s/topics/%s/connectors/%s" 13 | consumerGroupPath = "/projects/%s/topics/%s/subscriptions/%s" 14 | 15 | subscriptionsPath = "/projects/%s/topics/%s/subscriptions" 16 | subscriptionPath = "/projects/%s/topics/%s/subscriptions/%s" 17 | offsetsPath = "/projects/%s/topics/%s/subscriptions/%s/offsets" 18 | ) 19 | -------------------------------------------------------------------------------- /datahub/subscription.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 
| type SubscriptionEntry struct { 4 | SubId string `json:"SubId"` 5 | TopicName string `json:"TopicName"` 6 | IsOwner bool `json:"IsOwner"` 7 | Type SubscriptionType `json:"Type"` 8 | State SubscriptionState `json:"State,omitempty"` 9 | Comment string `json:"Comment,omitempty"` 10 | CreateTime int64 `json:"CreateTime"` 11 | LastModifyTime int64 `json:"LastModifyTime"` 12 | } 13 | 14 | type SubscriptionOffset struct { 15 | Timestamp int64 `json:"Timestamp"` 16 | Sequence int64 `json:"Sequence"` 17 | VersionId int64 `json:"Version"` 18 | SessionId *int64 `json:"SessionId"` 19 | } 20 | -------------------------------------------------------------------------------- /datahub/util/format.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "strconv" 5 | "unicode" 6 | ) 7 | 8 | func CheckProjectName(projectName string) bool { 9 | return isNameValid(projectName, 3, 32) 10 | } 11 | 12 | func CheckTopicName(topicName string) bool { 13 | return isNameValid(topicName, 1, 128) 14 | } 15 | 16 | func CheckComment(comment string) bool { 17 | if comment == "" || len(comment) > 1024 { 18 | return false 19 | } 20 | return true 21 | } 22 | 23 | func CheckShardId(shardId string) bool { 24 | if _, err := strconv.Atoi(shardId); err != nil { 25 | return false 26 | } 27 | return true 28 | } 29 | 30 | func isNameValid(name string, minLen, maxLen int) bool { 31 | if name == "" || len(name) > maxLen || len(name) < minLen { 32 | return false 33 | } 34 | for _, c := range name { 35 | if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '_' { 36 | return false 37 | } 38 | } 39 | return true 40 | } 41 | -------------------------------------------------------------------------------- /datahub/json_helper.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | jsoniter "github.com/json-iterator/go" 5 | ) 6 | 7 | var parser = jsoniter.Config{ 8 | UseNumber: true, 9 | }.Froze() 10 | 11 | func parseJson(buf []byte) (map[string]any, error) { 12 | obj := make(map[string]any) 13 | err := parser.Unmarshal(buf, &obj) 14 | if err != nil { 15 | return nil, err 16 | } 17 | 18 | return obj, nil 19 | } 20 | 21 | type JsonParseOption func(*jsonParseConfig) error 22 | 23 | func newDefaultJsonParseConfig() *jsonParseConfig { 24 | return &jsonParseConfig{ 25 | ignoreNotExistKey: false, 26 | } 27 | } 28 | 29 | func getJsonParseConfig(opts ...JsonParseOption) (*jsonParseConfig, error) { 30 | config := newDefaultJsonParseConfig() 31 | for _, opt := range opts { 32 | if err := opt(config); err != nil { 33 | return nil, err 34 | } 35 | } 36 | return config, nil 37 | } 38 | 39 | type jsonParseConfig struct { 40 | ignoreNotExistKey bool 41 | } 42 | 43 | func WithIgnoreNotExistKey(b bool) JsonParseOption { 44 | return func(o *jsonParseConfig) error { 45 | o.ignoreNotExistKey = b 46 | return nil 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /datahub/util/wrapper.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "hash/crc32" 7 | "reflect" 8 | ) 9 | 10 | func WrapMessage(data []byte) []byte { 11 | h1 := []byte("DHUB") 12 | 13 | crc32c := crc32.MakeTable(crc32.Castagnoli) 14 | crc := crc32.Checksum(data, crc32c) 15 | h2 := make([]byte, 4) 16 | binary.BigEndian.PutUint32(h2, crc) 17 | 18 | h3 := make([]byte, 4) 19 | binary.BigEndian.PutUint32(h3, 
uint32(len(data))) 20 | 21 | //buf := make([]byte,0,len(h1)+len(h2)+len(h3)+len(data)) 22 | buf := append(h1, h2...) 23 | buf = append(buf, h3...) 24 | buf = append(buf, data...) 25 | return buf 26 | 27 | } 28 | 29 | func UnwrapMessage(data []byte) ([]byte, error) { 30 | 31 | crc := data[4:8] 32 | 33 | body := data[12:] 34 | crc32c := crc32.MakeTable(crc32.Castagnoli) 35 | cs := crc32.Checksum(body, crc32c) 36 | computedCrc := make([]byte, 4) 37 | binary.BigEndian.PutUint32(computedCrc, cs) 38 | 39 | if !reflect.DeepEqual(crc, computedCrc) { 40 | return nil, fmt.Errorf("parse pb response body fail, error: crc check error. crc: %s, compute crc: %s", crc, computedCrc) 41 | } 42 | 43 | return body, nil 44 | 45 | } 46 | -------------------------------------------------------------------------------- /datahub/client_config.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import "time" 4 | 5 | type SendStrategy int 6 | 7 | const ( 8 | RoundRobin SendStrategy = iota 9 | Random 10 | ) 11 | 12 | type BaseConfig struct { 13 | Account Account 14 | UserAgent string 15 | Endpoint string 16 | Project string 17 | Topic string 18 | MaxRetry int 19 | RetryInterval time.Duration 20 | } 21 | 22 | type ProducerConfig struct { 23 | BaseConfig 24 | SendStrategy SendStrategy 25 | Parittioner PartitionFunc 26 | Protocol Protocol 27 | MaxAsyncFlightingNum int 28 | MaxAsyncBufferNum int 29 | MaxAsyncBufferTime time.Duration 30 | EnableSuccessCh bool 31 | EnableErrorCh bool 32 | } 33 | 34 | func NewProducerConfig() *ProducerConfig { 35 | return &ProducerConfig{ 36 | BaseConfig: BaseConfig{ 37 | MaxRetry: 3, 38 | RetryInterval: 500 * time.Millisecond, 39 | }, 40 | SendStrategy: RoundRobin, 41 | Parittioner: DefaultPartitionFunc, 42 | Protocol: Batch, 43 | MaxAsyncFlightingNum: 16, 44 | MaxAsyncBufferNum: 1000, 45 | MaxAsyncBufferTime: 5 * time.Second, 46 | EnableSuccessCh: true, 47 | EnableErrorCh: true, 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/datahubcmd/project.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | 7 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 8 | ) 9 | 10 | // subcommands 11 | var ListProjectsCommand *flag.FlagSet 12 | var GetProjectCommand *flag.FlagSet 13 | 14 | // flag arguments 15 | var ProjectName string 16 | 17 | func init() { 18 | // list projects cmd 19 | ListProjectsCommand = flag.NewFlagSet("lp", flag.ExitOnError) 20 | RegisterSubCommand("lp", ListProjectsCommand, list_projects_parsed_check, list_projects) 21 | 22 | // get project cmd 23 | GetProjectCommand = flag.NewFlagSet("gp", flag.ExitOnError) 24 | GetProjectCommand.StringVar(&ProjectName, "project", "", "project name. 
(Required)") 25 | RegisterSubCommand("gp", GetProjectCommand, get_project_parsed_check, get_project) 26 | } 27 | 28 | func list_projects_parsed_check() bool { 29 | return true 30 | } 31 | 32 | func list_projects(dh datahub.DataHubApi) error { 33 | projects, err := dh.ListProject() 34 | if err != nil { 35 | return err 36 | } 37 | fmt.Println(*projects) 38 | return nil 39 | } 40 | 41 | func get_project_parsed_check() bool { 42 | if ProjectName == "" { 43 | return false 44 | } 45 | return true 46 | } 47 | 48 | func get_project(dh datahub.DataHubApi) error { 49 | project, err := dh.GetProject(ProjectName) 50 | if err != nil { 51 | return err 52 | } 53 | fmt.Println(*project) 54 | return nil 55 | } 56 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aliyun/aliyun-datahub-sdk-go 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.9 6 | 7 | require ( 8 | github.com/aliyun/alibaba-cloud-sdk-go v1.62.709 9 | github.com/aliyun/credentials-go v1.4.8 10 | github.com/golang/protobuf v1.5.4 11 | github.com/hamba/avro/v2 v2.28.0 12 | github.com/json-iterator/go v1.1.12 13 | github.com/klauspost/compress v1.17.11 14 | github.com/pierrec/lz4 v2.6.1+incompatible 15 | github.com/shopspring/decimal v1.3.1 16 | github.com/sirupsen/logrus v1.9.3 17 | github.com/stretchr/testify v1.9.0 18 | google.golang.org/protobuf v1.33.0 19 | ) 20 | 21 | require ( 22 | github.com/alibabacloud-go/debug v1.0.1 // indirect 23 | github.com/alibabacloud-go/tea v1.2.2 // indirect 24 | github.com/davecgh/go-spew v1.1.1 // indirect 25 | github.com/frankban/quicktest v1.14.6 // indirect 26 | github.com/google/go-cmp v0.5.9 // indirect 27 | github.com/jmespath/go-jmespath v0.4.0 // indirect 28 | github.com/mitchellh/mapstructure v1.5.0 // indirect 29 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 30 | github.com/modern-go/reflect2 v1.0.2 // indirect 31 | github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect 32 | github.com/pmezard/go-difflib v1.0.0 // indirect 33 | golang.org/x/net v0.20.0 // indirect 34 | golang.org/x/sys v0.16.0 // indirect 35 | gopkg.in/ini.v1 v1.67.0 // indirect 36 | gopkg.in/yaml.v3 v3.0.1 // indirect 37 | ) 38 | -------------------------------------------------------------------------------- /examples/api/shard.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 | dh = datahub.New(accessId, accessKey, endpoint) 11 | 12 | listShard() 13 | 14 | spiltShard() 15 | 16 | mergeShard() 17 | } 18 | 19 | func listShard() { 20 | ls, err := dh.ListShard(projectName, topicName) 21 | if err != nil { 22 | fmt.Println("get shard list failed") 23 | fmt.Println(err) 24 | } 25 | fmt.Println("get shard list successful") 26 | for _, shard := range ls.Shards { 27 | fmt.Println(shard) 28 | } 29 | } 30 | 31 | func spiltShard() { 32 | ss, err := dh.SplitShard(projectName, topicName, spiltShardId) 33 | if err != nil { 34 | fmt.Println("split shard failed") 35 | fmt.Println(err) 36 | } 37 | fmt.Println("split shard successful") 38 | fmt.Println(ss) 39 | 40 | // After splitting, you need to wait for all shard states to be ready 41 | // before you can perform related operations. 
42 | dh.WaitAllShardsReady(projectName, topicName) 43 | } 44 | 45 | func mergeShard() { 46 | ms, err := dh.MergeShard(projectName, topicName, mergeShardId, mergeAdjacentShardId) 47 | if err != nil { 48 | fmt.Println("merge shard failed") 49 | fmt.Println(err) 50 | } 51 | fmt.Println("merge shard successful") 52 | fmt.Println(ms) 53 | 54 | // After splitting, you need to wait for all shard states to be ready 55 | // before you can perform related operations. 56 | dh.WaitAllShardsReady(projectName, topicName) 57 | } 58 | -------------------------------------------------------------------------------- /datahub/utils.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "crypto/md5" 5 | "fmt" 6 | "hash/crc32" 7 | "hash/fnv" 8 | "io" 9 | "net" 10 | "os" 11 | "time" 12 | 13 | log "github.com/sirupsen/logrus" 14 | ) 15 | 16 | func calculateCrc32(buf []byte) uint32 { 17 | table := crc32.MakeTable(crc32.Castagnoli) 18 | return crc32.Checksum(buf, table) 19 | } 20 | 21 | func calculateHashCode(input string) (uint32, error) { 22 | fnv32 := fnv.New32a() 23 | _, err := fnv32.Write([]byte(input)) 24 | if err != nil { 25 | return 0, err 26 | } 27 | return fnv32.Sum32(), nil 28 | } 29 | 30 | func calculateMD5(input string) (string, error) { 31 | hasher := md5.New() 32 | _, err := io.WriteString(hasher, input) 33 | if err != nil { 34 | return "", err 35 | } 36 | 37 | hashBytes := hasher.Sum(nil) 38 | return fmt.Sprintf("%x", hashBytes), nil 39 | } 40 | 41 | func getHostIP() (string, error) { 42 | hostname, err := os.Hostname() 43 | if err != nil { 44 | return "", err 45 | } 46 | 47 | ips, err := net.LookupIP(hostname) 48 | if err != nil { 49 | return "", err 50 | } 51 | 52 | for _, ip := range ips { 53 | if ip.To4() != nil && !ip.IsLoopback() { 54 | return ip.String(), nil 55 | } 56 | } 57 | 58 | return "", fmt.Errorf("cannot get host ip") 59 | } 60 | 61 | func withRecover(key string, fn func()) { 62 | defer func() { 63 | if err := recover(); err != nil { 64 | log.Errorf("%s panic, err:%v", key, err) 65 | } 66 | time.Sleep(time.Second) 67 | }() 68 | 69 | fn() 70 | } 71 | -------------------------------------------------------------------------------- /examples/producer/producer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func check(err error) { 10 | if err != nil { 11 | panic(err) 12 | } 13 | } 14 | 15 | func genRecord(schema *datahub.RecordSchema) datahub.IRecord { 16 | if schema != nil { // TUPLE record 17 | record := datahub.NewTupleRecord(schema) 18 | err := record.SetValueByName("string_field", "test111") 19 | check(err) 20 | err = record.SetValueByName("double_field", 3.145) 21 | check(err) 22 | err = record.SetValueByName("integer_field", 123456) 23 | check(err) 24 | return record 25 | } else { 26 | str := "hello world" 27 | return datahub.NewBlobRecord([]byte(str)) 28 | } 29 | } 30 | 31 | func syncSend() { 32 | cfg := datahub.NewProducerConfig() 33 | cfg.Account = datahub.NewAliyunAccount("ak", "sk") 34 | cfg.Endpoint = "https://dh-cn-wulanchabu.aliyuncs.com" 35 | cfg.Project = "test_project" 36 | cfg.Topic = "test_topic" 37 | 38 | producer := datahub.NewProducer(cfg) 39 | err := producer.Init() 40 | 41 | if err != nil { 42 | panic(err) 43 | } 44 | 45 | schema, err := producer.GetSchema() 46 | if err != nil { 47 | panic(err) 48 | } 49 | 50 | // recommended size 512KB ~ 
1MB, cannot exceed 4MB 50 | records := make([]datahub.IRecord, 0) 51 | for i := 0; i < 100; i++ { 52 | records = append(records, genRecord(schema)) 53 | } 54 | 55 | details, err := producer.Send(records) 56 | if err != nil { 57 | panic(err) 58 | } 59 | 60 | fmt.Printf("send to shard %s success\n", details.ShardId) 61 | } 62 | 63 | func main() { 64 | syncSend() 65 | } 66 | -------------------------------------------------------------------------------- /datahub/config.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "net/http" 7 | "os" 8 | "runtime" 9 | "time" 10 | ) 11 | 12 | type Protocol int 13 | 14 | const ( 15 | unknownProtocol Protocol = iota 16 | Protobuf 17 | Batch 18 | ) 19 | 20 | type Config struct { 21 | UserAgent string 22 | CompressorType CompressorType 23 | Protocol Protocol 24 | HttpClient *http.Client 25 | } 26 | 27 | func NewDefaultConfig() *Config { 28 | return &Config{ 29 | UserAgent: DefaultUserAgent(), 30 | CompressorType: ZSTD, 31 | Protocol: Batch, 32 | HttpClient: DefaultHttpClient(), 33 | } 34 | } 35 | 36 | // DefaultHttpClient returns a default HTTP client with sensible values. 37 | func DefaultHttpClient() *http.Client { 38 | return &http.Client{ 39 | Transport: &http.Transport{ 40 | DialContext: TraceDialContext(10 * time.Second), 41 | Proxy: http.ProxyFromEnvironment, 42 | MaxIdleConns: math.MaxInt32, 43 | MaxIdleConnsPerHost: math.MaxInt32, 44 | MaxConnsPerHost: math.MaxInt32, 45 | IdleConnTimeout: 30 * time.Second, 46 | TLSHandshakeTimeout: 10 * time.Second, 47 | ExpectContinueTimeout: 1 * time.Second, 48 | ResponseHeaderTimeout: 100 * time.Second, 49 | }, 50 | } 51 | } 52 | 53 | // DefaultUserAgent returns a default user agent 54 | func DefaultUserAgent() string { 55 | hostIp, _ := getHostIP() 56 | return fmt.Sprintf("godatahub/%s %s@%s#%s#%d", DATAHUB_SDK_VERSION, runtime.Version(), runtime.GOOS, hostIp, os.Getpid()) 57 | } 58 | 59 | func defaultClientAgent() string { 60 | hostIp, _ := getHostIP() 61 | return fmt.Sprintf("goclient/%s %s@%s#%s#%d", DATAHUB_SDK_VERSION, runtime.Version(), runtime.GOOS, hostIp, os.Getpid()) 62 | } 63 | -------------------------------------------------------------------------------- /examples/api/project.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 | 11 | dh = datahub.New(accessId, accessKey, endpoint) 12 | createProject() 13 | listProject() 14 | getProject() 15 | updateProject() 16 | deleteProject() 17 | 18 | } 19 | 20 | func createProject() { 21 | if _, err := dh.CreateProject(projectName, "project comment"); err != nil { 22 | if _, ok := err.(*datahub.ResourceExistError); ok { 23 | fmt.Println("project already exists") 24 | } else { 25 | fmt.Println("create project failed") 26 | fmt.Println(err) 27 | return 28 | } 29 | } 30 | fmt.Println("create successful") 31 | } 32 | 33 | func deleteProject() { 34 | if _, err := dh.DeleteProject(projectName); err != nil { 35 | if _, ok := err.(*datahub.ResourceNotFoundError); ok { 36 | fmt.Println("project not found") 37 | } else { 38 | fmt.Println("delete project failed") 39 | fmt.Println(err) 40 | return 41 | } 42 | } 43 | fmt.Println("delete project successful") 44 | } 45 | 46 | func listProject() { 47 | lp, err := dh.ListProject() 48 | if err != nil { 49 | fmt.Println("get project list failed") 50 | fmt.Println(err) 51 | 
return 52 | } 53 | fmt.Println("get project list successful") 54 | for _, projectName := range lp.ProjectNames { 55 | fmt.Println(projectName) 56 | } 57 | } 58 | 59 | func getProject() { 60 | gp, err := dh.GetProject(projectName) 61 | if err != nil { 62 | fmt.Println("get project message failed") 63 | fmt.Println(err) 64 | return 65 | } 66 | fmt.Println("get project message successful") 67 | fmt.Println(*gp) 68 | 69 | } 70 | 71 | func updateProject() { 72 | if _, err := dh.UpdateProject(projectName, "new project comment"); err != nil { 73 | fmt.Println("update project comment failed") 74 | fmt.Println(err) 75 | return 76 | } 77 | fmt.Println("update project comment successful") 78 | } 79 | -------------------------------------------------------------------------------- /datahub/shard.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | "math/big" 6 | "strings" 7 | ) 8 | 9 | type ShardEntry struct { 10 | ShardId string `json:"ShardId"` 11 | State ShardState `json:"State"` 12 | BeginHashKey string `json:"BeginHashKey"` 13 | EndHashKey string `json:"EndHashKey"` 14 | ClosedTime int64 `json:"ClosedTime"` 15 | ParentShardIds []string `json:"ParentShardIds"` 16 | LeftShardId string `json:"LeftShardId"` 17 | RightShardId string `json:"RightShardId"` 18 | Address string `json:"Address"` 19 | } 20 | 21 | func generateSpliteKey(projectName, topicName, shardId string, datahub DataHubApi) (string, error) { 22 | ls, err := datahub.ListShard(projectName, topicName) 23 | if err != nil { 24 | return "", err 25 | } 26 | shards := ls.Shards 27 | splitKey := "" 28 | for _, shard := range shards { 29 | if strings.EqualFold(shardId, shard.ShardId) { 30 | if shard.State != ACTIVE { 31 | return "", fmt.Errorf("only active shard can be split,the shard %s state is %s", shard.ShardId, shard.State) 32 | } 33 | splitKey, err = getSplitKey(shard.BeginHashKey, shard.EndHashKey) 34 | splitKey = strings.ToUpper(splitKey) 35 | if err != nil { 36 | return "", err 37 | } 38 | } 39 | } 40 | if splitKey == "" { 41 | return "", fmt.Errorf("shard not exist") 42 | } 43 | return splitKey, nil 44 | } 45 | 46 | func getSplitKey(beginHashKey, endHashKey string) (string, error) { 47 | var begin, end, sum, quo big.Int 48 | base := 16 49 | 50 | if len(beginHashKey) != 32 || len(endHashKey) != 32 { 51 | return "", fmt.Errorf("invalid Hash Key Range") 52 | } 53 | _, ok := begin.SetString(beginHashKey, base) 54 | if !ok { 55 | return "", fmt.Errorf("invalid Hash Key Range") 56 | } 57 | _, ok = end.SetString(endHashKey, base) 58 | if !ok { 59 | return "", fmt.Errorf("invalid Hash Key Range") 60 | } 61 | 62 | sum.Add(&begin, &end) 63 | quo.Quo(&sum, big.NewInt(2)) 64 | return quo.Text(base), nil 65 | 66 | } 67 | -------------------------------------------------------------------------------- /datahub/compress_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestGetCompressTypeFromValue(t *testing.T) { 10 | ret := getCompressTypeFromValue(0) 11 | assert.Equal(t, NOCOMPRESS, ret) 12 | 13 | ret = getCompressTypeFromValue(1) 14 | assert.Equal(t, DEFLATE, ret) 15 | 16 | ret = getCompressTypeFromValue(2) 17 | assert.Equal(t, LZ4, ret) 18 | 19 | ret = getCompressTypeFromValue(3) 20 | assert.Equal(t, ZSTD, ret) 21 | 22 | ret = getCompressTypeFromValue(4) 23 | assert.Equal(t, NOCOMPRESS, ret) 24 | 25 | ret = 
getCompressTypeFromValue(-1) 26 | assert.Equal(t, NOCOMPRESS, ret) 27 | } 28 | 29 | func TestInvalidLz4(t *testing.T) { 30 | compressor := lz4Compressor{} 31 | 32 | data := []byte("hello") // len=5 33 | cData, err := compressor.Compress(data) 34 | assert.Nil(t, err) 35 | assert.Equal(t, 6, len(cData)) 36 | } 37 | 38 | func TestLz4(t *testing.T) { 39 | compressor := lz4Compressor{} 40 | 41 | data := []byte("hello world,aaaaaaaaaaaaaaaaaaaaa,bbb,cccccccccccccccccccc") 42 | cData, err := compressor.Compress(data) 43 | assert.Nil(t, err) 44 | 45 | rawData, err := compressor.DeCompress(cData, int64(len(data))) 46 | assert.Nil(t, err) 47 | assert.Equal(t, data, rawData) 48 | } 49 | 50 | func TestDefalte(t *testing.T) { 51 | compressor := deflateCompressor{} 52 | 53 | data := []byte("hello world,aaaaaaaaaaaaaaaaaaaaa,bbb,cccccccccccccccccccc") 54 | cData, err := compressor.Compress(data) 55 | assert.Nil(t, err) 56 | 57 | rawData, err := compressor.DeCompress(cData, int64(len(data))) 58 | assert.Nil(t, err) 59 | assert.Equal(t, data, rawData) 60 | } 61 | 62 | func TestZstd(t *testing.T) { 63 | compressor := zstdCompressor{} 64 | 65 | data := []byte("hello world,aaaaaaaaaaaaaaaaaaaaa,bbb,cccccccccccccccccccc") 66 | cData, err := compressor.Compress(data) 67 | assert.Nil(t, err) 68 | 69 | rawData, err := compressor.DeCompress(cData, int64(len(data))) 70 | assert.Nil(t, err) 71 | assert.Equal(t, data, rawData) 72 | } 73 | -------------------------------------------------------------------------------- /examples/api/subscription.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 | dh = datahub.New(accessId, accessKey, endpoint) 11 | } 12 | 13 | func createSubscription() { 14 | csr, err := dh.CreateSubscription(projectName, topicName, "sub comment") 15 | if err != nil { 16 | fmt.Println("create subscription failed") 17 | fmt.Println(err) 18 | return 19 | } 20 | fmt.Println("create subscription successful") 21 | fmt.Println(*csr) 22 | } 23 | 24 | func getSubscription() { 25 | gs, err := dh.GetSubscription(projectName, topicName, subId) 26 | if err != nil { 27 | fmt.Println("get subscription failed") 28 | fmt.Println(err) 29 | return 30 | } 31 | fmt.Println("get subscription successful") 32 | fmt.Println(gs) 33 | } 34 | 35 | func delSubscription() { 36 | if _, err := dh.DeleteSubscription(projectName, topicName, subId); err != nil { 37 | if _, ok := err.(*datahub.ResourceNotFoundError); ok { 38 | fmt.Println("subscription not found") 39 | } else { 40 | fmt.Println("delete subscription failed") 41 | return 42 | } 43 | } 44 | fmt.Println("delete subscription successful") 45 | } 46 | 47 | func listSubscription() { 48 | pageIndex := 1 49 | pageSize := 5 50 | ls, err := dh.ListSubscription(projectName, topicName, pageIndex, pageSize) 51 | if err != nil { 52 | fmt.Println("get subscription list failed") 53 | fmt.Println(err) 54 | return 55 | } 56 | fmt.Println("get subscription list successful") 57 | for _, sub := range ls.Subscriptions { 58 | fmt.Println(sub) 59 | } 60 | } 61 | 62 | func updateSubscription() { 63 | if _, err := dh.UpdateSubscription(projectName, topicName, subId, "new sub comment"); err != nil { 64 | fmt.Println("update subscription comment failed") 65 | fmt.Println(err) 66 | return 67 | } 68 | fmt.Println("update subscription comment successful") 69 | } 70 | 71 | func updateSubState() { 72 | if _, err := 
dh.UpdateSubscriptionState(projectName, topicName, subId, datahub.SUB_OFFLINE); err != nil { 73 | fmt.Println("update subscription state failed") 74 | fmt.Println(err) 75 | return 76 | } 77 | defer dh.UpdateSubscriptionState(projectName, topicName, subId, datahub.SUB_ONLINE) 78 | fmt.Println("update subscription state successful") 79 | } 80 | -------------------------------------------------------------------------------- /examples/api/offset.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 | dh = datahub.New(accessId, accessKey, endpoint) 11 | } 12 | 13 | func openOffset() { 14 | shardIds := []string{"0", "1", "2"} 15 | oss, err := dh.OpenSubscriptionSession(projectName, topicName, subId, shardIds) 16 | if err != nil { 17 | fmt.Println("open session failed") 18 | fmt.Println(err) 19 | } 20 | fmt.Println("open session successful") 21 | fmt.Println(oss) 22 | } 23 | 24 | func getOffset() { 25 | shardIds := []string{"0", "1", "2"} 26 | gss, err := dh.GetSubscriptionOffset(projectName, topicName, subId, shardIds) 27 | if err != nil { 28 | fmt.Println("get session failed") 29 | fmt.Println(err) 30 | } 31 | fmt.Println("get session successful") 32 | fmt.Println(gss) 33 | } 34 | 35 | func updateOffset() { 36 | shardIds := []string{"0", "1", "2"} 37 | oss, err := dh.OpenSubscriptionSession(projectName, topicName, subId, shardIds) 38 | if err != nil { 39 | fmt.Println("open session failed") 40 | fmt.Println(err) 41 | } 42 | fmt.Println("open session successful") 43 | fmt.Println(oss) 44 | 45 | offset := oss.Offsets["0"] 46 | 47 | // set offset message 48 | offset.Sequence = 900 49 | offset.Timestamp = 1565593166690 50 | 51 | offsetMap := map[string]datahub.SubscriptionOffset{ 52 | "0": offset, 53 | } 54 | if _, err := dh.CommitSubscriptionOffset(projectName, topicName, subId, offsetMap); err != nil { 55 | if _, ok := err.(*datahub.SubscriptionOfflineError); ok { 56 | fmt.Println("the subscription has offline") 57 | } else if _, ok := err.(*datahub.SubscriptionSessionInvalidError); ok { 58 | fmt.Println("the subscription is open elsewhere") 59 | } else if _, ok := err.(*datahub.SubscriptionOffsetResetError); ok { 60 | fmt.Println("the subscription is reset elsewhere") 61 | } else { 62 | fmt.Println(err) 63 | } 64 | fmt.Println("update offset failed") 65 | return 66 | } 67 | fmt.Println("update offset successful") 68 | } 69 | 70 | func resetOffset() { 71 | 72 | offset := datahub.SubscriptionOffset{ 73 | Timestamp: 1565593166690, 74 | } 75 | offsetMap := map[string]datahub.SubscriptionOffset{ 76 | "1": offset, 77 | } 78 | 79 | if _, err := dh.ResetSubscriptionOffset(projectName, topicName, subId, offsetMap); err != nil { 80 | fmt.Println("reset offset failed") 81 | fmt.Println(err) 82 | return 83 | } 84 | fmt.Println("reset offset successful") 85 | } 86 | -------------------------------------------------------------------------------- /datahub/pbmodel/datahub.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package pbmodel; 3 | 4 | option java_package = "com.aliyun.datahub.client.model.protobuf"; 5 | option java_outer_classname = "DatahubProtos"; 6 | 7 | message StringPair 8 | { 9 | required string key = 1; 10 | required string value = 2; 11 | } 12 | 13 | message FieldData 14 | { 15 | optional bytes value = 1; 16 | } 17 | 18 | message RecordAttributes 19 | { 20 | 
repeated StringPair attributes = 1; 21 | } 22 | 23 | message RecordData 24 | { 25 | repeated FieldData data = 1; 26 | } 27 | 28 | message RecordEntry 29 | { 30 | optional string shard_id = 1; 31 | optional string hash_key = 2; 32 | optional string partition_key = 3; 33 | optional string cursor = 4; 34 | optional string next_cursor = 5; 35 | optional int64 sequence = 6; 36 | optional int64 system_time = 7; 37 | optional RecordAttributes attributes = 8; 38 | required RecordData data = 9; 39 | } 40 | 41 | message PutRecordsRequest 42 | { 43 | repeated RecordEntry records = 1; 44 | } 45 | 46 | message FailedRecord 47 | { 48 | required int32 index = 1; 49 | optional string error_code = 2; 50 | optional string error_message = 3; 51 | } 52 | 53 | message PutRecordsResponse 54 | { 55 | optional int32 failed_count = 1; 56 | repeated FailedRecord failed_records = 2; 57 | } 58 | 59 | message GetRecordsRequest 60 | { 61 | required string cursor = 1; 62 | optional int32 limit = 2 [default = 1]; 63 | } 64 | 65 | message GetRecordsResponse 66 | { 67 | required string next_cursor = 1; 68 | required int32 record_count = 2; 69 | optional int64 start_sequence = 3; 70 | repeated RecordEntry records = 4; 71 | optional int64 latest_sequence = 5 [default = -1]; 72 | optional int64 latest_time = 6 [default = -1]; 73 | } 74 | 75 | message BinaryRecordEntry 76 | { 77 | optional string cursor = 1; 78 | optional string next_cursor = 2; 79 | optional int64 sequence = 3; 80 | optional int64 system_time = 4; 81 | optional uint32 serial = 5 [default = 0]; 82 | optional bytes data = 6; 83 | } 84 | 85 | message PutBinaryRecordsRequest 86 | { 87 | repeated BinaryRecordEntry records = 1; 88 | } 89 | 90 | message GetBinaryRecordsResponse 91 | { 92 | required string next_cursor = 1; 93 | required int32 record_count = 2; 94 | optional int64 start_sequence = 3; 95 | repeated BinaryRecordEntry records = 4; 96 | optional int64 latest_sequence = 5 [default = -1]; 97 | optional int64 latest_time = 6 [default = -1]; 98 | } 99 | -------------------------------------------------------------------------------- /test/e2e/parameter.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 8 | ) 9 | 10 | var accessId = "" 11 | var accessKey = "" 12 | var endpoint = "" 13 | var projectName = "" 14 | var tupleTopicName = "" 15 | var blobTopicName = "" 16 | var batchTupleTopicName = "" 17 | var batchBlobTopicName = "" 18 | 19 | var client = datahub.New(accessId, accessKey, endpoint) 20 | var batchClient = datahub.NewBatchClient(accessId, accessKey, endpoint) 21 | 22 | func init() { 23 | var reinit = false 24 | if len(accessId) == 0 { 25 | accessId = os.Getenv("ACCESS_ID") 26 | if len(accessId) > 0 { 27 | reinit = true 28 | fmt.Printf("Inited accessId from ENV ACCESS_ID: %s\n", accessId) 29 | } 30 | } 31 | 32 | if len(accessKey) == 0 { 33 | accessKey = os.Getenv("ACCESS_KEY") 34 | if len(accessKey) > 0 { 35 | reinit = true 36 | fmt.Printf("Inited accessKey from ENV ACCESS_KEY: %s\n", accessKey) 37 | } 38 | } 39 | 40 | if len(endpoint) == 0 { 41 | endpoint = os.Getenv("ENDPOINT") 42 | if len(endpoint) > 0 { 43 | reinit = true 44 | fmt.Printf("Inited endpoint from ENV ENDPOINT: %s\n", endpoint) 45 | } 46 | } 47 | 48 | if reinit == true { 49 | client = datahub.New(accessId, accessKey, endpoint) 50 | batchClient = datahub.NewBatchClient(accessId, accessKey, endpoint) 51 | } 52 | 53 | if len(projectName) == 0 { 54 
| projectName = os.Getenv("PROJECT_NAME") 55 | if len(projectName) > 0 { 56 | fmt.Printf("Inited projectName from ENV PROJECT_NAME: %s\n", projectName) 57 | } 58 | } 59 | 60 | if len(tupleTopicName) == 0 { 61 | tupleTopicName = os.Getenv("TUPLE_TOPIC_NAME") 62 | if len(tupleTopicName) > 0 { 63 | fmt.Printf("Inited tupleTopicName from ENV TUPLE_TOPIC_NAME: %s\n", tupleTopicName) 64 | } 65 | } 66 | 67 | if len(blobTopicName) == 0 { 68 | blobTopicName = os.Getenv("BLOB_TOPIC_NAME") 69 | if len(blobTopicName) > 0 { 70 | fmt.Printf("Inited blobTopicName from ENV BLOB_TOPIC_NAME: %s\n", blobTopicName) 71 | } 72 | } 73 | 74 | if len(batchTupleTopicName) == 0 { 75 | batchTupleTopicName = os.Getenv("BATCH_TUPLE_TOPIC_NAME") 76 | if len(batchTupleTopicName) > 0 { 77 | fmt.Printf("Inited batchTupleTopicName from ENV BATCH_TUPLE_TOPIC_NAME: %s\n", batchTupleTopicName) 78 | } 79 | } 80 | 81 | if len(batchBlobTopicName) == 0 { 82 | batchBlobTopicName = os.Getenv("BATCH_BLOB_TOPIC_NAME") 83 | if len(batchBlobTopicName) > 0 { 84 | fmt.Printf("Inited batchBlobTopicName from ENV BATCH_BLOB_TOPIC_NAME: %s\n", batchBlobTopicName) 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/async_producer/async_producer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func check(err error) { 10 | if err != nil { 11 | panic(err) 12 | } 13 | } 14 | 15 | func genRecord(schema *datahub.RecordSchema) datahub.IRecord { 16 | if schema != nil { // TUPLE record 17 | record := datahub.NewTupleRecord(schema) 18 | err := record.SetValueByName("string_field", "test111") 19 | check(err) 20 | err = record.SetValueByName("double_field", 3.145) 21 | check(err) 22 | err = record.SetValueByName("integer_field", 123456) 23 | check(err) 24 | return record 25 | } else { 26 | str := "hello world" 27 | return datahub.NewBlobRecord([]byte(str)) 28 | } 29 | } 30 | 31 | func handleSuccessRun(producer datahub.AsyncProducer) { 32 | for suc := range producer.Successes() { 33 | // handle request success 34 | fmt.Printf("shard:%s, rid:%s, records:%d, latency:%v\n", 35 | suc.ShardId, suc.RequestId, len(suc.Records), suc.Latency) 36 | } 37 | } 38 | 39 | func handleFailedRun(producer datahub.AsyncProducer) { 40 | // handle request failed 41 | for err := range producer.Errors() { 42 | fmt.Printf("shard:%s, records:%d, latency:%v, error:%v\n", 43 | err.ShardId, len(err.Records), err.Latency, err.Err) 44 | } 45 | } 46 | 47 | func asyncWrite() { 48 | cfg := datahub.NewProducerConfig() 49 | cfg.Account = datahub.NewAliyunAccount("ak", "sk") 50 | cfg.Endpoint = "https://dh-cn-hangzhou.aliyuncs.com" 51 | cfg.Project = "test_project" 52 | cfg.Topic = "test_topic" 53 | 54 | producer := datahub.NewAsyncProducer(cfg) 55 | err := producer.Init() 56 | 57 | if err != nil { 58 | panic(err) 59 | } 60 | 61 | schema, err := producer.GetSchema() 62 | if err != nil { 63 | panic(err) 64 | } 65 | 66 | go handleSuccessRun(producer) 67 | go handleFailedRun(producer) 68 | 69 | for i := 0; i < 1000; i++ { 70 | producer.Input() <- genRecord(schema) 71 | } 72 | 73 | err = producer.Close() 74 | if err != nil { 75 | panic(err) 76 | } 77 | } 78 | 79 | func asyncWritewithHash() { 80 | cfg := datahub.NewProducerConfig() 81 | cfg.Account = datahub.NewAliyunAccount("ak", "sk") 82 | cfg.Endpoint = "https://dh-cn-hangzhou.aliyuncs.com" 83 | cfg.Project = "test_project" 84 | 
cfg.Topic = "test_topic" 85 | 86 | producer := datahub.NewAsyncProducer(cfg) 87 | err := producer.Init() 88 | 89 | if err != nil { 90 | panic(err) 91 | } 92 | 93 | schema, err := producer.GetSchema() 94 | if err != nil { 95 | panic(err) 96 | } 97 | 98 | go handleSuccessRun(producer) 99 | go handleFailedRun(producer) 100 | 101 | for i := 0; i < 1000; i++ { 102 | record := genRecord(schema) 103 | // set partition key, it will decide which shard to write to 104 | record.SetPartitionKey(fmt.Sprintf("pk_%d", i)) 105 | producer.Input() <- record 106 | } 107 | 108 | err = producer.Close() 109 | if err != nil { 110 | panic(err) 111 | } 112 | } 113 | 114 | func main() { 115 | // normal async write 116 | asyncWrite() 117 | 118 | // async write with hash 119 | asyncWritewithHash() 120 | } 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Datahub Golang SDK 2 | 3 | This project is the Datahub Golang SDK. 4 | 5 | To use the SDK, you’ll need [Go set up on your computer](https://golang.org/doc/install). If you’re not familiar with Go and want to spend a little extra time learning, you can take the [Go tour](https://tour.golang.org/welcome/1) to get started! 6 | 7 | ## Dependencies 8 | 9 | - go (>= 1.23.0) 10 | 11 | ## Installation 12 | 13 | - Install the Go Tools 14 | - Download the latest version from [here](https://golang.org/dl/) 15 | 16 | ``` 17 | tar -C /usr/local -xzf go$VERSION.$OS-$ARCH.tar.gz 18 | ``` 19 | 20 | - Configure your local Go workspace [reference](https://golang.org/doc/install#install), and set the GOPATH environment variable to your Go workspace. 21 | 22 | - Install the Datahub Go SDK 23 | 24 | ``` 25 | $ go get github.com/sirupsen/logrus 26 | $ go get -u -insecure github.com/aliyun/aliyun-datahub-sdk-go/datahub 27 | ``` 28 | 29 | - Run Example Tests 30 | 31 | - Modify [example.go](http://github.com/aliyun/aliyun-datahub-sdk-go/blob/master/examples/exampletest/example.go) and configure your accessid, accesskey, endpoint, such as: 32 | 33 | ``` 34 | accessid := "**your access id**" 35 | accesskey := "**your access key**" 36 | endpoint := "**the datahub server endpoint**" 37 | project_name := "**your project name**" 38 | ``` 39 | 40 | - Build and Run Tests 41 | 42 | ``` 43 | $ go install github.com/aliyun/aliyun-datahub-sdk-go/examples/exampletest 44 | $ $GOPATH/bin/exampletest 45 | ``` 46 | 47 | 48 | ## More Detailed Examples 49 | 50 | - [datahubcmd](http://github.com/aliyun/aliyun-datahub-sdk-go/tree/master/examples/datahubcmd) 51 | 52 | datahubcmd provides a command-line tool. 53 | 54 | ``` 55 | $ cd datahubcmd 56 | $ go build * 57 | $ # print usage 58 | $ ./maincmd 59 | $ # list project 60 | $ go run maincmd.go project.go -endpoint -accessid -accesskey subcmd lp 61 | ``` 62 | 63 | - [more specific examples](http://github.com/aliyun/aliyun-datahub-sdk-go/tree/master/examples) 64 | - if you want to run the project example, modify the project-related parameters in [constant.go](http://github.com/aliyun/aliyun-datahub-sdk-go/tree/master/examples/constant.go) 65 | 66 | ``` 67 | accessId = "**your access id**" 68 | accessKey = "**your access key**" 69 | endpoint = "**the datahub server endpoint**" 70 | projectName = "**your project name**" 71 | ``` 72 | 73 | - Run the example 74 | 75 | You can run it directly with ```go run constant.go project.go```, or run it after building. 
76 | 77 | - if you want to run another example, you should modify the related parameters. For example, to run the topic example, modify ```topicName``` and ```blobTopicName```, ensure the project already exists, and run ```go run constant.go topic.go``` 78 | 79 | ## [More Instructions](http://github.com/aliyun/aliyun-datahub-sdk-go/tree/master/Instructions.md) 80 | 81 | ## Source Installation 82 | 83 | For a development install, clone the repository and then install from source: 84 | 85 | ``` 86 | git clone http://github.com/aliyun/aliyun-datahub-sdk-go.git 87 | ``` 88 | 89 | ## License 90 | 91 | Licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html) 92 | -------------------------------------------------------------------------------- /examples/datahubcmd/maincmd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 9 | ) 10 | 11 | func Usage() { 12 | fmt.Printf("Usage: %s -endpoint [endpoint] -accessid [accessid] -accesskey [accesskey] subcmd [options]\n"+ 13 | "example:\n"+ 14 | " # list project\n"+ 15 | " $ go run maincmd.go project.go -endpoint -accessid -accesskey subcmd lp\n"+ 16 | "option:\n"+ 17 | " lp\n"+ 18 | " list project\n"+ 19 | " gp -project projectName\n"+ 20 | " get project message\n"+ 21 | " lt -project projectName\n"+ 22 | " list all topics belonging to projectName\n"+ 23 | " gt -project projectName -topic topicName\n"+ 24 | " get topic message\n"+ 25 | " ct -project projectName -topic topicName [-comment topicComment] [-shardcount shardNum] [-type blob/tuple] [-lifecycle lifecycle] [-schema yourSchema(Json type)]\n"+ 26 | " create topic, parameters in [] are optional and have default values\n"+ 27 | " dt -project projectName -topic topicName\n"+ 28 | " delete topic\n"+ 29 | " ut -project projectName -topic topicName -comment topicComment\n"+ 30 | " update topic comment\n"+ 31 | " gr -project projectName -topic topicName -shardid shardId [-timeout timeout]\n"+ 32 | " get record by OLDEST cursor, parameters in [] are optional and have default values\n"+ 33 | " pr -project projectName -topic topicName -shardid shardId -source data(Blob type is file name, Tuple type is json string)\n"+ 34 | " put records\n", os.Args[0]) 35 | 36 | fmt.Println() 37 | flag.PrintDefaults() 38 | } 39 | 40 | type ParsedCheckFunc func() bool 41 | type ExecuteFunc func(datahub.DataHubApi) error 42 | 43 | type SubCommand struct { 44 | Name string 45 | FlagSet *flag.FlagSet 46 | ParsedCheck ParsedCheckFunc 47 | Execute ExecuteFunc 48 | } 49 | 50 | var SubCommands []*SubCommand 51 | 52 | func init() { 53 | SubCommands = make([]*SubCommand, 0, 10) 54 | } 55 | 56 | func RegisterSubCommand(name string, flagset *flag.FlagSet, check ParsedCheckFunc, execute ExecuteFunc) { 57 | subcmd := &SubCommand{ 58 | Name: name, 59 | FlagSet: flagset, 60 | ParsedCheck: check, 61 | Execute: execute, 62 | } 63 | SubCommands = append(SubCommands, subcmd) 64 | } 65 | 66 | func main() { 67 | var endpoint, accessid, accesskey string 68 | flag.StringVar(&endpoint, "endpoint", "", "datahub server endpoint. (Required)") 69 | flag.StringVar(&accessid, "accessid", "", "datahub account accessid. (Required)") 70 | flag.StringVar(&accesskey, "accesskey", "", "datahub account accesskey. 
(Required)") 71 | 72 | flag.Parse() 73 | 74 | if endpoint == "" || accessid == "" || accesskey == "" || flag.NArg() == 0 { 75 | Usage() 76 | os.Exit(1) 77 | } 78 | 79 | fmt.Println("\n============command result============\n") 80 | 81 | dh := datahub.New(accessid, accesskey, endpoint) 82 | 83 | cmdname := flag.Arg(0) 84 | for _, subcmd := range SubCommands { 85 | if cmdname == subcmd.Name { 86 | subcmd.FlagSet.Parse(os.Args[8:]) 87 | if ok := subcmd.ParsedCheck(); !ok { 88 | fmt.Printf("subcommand %s usage:\n", subcmd.Name) 89 | subcmd.FlagSet.PrintDefaults() 90 | os.Exit(1) 91 | } 92 | err := subcmd.Execute(dh) 93 | if err != nil { 94 | panic(err) 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /examples/consume/consumption.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "time" 7 | 8 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 9 | ) 10 | 11 | func offset_consumption() { 12 | projectName := "test_project" 13 | topicName := "test_topic" 14 | subId := "174479399622950OBW" 15 | shardId := "0" 16 | 17 | // add config to examples client 18 | config := &datahub.Config{ 19 | Protocol: datahub.Batch, 20 | CompressorType: datahub.ZSTD, 21 | } 22 | 23 | account := datahub.NewAliyunAccount("ak", "sk") 24 | dh := datahub.NewClientWithConfig("endpoint", config, account) 25 | 26 | // add your want to open shardId 27 | shardIds := []string{shardId} 28 | session, err := dh.OpenSubscriptionSession(projectName, topicName, subId, shardIds) 29 | if err != nil { 30 | fmt.Println("open session failed") 31 | return 32 | } 33 | 34 | // check if shardId is open 35 | so, ok := session.Offsets[shardId] 36 | if !ok { 37 | fmt.Printf("shardId %s can not open session", shardId) 38 | return 39 | } 40 | var cursor string 41 | // get the next cursor that last committed,so sequence+1 42 | gc, err := dh.GetCursor(projectName, topicName, shardId, datahub.SEQUENCE, so.Sequence+1) 43 | if err != nil { 44 | if _, ok := err.(*datahub.SeekOutOfRangeError); ok { 45 | fmt.Println("sequence is seek out of range, it maybe Expired") 46 | // sequence invalid,get data from the oldest cursor of valid data 47 | gc1, err := dh.GetCursor(projectName, topicName, shardId, datahub.OLDEST) 48 | if err != nil { 49 | return 50 | } 51 | cursor = gc1.Cursor 52 | } else { 53 | fmt.Println("get cursor failed") 54 | fmt.Println(err) 55 | return 56 | } 57 | } else { 58 | cursor = gc.Cursor 59 | } 60 | 61 | // get topic from the topic 62 | topic, err := dh.GetTopic(projectName, topicName) 63 | if err != nil { 64 | fmt.Println("get topic failed") 65 | return 66 | } 67 | // get the num of data 68 | limitNum := 1000 69 | var readNum int64 = 0 70 | for { 71 | res, err := dh.GetTupleRecords(projectName, topicName, shardId, cursor, limitNum, topic.RecordSchema) 72 | if err != nil { 73 | fmt.Println("get error failed") 74 | return 75 | } 76 | // no data ,read later 77 | if res.RecordCount <= 0 { 78 | time.Sleep(time.Second * 5) 79 | fmt.Println("no data now, wait 5 seconds") 80 | } 81 | 82 | for _, record := range res.Records { 83 | tRecord, ok := record.(*datahub.TupleRecord) 84 | if !ok { 85 | fmt.Printf("the record type is %v,not a TupleRecord", reflect.TypeOf(record)) 86 | return 87 | } 88 | // consume data 89 | fmt.Println(tRecord.Values) 90 | 91 | so.Sequence = record.GetSequence() 92 | so.Timestamp = record.GetSystemTime() 93 | readNum++ 94 | if readNum%100 == 0 { 95 | 96 | ms := 
map[string]datahub.SubscriptionOffset{ 97 | shardId: so, 98 | } 99 | if _, err := dh.CommitSubscriptionOffset(projectName, topicName, subId, ms); err != nil { 100 | if _, ok := err.(*datahub.SubscriptionOffsetResetError); ok { 101 | fmt.Println("subscription is reset in elsewhere") 102 | return 103 | } else if _, ok := err.(*datahub.SubscriptionSessionInvalidError); ok { 104 | fmt.Println("subscription is initialized in elsewhere") 105 | return 106 | } else { 107 | fmt.Println(err) 108 | return 109 | } 110 | } 111 | fmt.Println("commit offset successful") 112 | time.Sleep(time.Second * 5) 113 | } 114 | } 115 | cursor = res.NextCursor 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /datahub/async_producer_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestDefaultParitionFuncWithExtend(t *testing.T) { 11 | 12 | topic := &GetTopicResult{ 13 | ExpandMode: ONLY_EXTEND, 14 | } 15 | 16 | shards := make([]ShardEntry, 0) 17 | shards = append(shards, ShardEntry{ 18 | ShardId: "0", 19 | BeginHashKey: "00000000000000000000000000000000", 20 | }) 21 | shards = append(shards, ShardEntry{ 22 | ShardId: "1", 23 | BeginHashKey: "55555555555555555555555555555555", 24 | }) 25 | shards = append(shards, ShardEntry{ 26 | ShardId: "2", 27 | BeginHashKey: "99999999999999999999999999999999", 28 | }) 29 | shards = append(shards, ShardEntry{ 30 | ShardId: "3", 31 | BeginHashKey: "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", 32 | }) 33 | 34 | record := NewBlobRecord([]byte("test")) 35 | shardId := DefaultPartitionFunc(topic, shards, record) 36 | assert.Equal(t, shardId, "") 37 | 38 | record.SetShardId("2") 39 | shardId = DefaultPartitionFunc(topic, shards, record) 40 | assert.Equal(t, shardId, "2") 41 | 42 | record.SetShardId("") 43 | record.SetPartitionKey("abcd") 44 | shardId = DefaultPartitionFunc(topic, shards, record) 45 | assert.Equal(t, shardId, "1") 46 | 47 | record.SetPartitionKey("test1") 48 | shardId = DefaultPartitionFunc(topic, shards, record) 49 | assert.Equal(t, shardId, "0") 50 | } 51 | 52 | func TestDefaultParitionFuncWithSplit(t *testing.T) { 53 | topic := &GetTopicResult{ 54 | ExpandMode: SPLIT_EXTEND, 55 | } 56 | 57 | shards := make([]ShardEntry, 0) 58 | shards = append(shards, ShardEntry{ 59 | ShardId: "0", 60 | BeginHashKey: "00000000000000000000000000000000", 61 | }) 62 | shards = append(shards, ShardEntry{ 63 | ShardId: "1", 64 | BeginHashKey: "55555555555555555555555555555555", 65 | }) 66 | shards = append(shards, ShardEntry{ 67 | ShardId: "2", 68 | BeginHashKey: "99999999999999999999999999999999", 69 | }) 70 | shards = append(shards, ShardEntry{ 71 | ShardId: "3", 72 | BeginHashKey: "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", 73 | }) 74 | 75 | record := NewBlobRecord([]byte("test")) 76 | shardId := DefaultPartitionFunc(topic, shards, record) 77 | assert.Equal(t, shardId, "") 78 | 79 | record.SetShardId("2") 80 | shardId = DefaultPartitionFunc(topic, shards, record) 81 | assert.Equal(t, shardId, "2") 82 | 83 | record.SetShardId("") 84 | record.SetPartitionKey("abcd") 85 | shardId = DefaultPartitionFunc(topic, shards, record) 86 | assert.Equal(t, shardId, "2") 87 | 88 | record.SetPartitionKey("test1") 89 | shardId = DefaultPartitionFunc(topic, shards, record) 90 | assert.Equal(t, shardId, "1") 91 | } 92 | 93 | func TestBufferHelper(t *testing.T) { 94 | buffer := newBufferHelper(3, 2, 
time.Second*2) 95 | 96 | buffer.input() <- NewBlobRecord(nil) 97 | buffer.input() <- NewBlobRecord(nil) 98 | buffer.input() <- NewBlobRecord(nil) 99 | 100 | // wait record flush to batch 101 | time.Sleep(50 * time.Millisecond) 102 | assert.Equal(t, len(buffer.output()), 1) 103 | batch := <-buffer.output() 104 | assert.Equal(t, len(batch), 3) 105 | 106 | buffer.input() <- NewBlobRecord(nil) 107 | time.Sleep(time.Millisecond * 1000) 108 | assert.Equal(t, len(buffer.output()), 0) 109 | time.Sleep(time.Millisecond * 1100) 110 | assert.Equal(t, len(buffer.output()), 1) 111 | batch1 := <-buffer.output() 112 | assert.Equal(t, len(batch1), 1) 113 | 114 | buffer.batchInput() <- batch1 115 | batch2 := <-buffer.output() 116 | assert.Equal(t, len(batch2), 1) 117 | } 118 | -------------------------------------------------------------------------------- /datahub/avro_helper.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/hamba/avro/v2" 8 | ) 9 | 10 | const ( 11 | defaultAvroAttributeName = "__dh_attribute__" 12 | defaultAvroBlobColumnName = "data" 13 | defaultAvroRecordName = "AvroRecord" 14 | defaultInnterAvroRecordName = "InnerAvroRecord" 15 | ) 16 | 17 | var ( 18 | sBlobAvroSchemaOnce sync.Once 19 | sBlobAvroSchema avro.Schema 20 | ) 21 | 22 | func getAvroBlobSchema() avro.Schema { 23 | sBlobAvroSchemaOnce.Do(func() { 24 | sBlobAvroSchema, _ = getAvroSchema(nil) 25 | }) 26 | return sBlobAvroSchema 27 | } 28 | 29 | func getAvroColumnSchema(fieldType FieldType) (avro.Schema, error) { 30 | switch fieldType { 31 | case BOOLEAN: 32 | return avro.NewPrimitiveSchema(avro.Boolean, nil), nil 33 | case TINYINT, SMALLINT, INTEGER: 34 | return avro.NewPrimitiveSchema(avro.Int, nil), nil 35 | case BIGINT, TIMESTAMP: 36 | return avro.NewPrimitiveSchema(avro.Long, nil), nil 37 | case FLOAT: 38 | return avro.NewPrimitiveSchema(avro.Float, nil), nil 39 | case DOUBLE: 40 | return avro.NewPrimitiveSchema(avro.Double, nil), nil 41 | case STRING, JSON, DECIMAL: 42 | return avro.NewPrimitiveSchema(avro.String, nil), nil 43 | default: 44 | // cannot reach here 45 | return nil, fmt.Errorf("unknown field type %s", fieldType) 46 | } 47 | } 48 | 49 | func getAvroField(name string, fieldType FieldType, allowNull bool) (*avro.Field, error) { 50 | schema, err := getAvroColumnSchema(fieldType) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | if allowNull { 56 | schemas := []avro.Schema{avro.NewNullSchema(), schema} 57 | schema, err = avro.NewUnionSchema(schemas) 58 | if err != nil { 59 | return nil, err 60 | } 61 | } 62 | 63 | return avro.NewField(name, schema) 64 | } 65 | 66 | func getAttrAvroField() (*avro.Field, error) { 67 | attrValueSchema, err := getAvroColumnSchema(STRING) 68 | if err != nil { 69 | return nil, err 70 | } 71 | 72 | mapSchema := avro.NewMapSchema(attrValueSchema) 73 | nullMapSchema := []avro.Schema{avro.NewNullSchema(), mapSchema} 74 | attrSchema, err := avro.NewUnionSchema(nullMapSchema) 75 | if err != nil { 76 | return nil, err 77 | } 78 | 79 | return avro.NewField(defaultAvroAttributeName, attrSchema, avro.WithDefault(nil)) 80 | } 81 | 82 | func getTupleFields(schema *RecordSchema) ([]*avro.Field, error) { 83 | avroFields := make([]*avro.Field, 0) 84 | for _, field := range schema.Fields { 85 | avroField, err := getAvroField(field.Name, field.Type, field.AllowNull) 86 | if err != nil { 87 | return nil, err 88 | } 89 | 90 | avroFields = append(avroFields, avroField) 91 | } 92 | return 
avroFields, nil 93 | } 94 | 95 | func getBlobFields() ([]*avro.Field, error) { 96 | avroFields := make([]*avro.Field, 0) 97 | schema := avro.NewPrimitiveSchema(avro.Bytes, nil) 98 | field, err := avro.NewField(defaultAvroBlobColumnName, schema) 99 | if err != nil { 100 | return nil, err 101 | } 102 | avroFields = append(avroFields, field) 103 | return avroFields, nil 104 | } 105 | 106 | func getAvroSchema(schema *RecordSchema) (avro.Schema, error) { 107 | var avroFields []*avro.Field = nil 108 | var err error = nil 109 | if schema != nil { 110 | avroFields, err = getTupleFields(schema) 111 | } else { 112 | avroFields, err = getBlobFields() 113 | } 114 | if err != nil { 115 | return nil, err 116 | } 117 | 118 | attrField, err := getAttrAvroField() 119 | if err != nil { 120 | return nil, err 121 | } 122 | 123 | avroFields = append(avroFields, attrField) 124 | avroSchema, err := avro.NewRecordSchema(defaultAvroRecordName, "", avroFields) 125 | if err != nil { 126 | return nil, err 127 | } 128 | 129 | return avroSchema, nil 130 | } 131 | -------------------------------------------------------------------------------- /datahub/topic_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestRecordSchema_UnmaschemahalJSON(t *testing.T) { 11 | schema := NewRecordSchema() 12 | 13 | err := json.Unmarshal([]byte(`{ 14 | "fields":[ 15 | {"name":"f1","type":"STRING","notnull":false,"comment":"c1"}, 16 | {"name":"f2","type":"BIGINT","notnull":true,"comment":"c2"}, 17 | {"name":"f3","type":"DOUBLE","notnull":false,"comment":"c3"} 18 | ]}`), schema) 19 | 20 | assert.Nil(t, err) 21 | assert.Equal(t, 3, len(schema.Fields)) 22 | assert.Equal(t, "f1", schema.Fields[0].Name) 23 | assert.Equal(t, STRING, schema.Fields[0].Type) 24 | assert.Equal(t, false, schema.Fields[0].AllowNull) 25 | assert.Equal(t, "c1", schema.Fields[0].Comment) 26 | 27 | assert.Equal(t, "f2", schema.Fields[1].Name) 28 | assert.Equal(t, BIGINT, schema.Fields[1].Type) 29 | assert.Equal(t, true, schema.Fields[1].AllowNull) 30 | assert.Equal(t, "c2", schema.Fields[1].Comment) 31 | 32 | assert.Equal(t, "f3", schema.Fields[2].Name) 33 | assert.Equal(t, DOUBLE, schema.Fields[2].Type) 34 | assert.Equal(t, false, schema.Fields[2].AllowNull) 35 | assert.Equal(t, "c3", schema.Fields[2].Comment) 36 | 37 | assert.Equal(t, 3, len(schema.fieldIndexMap)) 38 | assert.Equal(t, 0, schema.fieldIndexMap["f1"]) 39 | assert.Equal(t, 1, schema.fieldIndexMap["f2"]) 40 | assert.Equal(t, 2, schema.fieldIndexMap["f3"]) 41 | } 42 | 43 | func TestSchemaGetFiled(t *testing.T) { 44 | schema := NewRecordSchema() 45 | 46 | schema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true, Comment: "test_f1"}) 47 | schema.AddField(Field{Name: "f2", Type: INTEGER, AllowNull: false, Comment: "test_f2"}) 48 | schema.AddField(Field{Name: "f3", Type: STRING, AllowNull: true, Comment: "test_f3"}) 49 | 50 | assert.Equal(t, schema.GetFieldIndex("f1"), 0) 51 | assert.Equal(t, schema.GetFieldIndex("f2"), 1) 52 | assert.Equal(t, schema.GetFieldIndex("f3"), 2) 53 | assert.Equal(t, schema.GetFieldIndex("f4"), -1) 54 | 55 | col, err := schema.GetFieldByName("f1") 56 | assert.Nil(t, err) 57 | assert.Equal(t, col.Name, "f1") 58 | assert.Equal(t, col.Type, BOOLEAN) 59 | assert.Equal(t, col.AllowNull, true) 60 | assert.Equal(t, col.Comment, "test_f1") 61 | col, err = schema.GetFieldByIndex(0) 62 | assert.Nil(t, err) 63 | 
assert.Equal(t, col.Name, "f1") 64 | assert.Equal(t, col.Type, BOOLEAN) 65 | assert.Equal(t, col.AllowNull, true) 66 | assert.Equal(t, col.Comment, "test_f1") 67 | 68 | col, err = schema.GetFieldByName("f2") 69 | assert.Nil(t, err) 70 | assert.Equal(t, col.Name, "f2") 71 | assert.Equal(t, col.Type, INTEGER) 72 | assert.Equal(t, col.AllowNull, false) 73 | assert.Equal(t, col.Comment, "test_f2") 74 | col, err = schema.GetFieldByIndex(1) 75 | assert.Nil(t, err) 76 | assert.Equal(t, col.Name, "f2") 77 | assert.Equal(t, col.Type, INTEGER) 78 | assert.Equal(t, col.AllowNull, false) 79 | assert.Equal(t, col.Comment, "test_f2") 80 | 81 | col, err = schema.GetFieldByName("f3") 82 | assert.Nil(t, err) 83 | assert.Equal(t, col.Name, "f3") 84 | assert.Equal(t, col.Type, STRING) 85 | assert.Equal(t, col.AllowNull, true) 86 | assert.Equal(t, col.Comment, "test_f3") 87 | col, err = schema.GetFieldByIndex(2) 88 | assert.Nil(t, err) 89 | assert.Equal(t, col.Name, "f3") 90 | assert.Equal(t, col.Type, STRING) 91 | assert.Equal(t, col.AllowNull, true) 92 | assert.Equal(t, col.Comment, "test_f3") 93 | 94 | col, err = schema.GetFieldByName("f4") 95 | assert.Nil(t, col) 96 | assert.True(t, IsFieldNotExistsError(err)) 97 | assert.Equal(t, err.Error(), "field[f4] not exist") 98 | 99 | col, err = schema.GetFieldByIndex(3) 100 | assert.Nil(t, col) 101 | assert.True(t, IsFieldNotExistsError(err)) 102 | assert.Equal(t, err.Error(), "field index[3] out of range") 103 | 104 | col, err = schema.GetFieldByIndex(-1) 105 | assert.Nil(t, col) 106 | assert.True(t, IsFieldNotExistsError(err)) 107 | assert.Equal(t, err.Error(), "field index[-1] out of range") 108 | } 109 | -------------------------------------------------------------------------------- /datahub/account.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/credentials-go/credentials" 7 | ) 8 | 9 | type Account interface { 10 | fmt.Stringer 11 | GetAccountId() string 12 | GetAccountKey() string 13 | GetSecurityToken() string 14 | } 15 | 16 | type AliyunAccount struct { 17 | // Aliyun Access key ID 18 | AccessId string 19 | 20 | // Aliyun Secret Access Key 21 | AccessKey string 22 | } 23 | 24 | // create new instance 25 | func NewAliyunAccount(accessId, accessKey string) *AliyunAccount { 26 | return &AliyunAccount{ 27 | AccessId: accessId, 28 | AccessKey: accessKey, 29 | } 30 | } 31 | 32 | func (a AliyunAccount) String() string { 33 | return fmt.Sprintf("accessId: %s, accessKey: %s", a.AccessId, a.AccessKey) 34 | } 35 | 36 | func (a AliyunAccount) GetAccountId() string { 37 | return a.AccessId 38 | } 39 | 40 | func (a AliyunAccount) GetAccountKey() string { 41 | return a.AccessKey 42 | } 43 | 44 | func (a AliyunAccount) GetSecurityToken() string { 45 | return "" 46 | } 47 | 48 | type CredentialAccount struct { 49 | provider credentials.Credential 50 | } 51 | 52 | func NewCredentialAccount(credential credentials.Credential) *CredentialAccount { 53 | return &CredentialAccount{ 54 | provider: credential, 55 | } 56 | } 57 | 58 | func (a CredentialAccount) String() string { 59 | credential := a.getCredential() 60 | return fmt.Sprintf("credential: %s", credential.String()) 61 | } 62 | 63 | func (a CredentialAccount) getCredential() *credentials.CredentialModel { 64 | credential, err := a.provider.GetCredential() 65 | if err != nil { 66 | panic(err) 67 | } 68 | return credential 69 | } 70 | 71 | func (a CredentialAccount) GetAccountId() string { 72 | credential := 
a.getCredential() 73 | return *credential.AccessKeyId 74 | } 75 | 76 | func (a CredentialAccount) GetAccountKey() string { 77 | credential := a.getCredential() 78 | return *credential.AccessKeySecret 79 | } 80 | 81 | func (a CredentialAccount) GetSecurityToken() string { 82 | credential := a.getCredential() 83 | return *credential.SecurityToken 84 | } 85 | 86 | type StsCredential struct { 87 | // Access key ID 88 | AccessId string 89 | 90 | // Secret Access Key 91 | AccessKey string 92 | 93 | // Security Token 94 | SecurityToken string 95 | } 96 | 97 | // create new instance 98 | func NewStsCredential(accessId, accessKey, securityToken string) *StsCredential { 99 | return &StsCredential{ 100 | AccessId: accessId, 101 | AccessKey: accessKey, 102 | SecurityToken: securityToken, 103 | } 104 | } 105 | 106 | func (a StsCredential) String() string { 107 | return fmt.Sprintf("accessId: %s, accessKey: %s, securityToken: %s", a.AccessId, a.AccessKey, a.SecurityToken) 108 | } 109 | 110 | func (a StsCredential) GetAccountId() string { 111 | return a.AccessId 112 | } 113 | 114 | func (a StsCredential) GetAccountKey() string { 115 | return a.AccessKey 116 | } 117 | 118 | func (a StsCredential) GetSecurityToken() string { 119 | return a.SecurityToken 120 | } 121 | 122 | type DwarfCredential struct { 123 | AccessId string 124 | AccessKey string 125 | SecurityToken string 126 | DwarfToken string 127 | DwarfSign string 128 | } 129 | 130 | func NewDwarfCredential(accessId, accessKey, securityToken, dwarfToken, dwarfSign string) *DwarfCredential { 131 | return &DwarfCredential{ 132 | AccessId: accessId, 133 | AccessKey: accessKey, 134 | SecurityToken: securityToken, 135 | DwarfToken: dwarfToken, 136 | DwarfSign: dwarfSign, 137 | } 138 | } 139 | 140 | func (a DwarfCredential) String() string { 141 | return fmt.Sprintf("accessId: %s, accessKey: %s, securityToken: %s, dwarfToken:%s, dwarfSign:%s", 142 | a.AccessId, a.AccessKey, a.SecurityToken, a.DwarfToken, a.DwarfSign) 143 | } 144 | 145 | func (a DwarfCredential) GetAccountId() string { 146 | return a.AccessId 147 | } 148 | 149 | func (a DwarfCredential) GetAccountKey() string { 150 | return a.AccessKey 151 | } 152 | 153 | func (a DwarfCredential) GetSecurityToken() string { 154 | return a.SecurityToken 155 | } 156 | -------------------------------------------------------------------------------- /datahub/record_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/shopspring/decimal" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestTupleRecordFromJson(t *testing.T) { 11 | dhSchema := NewRecordSchema() 12 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 13 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 14 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 15 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 16 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 17 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 18 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 19 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 20 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 21 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: true}) 22 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 23 | 24 | str := 
"{\"f1\":false,\"f10\":\"QmbqUcPJjukbGBVtKo7hao5fqBHLFK3qROgQylKw3gW9sZ52hIzgomSF0esBZnVz\",\"f2\":14,\"f3\":20929,\"f4\":1237817139,\"f5\":6855982949904009034,\"f6\":1748599432750,\"f7\":0.7207972,\"f8\":0.3287779159869558,\"f9\":\"0.6769154\"}" 25 | record, err := NewTupleRecordFromJson(dhSchema, []byte(str)) 26 | assert.Nil(t, err) 27 | val, _ := record.GetValueByName("f1") 28 | assert.Equal(t, val, Boolean(false)) 29 | val, _ = record.GetValueByName("f2") 30 | assert.Equal(t, val, Tinyint(14)) 31 | val, _ = record.GetValueByName("f2") 32 | assert.Equal(t, val, Tinyint(14)) 33 | val, _ = record.GetValueByName("f3") 34 | assert.Equal(t, val, Smallint(20929)) 35 | val, _ = record.GetValueByName("f4") 36 | assert.Equal(t, val, Integer(1237817139)) 37 | val, _ = record.GetValueByName("f5") 38 | assert.Equal(t, val, Bigint(6855982949904009034)) 39 | val, _ = record.GetValueByName("f6") 40 | assert.Equal(t, val, Timestamp(1748599432750)) 41 | val, _ = record.GetValueByName("f7") 42 | assert.Equal(t, val, Float(0.7207972)) 43 | val, _ = record.GetValueByName("f8") 44 | assert.Equal(t, val, Double(0.3287779159869558)) 45 | val, _ = record.GetValueByName("f9") 46 | eval, _ := decimal.NewFromString("0.6769154") 47 | assert.Equal(t, val, Decimal(eval)) 48 | val, _ = record.GetValueByName("f10") 49 | assert.Equal(t, val, String("QmbqUcPJjukbGBVtKo7hao5fqBHLFK3qROgQylKw3gW9sZ52hIzgomSF0esBZnVz")) 50 | val, _ = record.GetValueByName("f11") 51 | assert.Nil(t, val) 52 | } 53 | 54 | func TestTupleRecordFromJsonWithNotExistKey(t *testing.T) { 55 | dhSchema := NewRecordSchema() 56 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 57 | dhSchema.AddField(Field{Name: "f2", Type: INTEGER, AllowNull: true}) 58 | 59 | // column not match 60 | str := "{\"f1\":false,\"f10\":\"Qmbq\"}" 61 | _, err := NewTupleRecordFromJson(dhSchema, []byte(str)) 62 | assert.Equal(t, err.Error(), "field[f10] not exist") 63 | 64 | // ignore column not match 65 | record, err := NewTupleRecordFromJson(dhSchema, []byte(str), WithIgnoreNotExistKey(true)) 66 | assert.Nil(t, err) 67 | val, _ := record.GetValueByName("f1") 68 | assert.Equal(t, val, Boolean(false)) 69 | val, _ = record.GetValueByName("f2") 70 | assert.Nil(t, val) 71 | } 72 | 73 | func TestTupleRecordFromJsonWithFail(t *testing.T) { 74 | dhSchema := NewRecordSchema() 75 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 76 | dhSchema.AddField(Field{Name: "f2", Type: INTEGER, AllowNull: true}) 77 | 78 | // invalid json 79 | str := "{\"f1\":false,\"f10\":\"Qmbq" 80 | _, err := NewTupleRecordFromJson(dhSchema, []byte(str)) 81 | assert.NotNil(t, err) 82 | 83 | // type not match 84 | str = "{\"f1\":false,\"f1\":\"Qmbq\"}" 85 | _, err = NewTupleRecordFromJson(dhSchema, []byte(str)) 86 | assert.NotNil(t, err) 87 | assert.Equal(t, err.Error(), "value type[string] not match field type[BOOLEAN]") 88 | } 89 | 90 | func TestSetValueError(t *testing.T) { 91 | dhSchema := NewRecordSchema() 92 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 93 | dhSchema.AddField(Field{Name: "f2", Type: INTEGER, AllowNull: true}) 94 | 95 | record := NewTupleRecord(dhSchema) 96 | err := record.SetValueByIdx(3, 1) 97 | assert.True(t, IsFieldNotExistsError(err)) 98 | 99 | err = record.SetValueByIdx(-1, 1) 100 | assert.True(t, IsFieldNotExistsError(err)) 101 | 102 | err = record.SetValueByName("f3", 1) 103 | assert.True(t, IsFieldNotExistsError(err)) 104 | } 105 | -------------------------------------------------------------------------------- 
/examples/api/topic.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 | fmt.Println(topicName) 11 | dh = datahub.New(accessId, accessKey, endpoint) 12 | createTupleTopic() 13 | createBlobTopic() 14 | 15 | listTopic() 16 | 17 | getTopic() 18 | 19 | updateTopic() 20 | 21 | deleteTopic() 22 | 23 | } 24 | 25 | func createTupleTopic() { 26 | schema := datahub.NewRecordSchema() 27 | schema.AddField(*datahub.NewField("string_field", datahub.STRING)) 28 | schema.AddField(*datahub.NewFieldWithProp("bigint_field", datahub.BIGINT, false, "test11")) 29 | schema.AddField(*datahub.NewField("double_field", datahub.DOUBLE)) 30 | schema.AddField(*datahub.NewField("boolean_field", datahub.BOOLEAN)) 31 | 32 | if _, err := dh.CreateTupleTopic(projectName, topicName, "topic comment", 7, 3, schema); err != nil { 33 | if _, ok := err.(*datahub.ResourceExistError); ok { 34 | fmt.Println("topic already exists") 35 | } else { 36 | fmt.Println("create topic failed") 37 | fmt.Println(err) 38 | return 39 | } 40 | } 41 | fmt.Println("create topic successful") 42 | } 43 | 44 | func createBlobTopic() { 45 | if _, err := dh.CreateBlobTopic(projectName, blobTopicName, "topic comment", 7, 3); err != nil { 46 | if _, ok := err.(*datahub.ResourceExistError); ok { 47 | fmt.Println("topic already exists") 48 | } else { 49 | fmt.Println("create topic failed") 50 | fmt.Println(err) 51 | return 52 | } 53 | } 54 | fmt.Println("create topic successful") 55 | } 56 | 57 | func listTopic() { 58 | lt, err := dh.ListTopic(projectName) 59 | if err != nil { 60 | fmt.Println("get topic list failed") 61 | return 62 | } 63 | fmt.Println("get topic list successful") 64 | fmt.Println(*lt) 65 | } 66 | 67 | func getTopic() { 68 | gt, err := dh.GetTopic(projectName, topicName) 69 | if err != nil { 70 | fmt.Println("get topic failed") 71 | fmt.Println(err) 72 | return 73 | } 74 | fmt.Println("get topic successful") 75 | fmt.Println(*gt) 76 | } 77 | 78 | func updateTopic() { 79 | if _, err := dh.UpdateTopic(projectName, topicName, "new topic comment"); err != nil { 80 | fmt.Println("update topic comment failed") 81 | fmt.Println(err) 82 | return 83 | } 84 | fmt.Println("update topic comment successful") 85 | } 86 | 87 | func deleteTopic() { 88 | if _, err := dh.DeleteTopic(projectName, topicName); err != nil { 89 | if _, ok := err.(*datahub.ResourceNotFoundError); ok { 90 | fmt.Println("topic not found") 91 | } else { 92 | fmt.Println("delete failed") 93 | return 94 | } 95 | } 96 | fmt.Println("delete successful") 97 | 98 | if _, err := dh.DeleteTopic(projectName, blobTopicName); err != nil { 99 | if _, ok := err.(*datahub.ResourceNotFoundError); ok { 100 | fmt.Println("topic not found") 101 | } else { 102 | fmt.Println("delete failed") 103 | return 104 | } 105 | } 106 | fmt.Println("delete successful") 107 | } 108 | 109 | func appendField() { 110 | field := datahub.Field{ 111 | Name: "newField", 112 | Type: datahub.STRING, 113 | AllowNull: true, 114 | } 115 | _, err := dh.AppendField(projectName, topicName, field) 116 | fmt.Println(err) 117 | } 118 | 119 | func getSchema(dh datahub.DataHub, projectName, topicName string) { 120 | gt, err := dh.GetTopic(projectName, topicName) 121 | if err != nil { 122 | fmt.Println("get topic failed") 123 | fmt.Println(err) 124 | return 125 | } else { 126 | schema := gt.RecordSchema 127 | fmt.Println(schema) 128 | } 129 | } 130 | 131 | func 
createSchema1() { 132 | schema := datahub.NewRecordSchema() 133 | schema.AddField(*datahub.NewField("field1", datahub.STRING)) 134 | schema.AddField(*datahub.NewFieldWithProp("field2", datahub.BIGINT, false, "comment")) 135 | 136 | fmt.Println(schema) 137 | } 138 | func createSchema2() { 139 | recordSchema := datahub.NewRecordSchema() 140 | recordSchema.AddField(datahub.Field{Name: "bigint_field", Type: datahub.BIGINT, AllowNull: true}) 141 | recordSchema.AddField(datahub.Field{Name: "timestamp_field", Type: datahub.TIMESTAMP, AllowNull: false}) 142 | recordSchema.AddField(datahub.Field{Name: "string_field", Type: datahub.STRING}) 143 | recordSchema.AddField(datahub.Field{Name: "double_field", Type: datahub.DOUBLE}) 144 | recordSchema.AddField(datahub.Field{Name: "boolean_field", Type: datahub.BOOLEAN}) 145 | recordSchema.AddField(datahub.Field{Name: "decimal_field", Type: datahub.DECIMAL}) 146 | } 147 | 148 | func createSchema3() { 149 | str := "{\"fields\":[{\"name\":\"field1\",\"type\":\"STRING\",\"notnull\":true,\"comment\":\"\"},{\"name\":\"field2\",\"type\":\"BIGINT\",\"notnull\":false,\"comment\":\"comment\"}]}" 150 | schema, err := datahub.NewRecordSchemaFromJson(str) 151 | if err != nil { 152 | fmt.Println("create recordSchema failed") 153 | fmt.Println(err) 154 | return 155 | } 156 | fmt.Println("create recordSchema successful") 157 | fmt.Println(schema) 158 | } 159 | -------------------------------------------------------------------------------- /datahub/topic.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strings" 7 | "sync/atomic" 8 | 9 | log "github.com/sirupsen/logrus" 10 | ) 11 | 12 | type Field struct { 13 | Name string `json:"name"` 14 | Type FieldType `json:"type"` 15 | AllowNull bool `json:"notnull"` // Double negation is hard to understand, allownull is easier to understand 16 | Comment string `json:"comment"` 17 | } 18 | 19 | func NewField(name string, Type FieldType) *Field { 20 | return &Field{ 21 | Name: name, 22 | Type: Type, 23 | AllowNull: true, 24 | Comment: "", 25 | } 26 | } 27 | 28 | func NewFieldWithProp(name string, Type FieldType, allowNull bool, comment string) *Field { 29 | return &Field{ 30 | Name: name, 31 | Type: Type, 32 | AllowNull: allowNull, 33 | Comment: comment, 34 | } 35 | } 36 | 37 | // RecordSchema 38 | type RecordSchema struct { 39 | Fields []Field `json:"fields"` 40 | fieldIndexMap map[string]int `json:"-"` 41 | hashVal uint32 42 | } 43 | 44 | // NewRecordSchema create a new record schema for tuple record 45 | func NewRecordSchema() *RecordSchema { 46 | return &RecordSchema{ 47 | Fields: make([]Field, 0), 48 | fieldIndexMap: make(map[string]int), 49 | hashVal: 0, 50 | } 51 | } 52 | 53 | func NewRecordSchemaFromJson(SchemaJson string) (recordSchema *RecordSchema, err error) { 54 | recordSchema = &RecordSchema{} 55 | if err = json.Unmarshal([]byte(SchemaJson), recordSchema); err != nil { 56 | return 57 | } 58 | for _, v := range recordSchema.Fields { 59 | if !validateFieldType(v.Type) { 60 | return nil, fmt.Errorf("field type %q illegal", v.Type) 61 | } 62 | } 63 | return 64 | } 65 | 66 | func (rs *RecordSchema) UnmarshalJSON(data []byte) error { 67 | schema := &struct { 68 | Fields []Field `json:"fields"` 69 | }{} 70 | if err := json.Unmarshal(data, schema); err != nil { 71 | return err 72 | } 73 | 74 | rs.fieldIndexMap = make(map[string]int) 75 | for _, v := range schema.Fields { 76 | if err := rs.AddField(v); err != nil { 77 | 
return err 78 | } 79 | } 80 | 81 | return nil 82 | } 83 | 84 | func (rs *RecordSchema) HashCode() uint32 { 85 | return rs.hashCode() 86 | } 87 | 88 | func (rs *RecordSchema) hashCode() uint32 { 89 | if val := atomic.LoadUint32(&rs.hashVal); val != 0 { 90 | return val 91 | 92 | } 93 | 94 | schemaStr := rs.String() 95 | newVal, err := calculateHashCode(schemaStr) 96 | if err != nil { 97 | log.Warnf("Calculate hash code failed, schema:%s, error:%v", schemaStr, err) 98 | } 99 | 100 | if atomic.CompareAndSwapUint32(&rs.hashVal, 0, newVal) && log.IsLevelEnabled(log.DebugLevel) { 101 | log.Debugf("Calculate hash code success schema:%s, code:%d", schemaStr, newVal) 102 | return newVal 103 | } else { 104 | return atomic.LoadUint32(&rs.hashVal) 105 | } 106 | } 107 | 108 | func (rs *RecordSchema) String() string { 109 | type FieldHelper struct { 110 | Name string `json:"name"` 111 | Type FieldType `json:"type"` 112 | NotNull bool `json:"notnull,omitempty"` 113 | Comment string `json:"comment,omitempty"` 114 | } 115 | 116 | fields := make([]FieldHelper, 0, rs.Size()) 117 | for _, field := range rs.Fields { 118 | tmpField := FieldHelper{field.Name, field.Type, !field.AllowNull, field.Comment} 119 | fields = append(fields, tmpField) 120 | } 121 | 122 | tmpSchema := struct { 123 | Fields []FieldHelper `json:"fields"` 124 | }{fields} 125 | 126 | buf, _ := json.Marshal(tmpSchema) 127 | return string(buf) 128 | } 129 | 130 | // AddField add a field 131 | func (rs *RecordSchema) AddField(f Field) error { 132 | if !validateFieldType(f.Type) { 133 | return fmt.Errorf("field type %q illegal", f.Type) 134 | } 135 | 136 | f.Name = strings.ToLower(f.Name) 137 | _, exists := rs.fieldIndexMap[f.Name] 138 | if exists { 139 | return fmt.Errorf("field %s duplicated", f.Name) 140 | } 141 | 142 | rs.Fields = append(rs.Fields, f) 143 | rs.fieldIndexMap[f.Name] = len(rs.Fields) - 1 144 | return nil 145 | } 146 | 147 | // GetFieldIndex get index of given field 148 | func (rs *RecordSchema) GetFieldIndex(fname string) int { 149 | name := strings.ToLower(fname) 150 | if idx, ok := rs.fieldIndexMap[name]; ok { 151 | return idx 152 | } 153 | return -1 154 | } 155 | 156 | func (rs *RecordSchema) GetFieldByIndex(idx int) (*Field, error) { 157 | if idx < 0 || idx >= len(rs.Fields) { 158 | return nil, newFieldNotExistsError(fmt.Sprintf("field index[%d] out of range", idx)) 159 | } 160 | 161 | return &rs.Fields[idx], nil 162 | } 163 | 164 | func (rs *RecordSchema) GetFieldByName(fname string) (*Field, error) { 165 | idx := rs.GetFieldIndex(strings.ToLower(fname)) 166 | 167 | if idx == -1 { 168 | return nil, newFieldNotExistsError(fmt.Sprintf("field[%s] not exist", fname)) 169 | } 170 | 171 | return rs.GetFieldByIndex(idx) 172 | } 173 | 174 | // Size get record schema fields size 175 | func (rs *RecordSchema) Size() int { 176 | return len(rs.Fields) 177 | } 178 | 179 | type RecordSchemaInfo struct { 180 | VersionId int `json:"VersionId"` 181 | RecordSchema RecordSchema `json:"RecordSchema"` 182 | } 183 | -------------------------------------------------------------------------------- /datahub/avro_helper_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/hamba/avro/v2" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestGenBlobSchema(t *testing.T) { 11 | avroSchema, err := getAvroSchema(nil) 12 | assert.Nil(t, err) 13 | recordSchema, ok := avroSchema.(*avro.RecordSchema) 14 | assert.True(t, ok) 15 | 
assert.Equal(t, recordSchema.Name(), "AvroRecord") 16 | assert.Equal(t, len(recordSchema.Fields()), 2) 17 | assert.Equal(t, recordSchema.Fields()[0].String(), "{\"name\":\"data\",\"type\":\"bytes\"}") 18 | assert.Equal(t, recordSchema.Fields()[1].String(), "{\"name\":\"__dh_attribute__\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}") 19 | } 20 | 21 | func TestGenTupleSchema(t *testing.T) { 22 | dhSchema := NewRecordSchema() 23 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 24 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 25 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 26 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 27 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 28 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 29 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 30 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 31 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 32 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: true}) 33 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 34 | 35 | avroSchema, err := getAvroSchema(dhSchema) 36 | assert.Nil(t, err) 37 | recordSchema, ok := avroSchema.(*avro.RecordSchema) 38 | assert.True(t, ok) 39 | assert.Equal(t, recordSchema.Name(), "AvroRecord") 40 | assert.Equal(t, len(recordSchema.Fields()), 12) 41 | assert.Equal(t, recordSchema.Fields()[0].String(), "{\"name\":\"f1\",\"type\":[\"null\",\"boolean\"]}") 42 | assert.Equal(t, recordSchema.Fields()[1].String(), "{\"name\":\"f2\",\"type\":[\"null\",\"int\"]}") 43 | assert.Equal(t, recordSchema.Fields()[2].String(), "{\"name\":\"f3\",\"type\":[\"null\",\"int\"]}") 44 | assert.Equal(t, recordSchema.Fields()[3].String(), "{\"name\":\"f4\",\"type\":[\"null\",\"int\"]}") 45 | assert.Equal(t, recordSchema.Fields()[4].String(), "{\"name\":\"f5\",\"type\":[\"null\",\"long\"]}") 46 | assert.Equal(t, recordSchema.Fields()[5].String(), "{\"name\":\"f6\",\"type\":[\"null\",\"long\"]}") 47 | assert.Equal(t, recordSchema.Fields()[6].String(), "{\"name\":\"f7\",\"type\":[\"null\",\"float\"]}") 48 | assert.Equal(t, recordSchema.Fields()[7].String(), "{\"name\":\"f8\",\"type\":[\"null\",\"double\"]}") 49 | assert.Equal(t, recordSchema.Fields()[8].String(), "{\"name\":\"f9\",\"type\":[\"null\",\"string\"]}") 50 | assert.Equal(t, recordSchema.Fields()[9].String(), "{\"name\":\"f10\",\"type\":[\"null\",\"string\"]}") 51 | assert.Equal(t, recordSchema.Fields()[10].String(), "{\"name\":\"f11\",\"type\":[\"null\",\"string\"]}") 52 | assert.Equal(t, recordSchema.Fields()[11].String(), "{\"name\":\"__dh_attribute__\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}") 53 | } 54 | 55 | func TestGenTupleSchemaWithNotNull(t *testing.T) { 56 | dhSchema := NewRecordSchema() 57 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: false}) 58 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: false}) 59 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: false}) 60 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: false}) 61 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: false}) 62 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: false}) 63 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: false}) 64 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: 
false}) 65 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: false}) 66 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: false}) 67 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: false}) 68 | 69 | avroSchema, err := getAvroSchema(dhSchema) 70 | assert.Nil(t, err) 71 | recordSchema, ok := avroSchema.(*avro.RecordSchema) 72 | assert.True(t, ok) 73 | assert.Equal(t, recordSchema.Name(), "AvroRecord") 74 | assert.Equal(t, len(recordSchema.Fields()), 12) 75 | assert.Equal(t, recordSchema.Fields()[0].String(), "{\"name\":\"f1\",\"type\":\"boolean\"}") 76 | assert.Equal(t, recordSchema.Fields()[1].String(), "{\"name\":\"f2\",\"type\":\"int\"}") 77 | assert.Equal(t, recordSchema.Fields()[2].String(), "{\"name\":\"f3\",\"type\":\"int\"}") 78 | assert.Equal(t, recordSchema.Fields()[3].String(), "{\"name\":\"f4\",\"type\":\"int\"}") 79 | assert.Equal(t, recordSchema.Fields()[4].String(), "{\"name\":\"f5\",\"type\":\"long\"}") 80 | assert.Equal(t, recordSchema.Fields()[5].String(), "{\"name\":\"f6\",\"type\":\"long\"}") 81 | assert.Equal(t, recordSchema.Fields()[6].String(), "{\"name\":\"f7\",\"type\":\"float\"}") 82 | assert.Equal(t, recordSchema.Fields()[7].String(), "{\"name\":\"f8\",\"type\":\"double\"}") 83 | assert.Equal(t, recordSchema.Fields()[8].String(), "{\"name\":\"f9\",\"type\":\"string\"}") 84 | assert.Equal(t, recordSchema.Fields()[9].String(), "{\"name\":\"f10\",\"type\":\"string\"}") 85 | assert.Equal(t, recordSchema.Fields()[10].String(), "{\"name\":\"f11\",\"type\":\"string\"}") 86 | assert.Equal(t, recordSchema.Fields()[11].String(), "{\"name\":\"__dh_attribute__\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}") 87 | } 88 | -------------------------------------------------------------------------------- /examples/datahubcmd/record.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "strings" 9 | "time" 10 | 11 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 12 | ) 13 | 14 | // subcommands 15 | var PutRecordsCommand *flag.FlagSet 16 | var GetRecordsCommand *flag.FlagSet 17 | 18 | // flag arguments 19 | var ShardId string 20 | var RecordSource string 21 | var Timeout int 22 | 23 | func init() { 24 | PutRecordsCommand = flag.NewFlagSet("pr", flag.ExitOnError) 25 | PutRecordsCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 26 | PutRecordsCommand.StringVar(&TopicName, "topic", "", "topic name. (Required)") 27 | PutRecordsCommand.StringVar(&ShardId, "shardid", "", "shard id. (Required)") 28 | PutRecordsCommand.StringVar(&RecordSource, "source", "", "record source. (Required. Blob type is file name, Tuple type is json string)") 29 | RegisterSubCommand("pr", PutRecordsCommand, put_records_parsed_check, put_records) 30 | 31 | GetRecordsCommand = flag.NewFlagSet("gr", flag.ExitOnError) 32 | GetRecordsCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 33 | GetRecordsCommand.StringVar(&TopicName, "topic", "", "topic name. (Required)") 34 | GetRecordsCommand.StringVar(&ShardId, "shardid", "", "shard id. 
(Required)") 35 | GetRecordsCommand.IntVar(&Timeout, "timeout", 0, "timeout.") 36 | RegisterSubCommand("gr", GetRecordsCommand, get_records_parsed_check, get_records) 37 | } 38 | 39 | func put_records_parsed_check() bool { 40 | if ProjectName == "" || TopicName == "" || ShardId == "" || RecordSource == "" { 41 | return false 42 | } 43 | return true 44 | } 45 | 46 | func put_records(dh datahub.DataHubApi) error { 47 | topic, err := dh.GetTopic(TopicName, ProjectName) 48 | if err != nil { 49 | return err 50 | } 51 | 52 | var records []datahub.IRecord 53 | switch topic.RecordType { 54 | case datahub.BLOB: 55 | dat, err := ioutil.ReadFile(RecordSource) 56 | if err != nil { 57 | return err 58 | } 59 | 60 | records = make([]datahub.IRecord, 1) 61 | record := datahub.NewBlobRecord(dat, 0) 62 | record.ShardId = ShardId 63 | records[0] = record 64 | 65 | case datahub.TUPLE: 66 | recordsData := &struct { 67 | Records []map[string]interface{} `json:records` 68 | }{} 69 | 70 | decoder := json.NewDecoder(strings.NewReader(RecordSource)) 71 | decoder.UseNumber() 72 | err := decoder.Decode(recordsData) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | records = make([]datahub.IRecord, len(recordsData.Records)) 78 | for idx, record_data := range recordsData.Records { 79 | record := datahub.NewTupleRecord(topic.RecordSchema, 0) 80 | for key, val := range record_data { 81 | record.ShardId = ShardId 82 | record.SetValueByName(key, val) 83 | } 84 | records[idx] = record 85 | } 86 | } 87 | 88 | trynum := 0 89 | for { 90 | result, err := dh.PutRecords(ProjectName, TopicName, records) 91 | if err != nil { 92 | return err 93 | } 94 | if len(result.FailedRecords) == 0 { 95 | fmt.Println("put records suc! trynum:", trynum) 96 | return nil 97 | } else { 98 | fail_records := make([]datahub.IRecord, len(result.FailedRecords)) 99 | for idx, failinfo := range result.FailedRecords { 100 | fail_records[idx] = records[failinfo.Index] 101 | } 102 | records = fail_records 103 | } 104 | fmt.Printf("put records failed last time, trynum: %d, result: \n%s\n", trynum, result) 105 | trynum++ 106 | return nil 107 | } 108 | } 109 | 110 | func get_records_parsed_check() bool { 111 | if ProjectName == "" || TopicName == "" || ShardId == "" { 112 | return false 113 | } 114 | return true 115 | } 116 | 117 | func get_records(dh datahub.DataHubApi) error { 118 | topic, err := dh.GetTopic(ProjectName, TopicName) 119 | if err != nil { 120 | return err 121 | } 122 | 123 | gc, err := dh.GetCursor(ProjectName, TopicName, ShardId, datahub.OLDEST) 124 | if err != nil { 125 | return err 126 | } 127 | 128 | rch := make(chan datahub.IRecord, 10) 129 | quit := make(chan int) 130 | 131 | // productor goroutine 132 | go func(dh datahub.DataHubApi, projectName, topicName, shardId, cursor string, schema *datahub.RecordSchema) { 133 | for { 134 | result, err := dh.GetTupleRecords(projectName, topicName, shardId, cursor, 10, schema) 135 | //result, err := dh.GetRecords(topic, shardId, cursor, 10) 136 | if err != nil { 137 | fmt.Println("get records occured error! 
err=", err) 138 | continue 139 | } 140 | 141 | if len(result.Records) == 0 { 142 | continue 143 | } 144 | 145 | for _, record := range result.Records { 146 | rch <- record 147 | } 148 | cursor = result.NextCursor 149 | } 150 | }(dh, ProjectName, TopicName, ShardId, gc.Cursor, topic.RecordSchema) 151 | 152 | // consumer goroutine 153 | go func(rt datahub.RecordType) { 154 | switch topic.RecordType { 155 | case datahub.BLOB: 156 | for record := range rch { 157 | br := record.(*datahub.BlobRecord) 158 | fmt.Println(br) 159 | } 160 | case datahub.TUPLE: 161 | for record := range rch { 162 | tr := record.(*datahub.TupleRecord) 163 | fmt.Println(tr) 164 | } 165 | } 166 | }(topic.RecordType) 167 | 168 | // timeout goroutine 169 | go func(timeout int) { 170 | if timeout > 0 { 171 | time.Sleep(time.Duration(timeout) * time.Second) 172 | quit <- 1 173 | } 174 | }(Timeout) 175 | 176 | <-quit 177 | fmt.Println("get records main thread is timeout, quit!") 178 | return nil 179 | } 180 | -------------------------------------------------------------------------------- /examples/datahubcmd/topic.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "strings" 9 | 10 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 11 | ) 12 | 13 | // subcommands 14 | var ListTopicsCommand *flag.FlagSet 15 | var GetTopicCommand *flag.FlagSet 16 | var CreateTopicCommand *flag.FlagSet 17 | var DeleteTopicCommand *flag.FlagSet 18 | var UpdateTopicCommand *flag.FlagSet 19 | 20 | // flag arguments 21 | var TopicName string 22 | var ShardCount int 23 | var RecordType string 24 | var RecordSchema string 25 | var Comment string 26 | var Lifecycle int 27 | 28 | func init() { 29 | // list topics cmd 30 | ListTopicsCommand = flag.NewFlagSet("lt", flag.ExitOnError) 31 | ListTopicsCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 32 | RegisterSubCommand("lt", ListTopicsCommand, list_topics_parsed_check, list_topics) 33 | 34 | // get topic cmd 35 | GetTopicCommand = flag.NewFlagSet("gt", flag.ExitOnError) 36 | GetTopicCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 37 | GetTopicCommand.StringVar(&TopicName, "topic", "", "topic name. (Required)") 38 | RegisterSubCommand("gt", GetTopicCommand, get_topic_parsed_check, get_topic) 39 | 40 | // create topic cmd 41 | CreateTopicCommand = flag.NewFlagSet("ct", flag.ExitOnError) 42 | CreateTopicCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 43 | CreateTopicCommand.StringVar(&TopicName, "topic", "", "topic name. (Required)") 44 | CreateTopicCommand.IntVar(&ShardCount, "shardcount", 3, "shard count.") 45 | CreateTopicCommand.StringVar(&RecordType, "type", "blob", "record type.") 46 | CreateTopicCommand.StringVar(&RecordSchema, "schema", "", "record schema. (If type is tuple, it is required, and only json format)") 47 | CreateTopicCommand.StringVar(&Comment, "comment", "topic comment", "topic comment.") 48 | CreateTopicCommand.IntVar(&Lifecycle, "lifecycle", 7, "topic life cycle.") 49 | RegisterSubCommand("ct", CreateTopicCommand, create_topic_parsed_check, create_topic) 50 | 51 | // delete topic cmd 52 | DeleteTopicCommand = flag.NewFlagSet("dt", flag.ExitOnError) 53 | DeleteTopicCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 54 | DeleteTopicCommand.StringVar(&TopicName, "topic", "", "topic name. 
(Required)") 55 | RegisterSubCommand("dt", DeleteTopicCommand, delete_topic_parsed_check, delete_topic) 56 | 57 | // update topic cmd 58 | UpdateTopicCommand = flag.NewFlagSet("ut", flag.ExitOnError) 59 | UpdateTopicCommand.StringVar(&ProjectName, "project", "", "project name. (Required)") 60 | UpdateTopicCommand.StringVar(&TopicName, "topic", "", "topic name. (Required)") 61 | UpdateTopicCommand.IntVar(&Lifecycle, "lifecycle", 7, "topic life cycle.") 62 | UpdateTopicCommand.StringVar(&Comment, "comment", "", "topic comment.") 63 | RegisterSubCommand("ut", UpdateTopicCommand, update_topic_parsed_check, update_topic) 64 | } 65 | 66 | func list_topics_parsed_check() bool { 67 | if ProjectName == "" { 68 | return false 69 | } 70 | return true 71 | } 72 | 73 | func list_topics(dh datahub.DataHubApi) error { 74 | topics, err := dh.ListTopic(ProjectName) 75 | if err != nil { 76 | return err 77 | } 78 | fmt.Println(*topics) 79 | return nil 80 | } 81 | 82 | func get_topic_parsed_check() bool { 83 | if ProjectName == "" || TopicName == "" { 84 | return false 85 | } 86 | return true 87 | } 88 | 89 | func get_topic(dh datahub.DataHubApi) error { 90 | topic, err := dh.GetTopic(ProjectName, TopicName) 91 | if err != nil { 92 | return err 93 | } 94 | fmt.Println(*topic) 95 | return nil 96 | } 97 | 98 | func create_topic_parsed_check() bool { 99 | if ProjectName == "" || TopicName == "" { 100 | return false 101 | } 102 | if strings.ToLower(RecordType) == "tuple" && RecordSchema == "" { 103 | return false 104 | } 105 | return true 106 | } 107 | 108 | func create_topic(dh datahub.DataHubApi) error { 109 | 110 | if strings.ToLower(RecordType) == "tuple" { 111 | recordSchema := datahub.NewRecordSchema() 112 | var schameMap map[string]string 113 | buffer := bytes.NewBufferString(RecordSchema) 114 | err := json.Unmarshal(buffer.Bytes(), &schameMap) 115 | if err != nil { 116 | return err 117 | } 118 | for key, val := range schameMap { 119 | field := datahub.Field{ 120 | Name: key, 121 | Type: datahub.FieldType(strings.ToUpper(val)), 122 | } 123 | recordSchema.AddField(field) 124 | } 125 | if _, err := dh.CreateTupleTopic(ProjectName, TopicName, Comment, ShardCount, Lifecycle, recordSchema); err != nil { 126 | return err 127 | } 128 | } else { 129 | if _, err := dh.CreateBlobTopic(ProjectName, TopicName, Comment, ShardCount, Lifecycle); err != nil { 130 | return err 131 | } 132 | } 133 | if ready := dh.WaitAllShardsReadyWithTime(ProjectName, TopicName, 1); ready { 134 | fmt.Printf("all shard ready? 
%v\n", ready) 135 | } 136 | fmt.Printf("topic create suc!\n") 137 | return nil 138 | } 139 | 140 | func delete_topic_parsed_check() bool { 141 | if ProjectName == "" || TopicName == "" { 142 | return false 143 | } 144 | return true 145 | } 146 | 147 | func delete_topic(dh datahub.DataHubApi) error { 148 | if _, err := dh.DeleteTopic(ProjectName, TopicName); err != nil { 149 | return err 150 | } 151 | fmt.Printf("del %s suc\n", TopicName) 152 | return nil 153 | } 154 | 155 | func update_topic_parsed_check() bool { 156 | if ProjectName == "" || TopicName == "" { 157 | return false 158 | } 159 | return true 160 | } 161 | 162 | func update_topic(dh datahub.DataHubApi) error { 163 | if _, err := dh.UpdateTopic(ProjectName, TopicName, Comment); err != nil { 164 | return err 165 | } 166 | fmt.Printf("update %s suc\n", TopicName) 167 | return nil 168 | } 169 | -------------------------------------------------------------------------------- /datahub/compress.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "io" 7 | "strings" 8 | 9 | "github.com/pierrec/lz4" 10 | 11 | "github.com/klauspost/compress/zstd" 12 | ) 13 | 14 | // compress type 15 | type CompressorType string 16 | 17 | const ( 18 | NOCOMPRESS CompressorType = "" 19 | LZ4 CompressorType = "lz4" 20 | DEFLATE CompressorType = "deflate" 21 | ZLIB CompressorType = "zlib" // Deprecated: Use DEFLATE instead. 22 | ZSTD CompressorType = "zstd" 23 | ) 24 | 25 | // validate that the type is valid 26 | func validateCompressorType(ct CompressorType) bool { 27 | switch ct { 28 | case NOCOMPRESS, LZ4, DEFLATE, ZLIB, ZSTD: 29 | return true 30 | } 31 | return false 32 | } 33 | 34 | func getCompressTypeFromValue(value int) CompressorType { 35 | switch value { 36 | case 0: 37 | return NOCOMPRESS 38 | case 1: 39 | return DEFLATE 40 | case 2: 41 | return LZ4 42 | case 3: 43 | return ZSTD 44 | default: 45 | return NOCOMPRESS 46 | } 47 | } 48 | 49 | func parseCompressType(str string) CompressorType { 50 | lower := strings.ToLower(str) 51 | switch lower { 52 | case "lz4": 53 | return LZ4 54 | case "deflate": 55 | return DEFLATE 56 | case "zlib": 57 | return ZLIB 58 | case "zstd": 59 | return ZSTD 60 | default: 61 | return NOCOMPRESS 62 | } 63 | } 64 | 65 | func (ct *CompressorType) String() string { 66 | return string(*ct) 67 | } 68 | 69 | func (ct *CompressorType) toValue() int { 70 | switch *ct { 71 | case NOCOMPRESS: 72 | return 0 73 | case DEFLATE: 74 | return 1 75 | case LZ4: 76 | return 2 77 | case ZSTD: 78 | return 3 79 | default: 80 | return 0 81 | } 82 | } 83 | 84 | // Compressor is a interface for the compress 85 | type compressor interface { 86 | Compress(data []byte) ([]byte, error) 87 | DeCompress(data []byte, rawSize int64) ([]byte, error) 88 | } 89 | 90 | type lz4Compressor struct { 91 | } 92 | 93 | func (lc *lz4Compressor) Compress(data []byte) ([]byte, error) { 94 | if len(data) == 0 { 95 | return nil, nil 96 | } 97 | 98 | buf := make([]byte, lz4.CompressBlockBound(len(data))) 99 | ht := make([]int, 64<<10) 100 | n, err := lz4.CompressBlock(data, buf, ht) 101 | if err != nil { 102 | return nil, err 103 | } 104 | 105 | if n == 0 { 106 | return data, nil 107 | } 108 | 109 | return buf[:n], nil 110 | } 111 | 112 | func (lc *lz4Compressor) DeCompress(data []byte, rawSize int64) ([]byte, error) { 113 | // Allocated a very large buffer for decompression. 
114 | buf := make([]byte, rawSize) 115 | _, err := lz4.UncompressBlock(data, buf) 116 | if err != nil { 117 | return nil, err 118 | } 119 | return buf, nil 120 | } 121 | 122 | type deflateCompressor struct { 123 | } 124 | 125 | func (dc *deflateCompressor) Compress(data []byte) ([]byte, error) { 126 | var buf bytes.Buffer 127 | w := zlib.NewWriter(&buf) 128 | if _, err := w.Write(data); err != nil { 129 | return nil, err 130 | } 131 | if err := w.Close(); err != nil { 132 | return nil, err 133 | } 134 | return buf.Bytes(), nil 135 | } 136 | 137 | func (dc *deflateCompressor) DeCompress(data []byte, rawSize int64) ([]byte, error) { 138 | b := bytes.NewReader(data) 139 | var buf bytes.Buffer 140 | r, _ := zlib.NewReader(b) 141 | if _, err := io.Copy(&buf, r); err != nil { 142 | return nil, err 143 | } 144 | return buf.Bytes(), nil 145 | } 146 | 147 | type zlibCompressor struct { 148 | } 149 | 150 | func (zc *zlibCompressor) Compress(data []byte) ([]byte, error) { 151 | var buf bytes.Buffer 152 | w := zlib.NewWriter(&buf) 153 | if _, err := w.Write(data); err != nil { 154 | return nil, err 155 | } 156 | if err := w.Close(); err != nil { 157 | return nil, err 158 | } 159 | return buf.Bytes(), nil 160 | } 161 | 162 | func (zc *zlibCompressor) DeCompress(data []byte, rawSize int64) ([]byte, error) { 163 | b := bytes.NewReader(data) 164 | var buf bytes.Buffer 165 | r, _ := zlib.NewReader(b) 166 | if _, err := io.Copy(&buf, r); err != nil { 167 | return nil, err 168 | } 169 | return buf.Bytes(), nil 170 | } 171 | 172 | type zstdCompressor struct { 173 | } 174 | 175 | func (zc *zstdCompressor) Compress(data []byte) ([]byte, error) { 176 | buffer := bytes.NewBuffer(make([]byte, 0, 16*1024)) 177 | writer, err := zstd.NewWriter(buffer, zstd.WithEncoderLevel(zstd.SpeedFastest)) 178 | if err != nil { 179 | return nil, err 180 | } 181 | 182 | if _, err := writer.Write(data); err != nil { 183 | return nil, err 184 | } 185 | 186 | if err := writer.Close(); err != nil { 187 | return nil, err 188 | } 189 | 190 | return buffer.Bytes(), nil 191 | } 192 | 193 | func (zc *zstdCompressor) DeCompress(data []byte, rawSize int64) ([]byte, error) { 194 | reader, err := zstd.NewReader(bytes.NewReader(data)) 195 | if err != nil { 196 | return nil, err 197 | } 198 | 199 | defer reader.Close() 200 | 201 | var buf bytes.Buffer 202 | io.Copy(&buf, reader) 203 | return buf.Bytes(), nil 204 | } 205 | 206 | var compressorMap map[CompressorType]compressor = map[CompressorType]compressor{ 207 | LZ4: &lz4Compressor{}, 208 | DEFLATE: &deflateCompressor{}, 209 | ZLIB: &zlibCompressor{}, 210 | ZSTD: &zstdCompressor{}, 211 | } 212 | 213 | func newCompressor(c CompressorType) compressor { 214 | switch CompressorType(c) { 215 | case LZ4: 216 | return &lz4Compressor{} 217 | case DEFLATE: 218 | return &deflateCompressor{} 219 | case ZLIB: 220 | return &zlibCompressor{} 221 | case ZSTD: 222 | return &zstdCompressor{} 223 | default: 224 | return nil 225 | } 226 | } 227 | 228 | func getCompressor(c CompressorType) compressor { 229 | if c == NOCOMPRESS { 230 | return nil 231 | } 232 | ret, ok := compressorMap[c] 233 | if !ok { 234 | com := newCompressor(c) 235 | compressorMap[c] = com 236 | } 237 | return ret 238 | } 239 | -------------------------------------------------------------------------------- /examples/api/connector.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 7 | ) 8 | 9 | func main() { 10 
| dh = datahub.New(accessId, accessKey, endpoint) 11 | } 12 | 13 | func createConnector() { 14 | odpsTimeRange := 60 15 | odpsPartitionMode := datahub.SystemTimeMode 16 | connectorType := datahub.SinkOdps 17 | 18 | odpsPartitionConfig := datahub.NewPartitionConfig() 19 | odpsPartitionConfig.AddConfig("ds", "%Y%m%d") 20 | odpsPartitionConfig.AddConfig("hh", "%H") 21 | odpsPartitionConfig.AddConfig("mm", "%M") 22 | 23 | sinkOdpsConfig := &datahub.SinkOdpsConfig{ 24 | Endpoint: odpsEndpoint, 25 | Project: odpsProject, 26 | Table: odpsTable, 27 | AccessId: odpsAccessId, 28 | AccessKey: odpsAccessKey, 29 | TimeRange: odpsTimeRange, 30 | PartitionMode: odpsPartitionMode, 31 | PartitionConfig: *odpsPartitionConfig, 32 | } 33 | 34 | fileds := []string{"field1", "field2"} 35 | 36 | ccr, err := dh.CreateConnector(projectName, topicName, connectorType, fileds, *sinkOdpsConfig) 37 | if err != nil { 38 | fmt.Println("create odps connector failed") 39 | fmt.Println(err) 40 | return 41 | } 42 | fmt.Println("create odps connector successful") 43 | fmt.Println(*ccr) 44 | 45 | } 46 | 47 | func getConnector() { 48 | gcr, err := dh.GetConnector(projectName, topicName, connectorId) 49 | if err != nil { 50 | fmt.Println("get odps conector failed") 51 | fmt.Println(err) 52 | return 53 | } 54 | fmt.Println("get odps conector successful") 55 | fmt.Println(*gcr) 56 | } 57 | 58 | func updateConnector() { 59 | gc, err := dh.GetConnector(projectName, topicName, connectorId) 60 | if err != nil { 61 | fmt.Println("get odps connector failed") 62 | fmt.Println(err) 63 | return 64 | } 65 | config, ok := gc.Config.(datahub.SinkOdpsConfig) 66 | if !ok { 67 | fmt.Println("convert config to SinkOdpsConfig failed") 68 | return 69 | } 70 | 71 | // modify the config 72 | config.TimeRange = 200 73 | 74 | if _, err := dh.UpdateConnector(projectName, topicName, connectorId, config); err != nil { 75 | fmt.Println("update odps config failed") 76 | fmt.Println(err) 77 | return 78 | } 79 | fmt.Println("update odps config successful") 80 | } 81 | 82 | func listConnector() { 83 | lc, err := dh.ListConnector(projectName, topicName) 84 | if err != nil { 85 | fmt.Println("get connector list failed") 86 | fmt.Println(err) 87 | return 88 | } 89 | fmt.Println("get connector list successful") 90 | fmt.Println(*lc) 91 | } 92 | 93 | func deleteConnector() { 94 | if _, err := dh.DeleteConnector(projectName, topicName, connectorId); err != nil { 95 | if _, ok := err.(*datahub.ResourceNotFoundError); ok { 96 | fmt.Println("odps connector not found") 97 | } else { 98 | fmt.Println("delete odps connector failed") 99 | fmt.Println(err) 100 | return 101 | } 102 | } 103 | fmt.Println("delete odps connector successful") 104 | } 105 | 106 | func reloadConnector() { 107 | if _, err := dh.ReloadConnector(projectName, topicName, connectorId); err != nil { 108 | fmt.Println("reload connector shard failed") 109 | fmt.Println(err) 110 | return 111 | } 112 | fmt.Println("reload connector shard successful") 113 | 114 | shardId := "2" 115 | if _, err := dh.ReloadConnectorByShard(projectName, topicName, connectorId, shardId); err != nil { 116 | fmt.Println("reload connector shard failed") 117 | fmt.Println(err) 118 | return 119 | } 120 | fmt.Println("reload connector shard successful") 121 | } 122 | 123 | func updateConnectorState() { 124 | if _, err := dh.UpdateConnectorState(projectName, topicName, connectorId, datahub.ConnectorStopped); err != nil { 125 | fmt.Println("update connector state failed") 126 | fmt.Println(err) 127 | return 128 | } 129 | 
fmt.Println("update connector state successful") 130 | 131 | if _, err := dh.UpdateConnectorState(projectName, topicName, connectorId, datahub.ConnectorRunning); err != nil { 132 | fmt.Println("update connector state failed") 133 | fmt.Println(err) 134 | return 135 | } 136 | fmt.Println("update connector state successful") 137 | } 138 | 139 | func getConnectorShardStatus() { 140 | gcs, err := dh.GetConnectorShardStatus(projectName, topicName, connectorId) 141 | if err != nil { 142 | fmt.Println("get connector shard status failed") 143 | fmt.Println(err) 144 | return 145 | } 146 | fmt.Println("get connector shard status successful") 147 | for shard, status := range gcs.ShardStatus { 148 | fmt.Println(shard, status.State) 149 | } 150 | 151 | shardId := "0" 152 | gc, err := dh.GetConnectorShardStatusByShard(projectName, topicName, connectorId, shardId) 153 | if err != nil { 154 | fmt.Println("get connector shard status failed") 155 | fmt.Println(err) 156 | return 157 | } 158 | fmt.Println("get connector shard status successful") 159 | fmt.Println(*gc) 160 | } 161 | 162 | func updateConnectorOffset() { 163 | shardId := "10" 164 | offset := datahub.ConnectorOffset{ 165 | Timestamp: 1565864139000, 166 | Sequence: 104, 167 | } 168 | 169 | dh.UpdateConnectorState(projectName, topicName, connectorId, datahub.ConnectorStopped) 170 | defer dh.UpdateConnectorState(projectName, topicName, connectorId, datahub.ConnectorRunning) 171 | if err, _ := dh.UpdateConnectorOffset(projectName, topicName, connectorId, shardId, offset); err != nil { 172 | fmt.Println("update connector offset failed") 173 | fmt.Println(err) 174 | return 175 | } 176 | fmt.Println("update connector offset successful") 177 | } 178 | 179 | func doneTime() { 180 | 181 | gcd, err := dh.GetConnectorDoneTime(projectName, topicName, connectorId) 182 | if err != nil { 183 | fmt.Println("get connector done time failed") 184 | fmt.Println(err) 185 | return 186 | } 187 | fmt.Println("get connector done time successful") 188 | fmt.Println(gcd.DoneTime) 189 | } 190 | 191 | func appendConnectorField(dh datahub.DataHub, projectName, topicName, connectorId string) { 192 | if _, err := dh.AppendConnectorField(projectName, topicName, connectorId, "field2"); err != nil { 193 | fmt.Println("append filed failed") 194 | fmt.Println(err) 195 | return 196 | } 197 | fmt.Println("append filed successful") 198 | } 199 | -------------------------------------------------------------------------------- /datahub/data_serializer_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "encoding/json" 5 | "math" 6 | "math/rand" 7 | "testing" 8 | "time" 9 | 10 | "github.com/hamba/avro/v2" 11 | "github.com/shopspring/decimal" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | type topicSchemaCacheForTest struct { 16 | avroSchema avro.Schema 17 | dhSchema *RecordSchema 18 | } 19 | 20 | func (tsc *topicSchemaCacheForTest) getMaxSchemaVersionId() int { 21 | return 1 22 | } 23 | func (tsc *topicSchemaCacheForTest) getSchemaByVersionId(versionId int) *RecordSchema { 24 | return tsc.dhSchema 25 | } 26 | func (tsc *topicSchemaCacheForTest) getVersionIdBySchema(schema *RecordSchema) int { 27 | return 1 28 | } 29 | func (tsc *topicSchemaCacheForTest) getAvroSchema(schema *RecordSchema) avro.Schema { 30 | return tsc.avroSchema 31 | } 32 | func (tsc *topicSchemaCacheForTest) getAvroSchemaByVersionId(versionId int) avro.Schema { 33 | return tsc.avroSchema 34 | } 35 | 36 | func randomString(length 
int) string { 37 | const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 38 | s := make([]byte, 0) 39 | for i := 0; i < length; i++ { 40 | s = append(s, letterBytes[rand.Int()%len(letterBytes)]) 41 | } 42 | 43 | return string(s) 44 | } 45 | 46 | func randomJson() string { 47 | m := make(map[string]string) 48 | m[randomString(10)] = randomString(20) 49 | m[randomString(10)] = randomString(20) 50 | m[randomString(10)] = randomString(20) 51 | m[randomString(10)] = randomString(20) 52 | val, err := json.Marshal(m) 53 | if err != nil { 54 | panic(err) 55 | } 56 | return string(val) 57 | } 58 | 59 | func genTupleRecord(schema *RecordSchema) *TupleRecord { 60 | return genTupleRecordWithNull(schema, 0) 61 | } 62 | 63 | func genTupleRecordWithNull(schema *RecordSchema, nullRatio int) *TupleRecord { 64 | record := NewTupleRecord(schema) 65 | for _, field := range schema.Fields { 66 | if nullRatio > 0 && rand.Intn(100) < nullRatio { 67 | record.SetValueByName(field.Name, nil) 68 | continue 69 | } 70 | switch field.Type { 71 | case BOOLEAN: 72 | record.SetValueByName(field.Name, rand.Int()%2 == 0) 73 | case TINYINT: 74 | record.SetValueByName(field.Name, rand.Intn(math.MaxInt8)) 75 | case SMALLINT: 76 | record.SetValueByName(field.Name, rand.Intn(math.MaxInt16)) 77 | case INTEGER: 78 | record.SetValueByName(field.Name, rand.Intn(math.MaxInt32)) 79 | case BIGINT: 80 | record.SetValueByName(field.Name, rand.Int63()) 81 | case TIMESTAMP: 82 | record.SetValueByName(field.Name, time.Now().UnixMilli()) 83 | case FLOAT: 84 | record.SetValueByName(field.Name, rand.Float32()) 85 | case DOUBLE: 86 | record.SetValueByName(field.Name, rand.Float64()) 87 | case STRING: 88 | record.SetValueByName(field.Name, randomString(64)) 89 | case DECIMAL: 90 | record.SetValueByName(field.Name, decimal.NewFromFloat32(rand.Float32())) 91 | case JSON: 92 | record.SetValueByName(field.Name, randomJson()) 93 | } 94 | } 95 | record.SetAttribute(randomString(5), randomString(10)) 96 | record.SetAttribute(randomString(5), randomString(10)) 97 | return record 98 | } 99 | 100 | // func genBlobRecord() *TupleRecord { 101 | // } 102 | 103 | func TestAvroNormalSerialize(t *testing.T) { 104 | dhSchema := NewRecordSchema() 105 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 106 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 107 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 108 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 109 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 110 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 111 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 112 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 113 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 114 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: true}) 115 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 116 | 117 | avroSchema, _ := getAvroSchema(dhSchema) 118 | cache := topicSchemaCacheForTest{ 119 | avroSchema: avroSchema, 120 | dhSchema: dhSchema, 121 | } 122 | 123 | ser := newDataSerializer(&cache) 124 | 125 | recordNum := 100 126 | records := make([]IRecord, 0) 127 | for i := 0; i < recordNum; i++ { 128 | records = append(records, genTupleRecord(dhSchema)) 129 | } 130 | 131 | buf, err := ser.serialize(records) 132 | assert.Nil(t, err) 133 | 134 | header := batchHeader{ 135 | 
schemaVersion: 1, 136 | recordCount: int32(recordNum), 137 | } 138 | dser := newDataDeserializer(&cache) 139 | newRecords, err := dser.deserialize(buf, &header) 140 | assert.Nil(t, err) 141 | 142 | assert.Equal(t, len(records), len(newRecords)) 143 | for i := 0; i < len(records); i++ { 144 | assert.Equal(t, records[i].GetAttributes(), newRecords[i].GetAttributes()) 145 | assert.Equal(t, records[i].GetData(), newRecords[i].GetData()) 146 | } 147 | } 148 | 149 | func TestAvroSerializeWithNullValue(t *testing.T) { 150 | dhSchema := NewRecordSchema() 151 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 152 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 153 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 154 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 155 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 156 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 157 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 158 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 159 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 160 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: true}) 161 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 162 | 163 | avroSchema, _ := getAvroSchema(dhSchema) 164 | cache := topicSchemaCacheForTest{ 165 | avroSchema: avroSchema, 166 | dhSchema: dhSchema, 167 | } 168 | 169 | ser := newDataSerializer(&cache) 170 | 171 | recordNum := 100 172 | records := make([]IRecord, 0) 173 | for i := 0; i < recordNum; i++ { 174 | records = append(records, genTupleRecordWithNull(dhSchema, 20)) 175 | } 176 | 177 | buf, err := ser.serialize(records) 178 | assert.Nil(t, err) 179 | 180 | header := batchHeader{ 181 | schemaVersion: 1, 182 | recordCount: int32(recordNum), 183 | } 184 | dser := newDataDeserializer(&cache) 185 | newRecords, err := dser.deserialize(buf, &header) 186 | assert.Nil(t, err) 187 | 188 | assert.Equal(t, len(records), len(newRecords)) 189 | for i := 0; i < len(records); i++ { 190 | assert.Equal(t, records[i].GetAttributes(), newRecords[i].GetAttributes()) 191 | assert.Equal(t, records[i].GetData(), newRecords[i].GetData()) 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /datahub/schemaclient.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | 10 | "github.com/hamba/avro/v2" 11 | 12 | log "github.com/sirupsen/logrus" 13 | ) 14 | 15 | const ( 16 | invalidSchemaVersionId = math.MinInt32 17 | blobSchemaVersionId = -1 18 | ) 19 | 20 | var ( 21 | sSchemaOnce sync.Once 22 | sSchemaClient schemaClient 23 | ) 24 | 25 | type topicSchemaCache interface { 26 | getMaxSchemaVersionId() int 27 | getSchemaByVersionId(versionId int) *RecordSchema 28 | getVersionIdBySchema(schema *RecordSchema) int 29 | getAvroSchema(schema *RecordSchema) avro.Schema 30 | getAvroSchemaByVersionId(versionId int) avro.Schema 31 | } 32 | 33 | type topicSchemaItem struct { 34 | accessTime atomic.Value 35 | cache topicSchemaCache 36 | } 37 | 38 | func NewTopicSchemaCache(project string, topic string, client DataHubApi) *topicSchemaItem { 39 | var now atomic.Value 40 | now.Store(time.Now()) 41 | return &topicSchemaItem{ 42 | accessTime: now, 43 | cache: &topicSchemaCacheImpl{ 44 | client: client, 
45 | project: project, 46 | topic: topic, 47 | maxSchemaVersionId: -1, 48 | nextFreshTime: now, 49 | }, 50 | } 51 | } 52 | 53 | func (tsi *topicSchemaItem) getSchemaCache() topicSchemaCache { 54 | tsi.accessTime.Store(time.Now()) 55 | return tsi.cache 56 | } 57 | 58 | type schemaClient struct { 59 | lock sync.RWMutex 60 | topicCache map[string]*topicSchemaItem 61 | } 62 | 63 | func schemaClientInstance() *schemaClient { 64 | sSchemaOnce.Do(func() { 65 | sSchemaClient = schemaClient{ 66 | topicCache: map[string]*topicSchemaItem{}, 67 | } 68 | }) 69 | 70 | return &sSchemaClient 71 | } 72 | 73 | func getTopicKey(project, topic string) string { 74 | return fmt.Sprintf("%s/%s", project, topic) 75 | } 76 | 77 | func (sc *schemaClient) addTopicSchemaCache(project, topic string, client DataHubApi) topicSchemaCache { 78 | sc.lock.Lock() 79 | defer sc.lock.Unlock() 80 | 81 | // only ensure not to continue growing 82 | for k, v := range sc.topicCache { 83 | if time.Since(v.accessTime.Load().(time.Time)) > time.Duration(5)*time.Minute { 84 | delete(sc.topicCache, k) 85 | } 86 | } 87 | 88 | cache := NewTopicSchemaCache(project, topic, client) 89 | sc.topicCache[getTopicKey(project, topic)] = cache 90 | return cache.getSchemaCache() 91 | } 92 | 93 | func (sc *schemaClient) getTopicSchemaCache(project, topic string, client DataHubApi) topicSchemaCache { 94 | sc.lock.RLock() 95 | 96 | cache, exists := sc.topicCache[getTopicKey(project, topic)] 97 | if exists { 98 | defer sc.lock.RUnlock() 99 | return cache.getSchemaCache() 100 | } 101 | 102 | sc.lock.RUnlock() 103 | return sc.addTopicSchemaCache(project, topic, client) 104 | } 105 | 106 | type SchemaItem struct { 107 | dhSchema *RecordSchema 108 | avroSchema avro.Schema 109 | } 110 | 111 | type topicSchemaCacheImpl struct { 112 | client DataHubApi 113 | project string 114 | topic string 115 | topicResult *GetTopicResult 116 | maxSchemaVersionId int 117 | schemaMap map[uint32]int 118 | versionMap map[int]SchemaItem 119 | nextFreshTime atomic.Value 120 | lock sync.RWMutex 121 | } 122 | 123 | func (tsc *topicSchemaCacheImpl) freshSchema(force bool) error { 124 | nextTime := tsc.nextFreshTime.Load().(time.Time) 125 | if !force && time.Now().Before(nextTime) { 126 | return nil 127 | } 128 | 129 | // pervent fresh shard by multi goroutine 130 | newNextTime := time.Now().Add(time.Duration(5) * time.Minute) 131 | if !tsc.nextFreshTime.CompareAndSwap(nextTime, newNextTime) { 132 | return nil 133 | } 134 | 135 | var err error 136 | tsc.topicResult, err = tsc.client.GetTopic(tsc.project, tsc.topic) 137 | if err != nil { 138 | return err 139 | } 140 | 141 | res, err := tsc.client.ListTopicSchema(tsc.project, tsc.topic) 142 | if err != nil { 143 | return err 144 | } 145 | 146 | newSchemaList := make([]int, 0) 147 | newSchemaMap := map[uint32]int{} 148 | newVersionMap := map[int]SchemaItem{} 149 | maxVersion := -1 150 | for _, schema := range res.SchemaInfoList { 151 | avroSchema, err := getAvroSchema(&schema.RecordSchema) 152 | if err != nil { 153 | log.Errorf("%s/%s fresh schema failed, error:%v", tsc.project, tsc.topic, err) 154 | return err 155 | } 156 | 157 | if schema.VersionId > maxVersion { 158 | maxVersion = schema.VersionId 159 | } 160 | 161 | newSchemaList = append(newSchemaList, schema.VersionId) 162 | newVersionMap[schema.VersionId] = SchemaItem{ 163 | avroSchema: avroSchema, 164 | dhSchema: &schema.RecordSchema, 165 | } 166 | newSchemaMap[schema.RecordSchema.hashCode()] = schema.VersionId 167 | } 168 | 169 | update := false 170 | tsc.lock.RLock() 171 | if 
len(newVersionMap) != len(tsc.versionMap) { 172 | update = true 173 | } else { 174 | for versionId := range tsc.versionMap { 175 | if _, ok := newVersionMap[versionId]; !ok { 176 | update = true 177 | break 178 | } 179 | } 180 | } 181 | tsc.lock.RUnlock() 182 | 183 | if !update { 184 | log.Infof("%s/%s fresh schema success, no schema change", tsc.project, tsc.topic) 185 | } else { 186 | tsc.lock.Lock() 187 | defer tsc.lock.Unlock() 188 | tsc.maxSchemaVersionId = maxVersion 189 | tsc.schemaMap = newSchemaMap 190 | tsc.versionMap = newVersionMap 191 | log.Infof("%s/%s fresh schema success, newSchemaVersions:%v", tsc.project, tsc.topic, newSchemaList) 192 | } 193 | return nil 194 | } 195 | 196 | func (tsc *topicSchemaCacheImpl) getMaxSchemaVersionId() int { 197 | tsc.freshSchema(false) 198 | tsc.lock.RLock() 199 | defer tsc.lock.RUnlock() 200 | 201 | return tsc.maxSchemaVersionId 202 | } 203 | 204 | func (tsc *topicSchemaCacheImpl) getSchemaByVersionId(versionId int) *RecordSchema { 205 | tsc.freshSchema(false) 206 | 207 | if versionId >= 0 { 208 | tsc.lock.RLock() 209 | defer tsc.lock.RUnlock() 210 | 211 | if schemaItem, ok := tsc.versionMap[versionId]; ok { 212 | return schemaItem.dhSchema 213 | } 214 | } 215 | 216 | return nil 217 | } 218 | 219 | func (tsc *topicSchemaCacheImpl) getVersionIdBySchema(schema *RecordSchema) int { 220 | if schema == nil { 221 | return blobSchemaVersionId 222 | } 223 | 224 | tsc.freshSchema(false) 225 | tsc.lock.RLock() 226 | defer tsc.lock.RUnlock() 227 | 228 | if version, ok := tsc.schemaMap[schema.hashCode()]; ok { 229 | return version 230 | } 231 | 232 | return invalidSchemaVersionId 233 | } 234 | 235 | func (tsc *topicSchemaCacheImpl) getAvroSchema(schema *RecordSchema) avro.Schema { 236 | if schema == nil { 237 | return getAvroBlobSchema() 238 | } 239 | 240 | tsc.freshSchema(false) 241 | 242 | tsc.lock.RLock() 243 | defer tsc.lock.RUnlock() 244 | if version, ok := tsc.schemaMap[schema.hashCode()]; ok { 245 | return tsc.versionMap[version].avroSchema 246 | } 247 | 248 | return nil 249 | } 250 | 251 | func (tsc *topicSchemaCacheImpl) getAvroSchemaByVersionId(versionId int) avro.Schema { 252 | if versionId < 0 { 253 | return getAvroBlobSchema() 254 | } 255 | 256 | tsc.freshSchema(false) 257 | 258 | tsc.lock.RLock() 259 | defer tsc.lock.RUnlock() 260 | if item, ok := tsc.versionMap[versionId]; ok { 261 | return item.avroSchema 262 | } 263 | 264 | return nil 265 | } 266 | -------------------------------------------------------------------------------- /datahub/batch_serializer_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBatchNormalSerialize(t *testing.T) { 10 | dhSchema := NewRecordSchema() 11 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 12 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 13 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 14 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 15 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 16 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 17 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 18 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 19 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 20 | dhSchema.AddField(Field{Name: "f10", Type: STRING, 
AllowNull: true}) 21 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 22 | 23 | avroSchema, _ := getAvroSchema(dhSchema) 24 | cache := topicSchemaCacheForTest{ 25 | avroSchema: avroSchema, 26 | dhSchema: dhSchema, 27 | } 28 | 29 | ser := newBatchSerializer("project", "topic", &cache, ZSTD) 30 | 31 | recordNum := 1000 32 | records := make([]IRecord, 0) 33 | for i := 0; i < recordNum; i++ { 34 | records = append(records, genTupleRecord(dhSchema)) 35 | } 36 | 37 | buf, header, err := ser.serialize(records) 38 | assert.Nil(t, err) 39 | 40 | assert.Equal(t, batchMagicNum, header.magic) 41 | assert.Equal(t, int32(1), header.version) 42 | assert.Equal(t, int32(len(buf)), header.length) 43 | assert.Equal(t, int16(3), header.attribute) 44 | assert.Equal(t, int16(2), header.dataType) 45 | assert.Equal(t, int32(1), header.schemaVersion) 46 | assert.Equal(t, int32(40), header.dataOffset) 47 | assert.Equal(t, int32(recordNum), header.recordCount) 48 | 49 | meta := respMeta{ 50 | cursor: "cursor", 51 | nextCursor: "nextCursor", 52 | sequence: 100, 53 | systemTime: 200, 54 | serial: 300, 55 | } 56 | dser := newBatchDeserializer("0", &cache) 57 | newRecords, err := dser.deserialize(buf, &meta) 58 | assert.Nil(t, err) 59 | 60 | assert.Equal(t, len(records), len(newRecords)) 61 | for i := 0; i < len(records); i++ { 62 | assert.Equal(t, records[i].GetAttributes(), newRecords[i].GetAttributes()) 63 | assert.Equal(t, records[i].GetData(), newRecords[i].GetData()) 64 | assert.Equal(t, int64(100), newRecords[i].GetSequence()) 65 | assert.Equal(t, int64(200), newRecords[i].GetSystemTime()) 66 | assert.Equal(t, int64(300), newRecords[i].GetBaseRecord().Serial) 67 | assert.Equal(t, "cursor", newRecords[i].GetBaseRecord().Cursor) 68 | assert.Equal(t, "nextCursor", newRecords[i].GetBaseRecord().NextCursor) 69 | assert.Equal(t, i, newRecords[i].GetBatchIndex()) 70 | } 71 | } 72 | 73 | func TestBatchSerializeWithNullValue(t *testing.T) { 74 | dhSchema := NewRecordSchema() 75 | dhSchema.AddField(Field{Name: "f1", Type: BOOLEAN, AllowNull: true}) 76 | dhSchema.AddField(Field{Name: "f2", Type: TINYINT, AllowNull: true}) 77 | dhSchema.AddField(Field{Name: "f3", Type: SMALLINT, AllowNull: true}) 78 | dhSchema.AddField(Field{Name: "f4", Type: INTEGER, AllowNull: true}) 79 | dhSchema.AddField(Field{Name: "f5", Type: BIGINT, AllowNull: true}) 80 | dhSchema.AddField(Field{Name: "f6", Type: TIMESTAMP, AllowNull: true}) 81 | dhSchema.AddField(Field{Name: "f7", Type: FLOAT, AllowNull: true}) 82 | dhSchema.AddField(Field{Name: "f8", Type: DOUBLE, AllowNull: true}) 83 | dhSchema.AddField(Field{Name: "f9", Type: DECIMAL, AllowNull: true}) 84 | dhSchema.AddField(Field{Name: "f10", Type: STRING, AllowNull: true}) 85 | dhSchema.AddField(Field{Name: "f11", Type: JSON, AllowNull: true}) 86 | 87 | avroSchema, _ := getAvroSchema(dhSchema) 88 | cache := topicSchemaCacheForTest{ 89 | avroSchema: avroSchema, 90 | dhSchema: dhSchema, 91 | } 92 | 93 | ser := newBatchSerializer("project", "topic", &cache, ZSTD) 94 | 95 | recordNum := 1000 96 | records := make([]IRecord, 0) 97 | for i := 0; i < recordNum; i++ { 98 | records = append(records, genTupleRecordWithNull(dhSchema, 20)) 99 | } 100 | 101 | buf, header, err := ser.serialize(records) 102 | assert.Nil(t, err) 103 | 104 | assert.Equal(t, batchMagicNum, header.magic) 105 | assert.Equal(t, int32(1), header.version) 106 | assert.Equal(t, int32(len(buf)), header.length) 107 | assert.Equal(t, int16(3), header.attribute) 108 | assert.Equal(t, int16(2), header.dataType) 109 | 
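// The expected values mirror how batchSerializer fills the header: the low two
// bits of attribute carry the compressor type (3 here, since this serializer was
// created with ZSTD), dataType 2 is avroDataType, schemaVersion 1 comes from the
// stub cache, and dataOffset 40 equals currentBatchHeaderSize.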
assert.Equal(t, int32(1), header.schemaVersion) 110 | assert.Equal(t, int32(40), header.dataOffset) 111 | assert.Equal(t, int32(recordNum), header.recordCount) 112 | 113 | meta := respMeta{ 114 | cursor: "cursor", 115 | nextCursor: "nextCursor", 116 | sequence: 100, 117 | systemTime: 200, 118 | serial: 300, 119 | } 120 | dser := newBatchDeserializer("0", &cache) 121 | newRecords, err := dser.deserialize(buf, &meta) 122 | assert.Nil(t, err) 123 | 124 | assert.Equal(t, len(records), len(newRecords)) 125 | for i := 0; i < len(records); i++ { 126 | assert.Equal(t, records[i].GetAttributes(), newRecords[i].GetAttributes()) 127 | assert.Equal(t, records[i].GetData(), newRecords[i].GetData()) 128 | assert.Equal(t, int64(100), newRecords[i].GetSequence()) 129 | assert.Equal(t, int64(200), newRecords[i].GetSystemTime()) 130 | assert.Equal(t, int64(300), newRecords[i].GetBaseRecord().Serial) 131 | assert.Equal(t, "cursor", newRecords[i].GetBaseRecord().Cursor) 132 | assert.Equal(t, "nextCursor", newRecords[i].GetBaseRecord().NextCursor) 133 | assert.Equal(t, i, newRecords[i].GetBatchIndex()) 134 | } 135 | } 136 | 137 | func TestDeserializeWithTruncateSchema(t *testing.T) { 138 | dhSchema := NewRecordSchema() 139 | dhSchema.AddField(Field{Name: "f1", Type: INTEGER, AllowNull: true}) 140 | dhSchema.AddField(Field{Name: "f2", Type: DOUBLE, AllowNull: true}) 141 | avroSchema, _ := getAvroSchema(dhSchema) 142 | 143 | newSchema := NewRecordSchema() 144 | newSchema.AddField(Field{Name: "f1", Type: INTEGER, AllowNull: true}) 145 | newSchema.AddField(Field{Name: "f2", Type: DOUBLE, AllowNull: true}) 146 | newSchema.AddField(Field{Name: "f3", Type: STRING, AllowNull: true}) 147 | newAvroSchema, _ := getAvroSchema(newSchema) 148 | 149 | serializeCache := topicSchemaCacheForTest{ 150 | avroSchema: avroSchema, 151 | dhSchema: dhSchema, 152 | } 153 | 154 | deserializeCache := topicSchemaCacheForTest{ 155 | avroSchema: newAvroSchema, 156 | dhSchema: newSchema, 157 | } 158 | 159 | ser := newBatchSerializer("project", "topic", &serializeCache, ZSTD) 160 | 161 | recordNum := 1000 162 | records := make([]IRecord, 0) 163 | for i := 0; i < recordNum; i++ { 164 | records = append(records, genTupleRecord(dhSchema)) 165 | } 166 | 167 | buf, header, err := ser.serialize(records) 168 | assert.Nil(t, err) 169 | 170 | assert.Equal(t, batchMagicNum, header.magic) 171 | assert.Equal(t, int32(1), header.version) 172 | assert.Equal(t, int32(len(buf)), header.length) 173 | assert.Equal(t, int16(3), header.attribute) 174 | assert.Equal(t, int16(2), header.dataType) 175 | assert.Equal(t, int32(1), header.schemaVersion) 176 | assert.Equal(t, int32(40), header.dataOffset) 177 | assert.Equal(t, int32(recordNum), header.recordCount) 178 | 179 | meta := respMeta{ 180 | cursor: "cursor", 181 | nextCursor: "nextCursor", 182 | sequence: 100, 183 | systemTime: 200, 184 | serial: 300, 185 | } 186 | dser := newBatchDeserializer("0", &deserializeCache) 187 | newRecords, err := dser.deserialize(buf, &meta) 188 | assert.Nil(t, err) 189 | 190 | assert.Equal(t, len(records), len(newRecords)) 191 | for i := 0; i < len(records); i++ { 192 | assert.Equal(t, records[i].GetAttributes(), newRecords[i].GetAttributes()) 193 | assert.Equal(t, records[i].GetData(), newRecords[i].GetData()) 194 | assert.Equal(t, int64(100), newRecords[i].GetSequence()) 195 | assert.Equal(t, int64(200), newRecords[i].GetSystemTime()) 196 | assert.Equal(t, int64(300), newRecords[i].GetBaseRecord().Serial) 197 | assert.Equal(t, "cursor", newRecords[i].GetBaseRecord().Cursor) 198 
| assert.Equal(t, "nextCursor", newRecords[i].GetBaseRecord().NextCursor) 199 | assert.Equal(t, i, newRecords[i].GetBatchIndex()) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /datahub/data_serializer.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 7 | "github.com/hamba/avro/v2" 8 | "github.com/shopspring/decimal" 9 | ) 10 | 11 | type dataSerializer interface { 12 | serialize(records []IRecord) ([]byte, error) 13 | } 14 | 15 | func newDataSerializer(schemaCache topicSchemaCache) dataSerializer { 16 | return &avroDataSerializer{schemaCache: schemaCache} 17 | } 18 | 19 | type dataDeserializer interface { 20 | deserialize(data []byte, header *batchHeader) ([]IRecord, error) 21 | } 22 | 23 | func newDataDeserializer(schemaCache topicSchemaCache) dataDeserializer { 24 | return &avroDataDeserializer{schemaCache: schemaCache} 25 | } 26 | 27 | type avroDataSerializer struct { 28 | schemaCache topicSchemaCache 29 | } 30 | 31 | func (as *avroDataSerializer) serialize(records []IRecord) ([]byte, error) { 32 | avroSchema, err := as.getSchema(records[0]) 33 | if err != nil { 34 | return nil, err 35 | } 36 | 37 | buffer := bytes.NewBuffer(make([]byte, 0, 16*1024)) 38 | encoder := avro.NewEncoderForSchema(avroSchema, buffer) 39 | 40 | avroRecord := as.genAvroRecord(records[0]) 41 | for _, dhRecord := range records { 42 | // resuse record can reduce cost of allocate memory 43 | as.assignRecord(dhRecord, avroRecord) 44 | err := encoder.Encode(avroRecord) 45 | if err != nil { 46 | return nil, err 47 | } 48 | } 49 | 50 | return buffer.Bytes(), nil 51 | } 52 | 53 | func (as *avroDataSerializer) genAvroRecord(record IRecord) map[string]any { 54 | avroRecord := map[string]any{} 55 | 56 | switch realRecord := record.(type) { 57 | case *TupleRecord: 58 | for _, field := range realRecord.RecordSchema.Fields { 59 | avroRecord[field.Name] = nil 60 | } 61 | case *BlobRecord: 62 | avroRecord[defaultAvroBlobColumnName] = nil 63 | } 64 | 65 | attrCol := map[string]any{} 66 | avroRecord[defaultAvroAttributeName] = attrCol 67 | return avroRecord 68 | } 69 | 70 | func (as *avroDataSerializer) setTupleRecord(dhRecord *TupleRecord, avroRecord map[string]any) { 71 | for _, field := range dhRecord.RecordSchema.Fields { 72 | val, _ := dhRecord.GetValueByName(field.Name) 73 | colVal := as.getColumnValue(val, field.Type) 74 | avroRecord[field.Name] = colVal 75 | } 76 | } 77 | 78 | func (as *avroDataSerializer) setBlobRecord(dhRecord *BlobRecord, avroRecord map[string]any) { 79 | avroRecord[defaultAvroBlobColumnName] = dhRecord.GetRawData() 80 | } 81 | 82 | func (as *avroDataSerializer) assignRecord(record IRecord, avroRecord map[string]any) map[string]any { 83 | switch realRecord := record.(type) { 84 | case *TupleRecord: 85 | as.setTupleRecord(realRecord, avroRecord) 86 | case *BlobRecord: 87 | as.setBlobRecord(realRecord, avroRecord) 88 | } 89 | 90 | attrCol := avroRecord[defaultAvroAttributeName].(map[string]any) 91 | var attrVal map[string]string = nil 92 | if len(record.GetAttributes()) > 0 { 93 | attrVal = record.GetAttributes() 94 | } 95 | attrCol["map"] = attrVal 96 | 97 | return avroRecord 98 | } 99 | 100 | func (as *avroDataSerializer) getSchema(record IRecord) (avro.Schema, error) { 101 | var dhSchema *RecordSchema = nil 102 | tupleRecord, ok := record.(*TupleRecord) 103 | if ok { 104 | dhSchema = tupleRecord.RecordSchema 105 | } 106 | 107 | schema := 
as.schemaCache.getAvroSchema(dhSchema) 108 | if schema != nil { 109 | return schema, nil 110 | } 111 | 112 | return nil, fmt.Errorf("cannot get avro schema") 113 | } 114 | 115 | func (as *avroDataSerializer) getColumnValue(data DataType, fieldType FieldType) any { 116 | if data == nil { 117 | return nil 118 | } 119 | 120 | var value any 121 | switch v := data.(type) { 122 | case Boolean: 123 | value = bool(v) 124 | case Tinyint: 125 | value = int32(v) 126 | case Smallint: 127 | value = int32(v) 128 | case Integer: 129 | value = int32(v) 130 | case Timestamp: 131 | value = int64(v) 132 | case Bigint: 133 | value = int64(v) 134 | case Float: 135 | value = float32(v) 136 | case Double: 137 | value = float64(v) 138 | default: 139 | value = v.String() 140 | } 141 | 142 | return value 143 | } 144 | 145 | type avroDataDeserializer struct { 146 | schemaCache topicSchemaCache 147 | } 148 | 149 | func (ad *avroDataDeserializer) deserialize(data []byte, header *batchHeader) ([]IRecord, error) { 150 | dhSchema, avroSchema, err := ad.getSchema(header) 151 | if err != nil { 152 | return nil, err 153 | } 154 | 155 | // avro schema cannot be null 156 | if avroSchema == nil || (header.schemaVersion >= 0 && dhSchema == nil) { 157 | return nil, fmt.Errorf("cannot get schema, version:%d", header.schemaVersion) 158 | } 159 | 160 | buffer := bytes.NewBuffer(data) 161 | decoder := avro.NewDecoderForSchema(avroSchema, buffer) 162 | 163 | records := make([]IRecord, 0) 164 | for i := 0; i < int(header.recordCount); i++ { 165 | avroRecord := make(map[string]any) 166 | err := decoder.Decode(&avroRecord) 167 | if err != nil { 168 | return nil, err 169 | } 170 | 171 | dhRecord, err := ad.convertRecord(dhSchema, avroRecord) 172 | if err != nil { 173 | return nil, err 174 | } 175 | 176 | records = append(records, dhRecord) 177 | } 178 | 179 | return records, nil 180 | } 181 | 182 | func (ad *avroDataDeserializer) getSchema(header *batchHeader) (*RecordSchema, avro.Schema, error) { 183 | schema := ad.schemaCache.getSchemaByVersionId(int(header.schemaVersion)) 184 | 185 | truncated := false 186 | dhSchema := schema 187 | if dhSchema != nil && header.schemaColumnNum != 0 && int(header.schemaColumnNum) != dhSchema.Size() { 188 | dhSchema = NewRecordSchema() 189 | for i := 0; i < int(header.schemaColumnNum); i++ { 190 | dhSchema.AddField(schema.Fields[i]) 191 | } 192 | truncated = true 193 | } 194 | 195 | var avroSchema avro.Schema 196 | if !truncated { 197 | avroSchema = ad.schemaCache.getAvroSchemaByVersionId(int(header.schemaVersion)) 198 | } else { 199 | tmp, err := getAvroSchema(dhSchema) 200 | if err != nil { 201 | return nil, nil, err 202 | } 203 | avroSchema = tmp 204 | } 205 | 206 | return dhSchema, avroSchema, nil 207 | } 208 | 209 | func (ad *avroDataDeserializer) convertRecord(dhSchema *RecordSchema, avroRecord map[string]any) (IRecord, error) { 210 | if dhSchema != nil { 211 | return ad.convertTupleRecord(dhSchema, avroRecord) 212 | } else { 213 | return ad.convertBlobRecord(avroRecord) 214 | } 215 | } 216 | 217 | func (ad *avroDataDeserializer) convertTupleRecord(dhSchema *RecordSchema, avroRecord map[string]any) (IRecord, error) { 218 | dhRecord := NewTupleRecord(dhSchema) 219 | 220 | for _, field := range dhSchema.Fields { 221 | err := ad.setColumnValue(dhRecord, avroRecord[field.Name], &field) 222 | if err != nil { 223 | return nil, err 224 | } 225 | } 226 | 227 | err := ad.convertAttribute(avroRecord, dhRecord) 228 | if err != nil { 229 | return nil, err 230 | } 231 | 232 | return dhRecord, nil 233 | } 234 | 
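// exampleDecimalRoundTrip is an illustrative sketch, not part of the SDK API:
// it shows how a DECIMAL column survives the Avro round trip. On the way out,
// getColumnValue falls back to v.String(), so the value travels as a string;
// on the way back, setColumnValue (below) re-parses it with
// decimal.NewFromString. It assumes cache resolves this one-field schema and
// reports version 1 for it, as the stub cache in data_serializer_test.go does.
func exampleDecimalRoundTrip(cache topicSchemaCache) (IRecord, error) {
	schema := NewRecordSchema()
	schema.AddField(Field{Name: "price", Type: DECIMAL, AllowNull: true})

	rec := NewTupleRecord(schema)
	if err := rec.SetValueByName("price", decimal.NewFromFloat(3.14)); err != nil {
		return nil, err
	}

	// Serialize: the decimal is written as its string representation.
	buf, err := newDataSerializer(cache).serialize([]IRecord{rec})
	if err != nil {
		return nil, err
	}

	// Deserialize: only recordCount and schemaVersion are needed by the
	// data-level deserializer; the string is parsed back into a decimal.
	header := &batchHeader{schemaVersion: 1, recordCount: 1}
	out, err := newDataDeserializer(cache).deserialize(buf, header)
	if err != nil {
		return nil, err
	}
	return out[0], nil
}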
235 | func (ad *avroDataDeserializer) setColumnValue(dhRecord *TupleRecord, val any, field *Field) error { 236 | if val == nil { 237 | dhRecord.SetValueByName(field.Name, nil) 238 | return nil 239 | } 240 | 241 | switch field.Type { 242 | case DECIMAL: 243 | tmp, err := decimal.NewFromString(val.(string)) 244 | if err != nil { 245 | return err 246 | } 247 | dhRecord.SetValueByName(field.Name, tmp) 248 | default: 249 | dhRecord.SetValueByName(field.Name, val) 250 | } 251 | return nil 252 | } 253 | 254 | func (ad *avroDataDeserializer) convertBlobRecord(avroRecord map[string]any) (IRecord, error) { 255 | data, ok := avroRecord[defaultAvroBlobColumnName] 256 | if !ok { 257 | return nil, fmt.Errorf("cannot get blob data") 258 | } 259 | 260 | rawData, ok := data.([]byte) 261 | if !ok { 262 | return nil, fmt.Errorf("blob data is not []byte") 263 | } 264 | 265 | dhRecord := NewBlobRecord(rawData) 266 | 267 | err := ad.convertAttribute(avroRecord, dhRecord) 268 | if err != nil { 269 | return nil, err 270 | } 271 | 272 | return dhRecord, nil 273 | } 274 | 275 | func (ad *avroDataDeserializer) convertAttribute(avroRecord map[string]any, dhRecord IRecord) error { 276 | attrCol, ok := avroRecord[defaultAvroAttributeName] 277 | if !ok || attrCol == nil { 278 | return nil 279 | } 280 | 281 | unionAttr, ok := attrCol.(map[string]any) 282 | if !ok { 283 | return fmt.Errorf("attribute column is not map[string]any") 284 | } 285 | 286 | if len(unionAttr) == 0 { 287 | return nil 288 | } 289 | 290 | attrVal, ok := unionAttr["map"] 291 | if !ok { 292 | return nil 293 | } 294 | 295 | attrMap, ok := attrVal.(map[string]any) 296 | if !ok { 297 | return fmt.Errorf("attribute value is not map[string]string") 298 | } 299 | 300 | for k, v := range attrMap { 301 | dhRecord.SetAttribute(k, v.(string)) 302 | } 303 | return nil 304 | } 305 | -------------------------------------------------------------------------------- /datahub/producer.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sort" 7 | "sync" 8 | "sync/atomic" 9 | "time" 10 | 11 | log "github.com/sirupsen/logrus" 12 | ) 13 | 14 | // Additional information when send success 15 | type SendDetails struct { 16 | ShardId string 17 | RequestId string 18 | ReqSize int 19 | RawSize int 20 | } 21 | 22 | type Producer interface { 23 | Init() error 24 | 25 | Send(records []IRecord) (*SendDetails, error) 26 | 27 | SendByShard(records []IRecord, shardId string) (*SendDetails, error) 28 | 29 | // GetSchema return the schema for the specified topic. 30 | // If enable multi-version schema, it returns the latest version of the schema. 31 | // Otherwise, it returns the topic schema. 
32 | GetSchema() (*RecordSchema, error) 33 | 34 | GetSchemaByVersionId(versionId int) (*RecordSchema, error) 35 | 36 | GetActiveShards() []string 37 | 38 | Close() error 39 | } 40 | 41 | type producerImpl struct { 42 | config *ProducerConfig 43 | project string 44 | topic string 45 | shards []string 46 | index int32 47 | freshShardInterval time.Duration 48 | nextFreshShardTime atomic.Value 49 | mutex sync.RWMutex 50 | client DataHubApi 51 | schemaCache topicSchemaCache 52 | } 53 | 54 | func NewProducer(cfg *ProducerConfig) Producer { 55 | var now atomic.Value 56 | now.Store(time.Now()) 57 | return &producerImpl{ 58 | config: cfg, 59 | project: cfg.Project, 60 | topic: cfg.Topic, 61 | shards: make([]string, 0), 62 | index: 0, 63 | freshShardInterval: time.Minute, 64 | nextFreshShardTime: now, 65 | } 66 | } 67 | 68 | func (pi *producerImpl) initMeta() error { 69 | tmpClient := NewClientWithConfig(pi.config.Endpoint, NewDefaultConfig(), pi.config.Account) 70 | res, err := tmpClient.GetTopic(pi.project, pi.topic) 71 | if err != nil { 72 | return err 73 | } 74 | 75 | if res.extraConfig.listShardInterval != 0 { 76 | pi.freshShardInterval = res.extraConfig.listShardInterval 77 | } 78 | 79 | config := NewDefaultConfig() 80 | 81 | if res.extraConfig.compressType != NOCOMPRESS { 82 | config.CompressorType = res.extraConfig.compressType 83 | } 84 | 85 | if res.EnableSchema { 86 | config.Protocol = Batch 87 | } else { 88 | if res.extraConfig.protocol != unknownProtocol { 89 | config.Protocol = res.extraConfig.protocol 90 | } else { 91 | config.Protocol = pi.config.Protocol 92 | } 93 | } 94 | 95 | userAgent := defaultClientAgent() 96 | if len(pi.config.UserAgent) > 0 { 97 | userAgent = userAgent + " " + pi.config.UserAgent 98 | } 99 | 100 | pi.client = NewClientWithConfig(pi.config.Endpoint, config, pi.config.Account) 101 | pi.client.setUserAgent(userAgent) 102 | pi.schemaCache = schemaClientInstance().getTopicSchemaCache(pi.project, pi.topic, pi.client) 103 | 104 | err = pi.freshShard(true) 105 | if err != nil { 106 | return err 107 | } 108 | 109 | log.Infof("Init %s/%s producer success", pi.project, pi.topic) 110 | return nil 111 | } 112 | 113 | func (pi *producerImpl) Init() error { 114 | return pi.initMeta() 115 | } 116 | 117 | func (pi *producerImpl) Send(records []IRecord) (*SendDetails, error) { 118 | shardId := pi.getNextShard() 119 | if shardId == "" { 120 | return nil, fmt.Errorf("cannot get valid shard") 121 | } 122 | 123 | details, err := pi.SendByShard(records, shardId) 124 | if IsShardSealedError(err) { 125 | pi.freshShard(true) 126 | shardId = pi.getNextShard() 127 | if shardId == "" { 128 | return nil, fmt.Errorf("cannot get valid shard") 129 | } 130 | 131 | details, err = pi.SendByShard(records, shardId) 132 | } 133 | 134 | if err != nil { 135 | return nil, err 136 | } 137 | 138 | return details, nil 139 | } 140 | 141 | func (pi *producerImpl) SendByShard(records []IRecord, shardId string) (*SendDetails, error) { 142 | return pi.sendWithRetry(records, shardId) 143 | } 144 | 145 | func (pi *producerImpl) sendWithRetry(records []IRecord, shardId string) (*SendDetails, error) { 146 | var returnErr error = nil 147 | for i := 0; pi.config.MaxRetry < 0 || i <= pi.config.MaxRetry; i++ { 148 | now := time.Now() 149 | res, err := pi.client.PutRecordsByShard(pi.project, pi.topic, shardId, records) 150 | if err == nil { 151 | if log.IsLevelEnabled(log.DebugLevel) { 152 | log.Debugf("%s/%s/%s send records %d success, cost: %v, rid:%s", 153 | pi.project, pi.topic, shardId, len(records), 
time.Since(now), res.RequestId) 154 | } 155 | 156 | return &SendDetails{ 157 | ReqSize: res.ReqSize, 158 | RawSize: res.RawSize, 159 | RequestId: res.RequestId, 160 | ShardId: shardId, 161 | }, nil 162 | } 163 | 164 | if !IsRetryableError(err) { 165 | log.Errorf("%s/%s/%s send records %d failed, cost:%v, error:%v", 166 | pi.project, pi.topic, shardId, len(records), time.Since(now), err) 167 | return nil, err 168 | } 169 | 170 | returnErr = err 171 | sleepTime := pi.config.RetryInterval 172 | if IsNetworkError(err) { 173 | if log.IsLevelEnabled(log.DebugLevel) { 174 | log.Debugf("%s/%s/%s send records %d with network error, cost: %v, error:%v", 175 | pi.project, pi.topic, shardId, len(records), time.Since(now), err) 176 | } 177 | } else if IsLimitExceedError(err) { 178 | sleepTime = 100 * time.Millisecond 179 | log.Warnf("%s/%s/%s send records %d exceed limit, cost:%v, error:%v", 180 | pi.project, pi.topic, shardId, len(records), time.Since(now), err) 181 | } else if IsRetryableError(err) { 182 | log.Warnf("%s/%s/%s send records %d failed, cost:%v, error:%v", 183 | pi.project, pi.topic, shardId, len(records), time.Since(now), err) 184 | } 185 | time.Sleep(sleepTime) 186 | } 187 | return nil, returnErr 188 | } 189 | 190 | func (pi *producerImpl) getNextIndex() int { 191 | if pi.config.SendStrategy == RoundRobin { 192 | return int(atomic.AddInt32(&pi.index, 1)) 193 | } else { 194 | return rand.Int() 195 | } 196 | } 197 | 198 | func (pi *producerImpl) getNextShard() string { 199 | pi.freshShard(false) 200 | index := pi.getNextIndex() 201 | pi.mutex.RLock() 202 | defer pi.mutex.RUnlock() 203 | 204 | if len(pi.shards) == 0 { 205 | return "" 206 | } 207 | 208 | idx := index % len(pi.shards) 209 | return pi.shards[idx] 210 | } 211 | 212 | func shardsEqual(shards1, shards2 []string) bool { 213 | if len(shards1) != len(shards2) { 214 | return false 215 | } 216 | 217 | if len(shards1) == 0 { 218 | return true 219 | } 220 | 221 | for idx := range shards1 { 222 | if shards1[idx] != shards2[idx] { 223 | return false 224 | } 225 | } 226 | 227 | return true 228 | } 229 | 230 | func (pi *producerImpl) freshShard(force bool) error { 231 | nextTime := pi.nextFreshShardTime.Load().(time.Time) 232 | if !force && time.Now().Before(nextTime) { 233 | return nil 234 | } 235 | 236 | // pervent fresh shard by multi goroutine 237 | newNextTime := time.Now().Add(pi.freshShardInterval) 238 | if !pi.nextFreshShardTime.CompareAndSwap(nextTime, newNextTime) { 239 | return nil 240 | } 241 | 242 | res, err := pi.client.ListShard(pi.project, pi.topic) 243 | if err != nil { 244 | return err 245 | } 246 | 247 | newShards := make([]string, 0) 248 | for _, shard := range res.Shards { 249 | if shard.State == ACTIVE { 250 | newShards = append(newShards, shard.ShardId) 251 | } 252 | } 253 | 254 | if len(newShards) == 0 { 255 | log.Warnf("%s/%s fresh shard list failed, no active shard, rid:%s", 256 | pi.project, pi.topic, res.RequestId) 257 | return fmt.Errorf("no active shard") 258 | } 259 | 260 | sort.Strings(newShards) 261 | 262 | pi.mutex.Lock() 263 | defer pi.mutex.Unlock() 264 | 265 | if shardsEqual(pi.shards, newShards) { 266 | log.Infof("%s/%s fresh shard success, no shard update, current:%s", 267 | pi.project, pi.topic, newShards) 268 | } else { 269 | pi.shards = newShards 270 | log.Infof("%s/%s fresh shard list success, newShards:%v", 271 | pi.project, pi.topic, newShards) 272 | } 273 | 274 | return nil 275 | } 276 | 277 | func (pi *producerImpl) GetSchema() (*RecordSchema, error) { 278 | return pi.GetSchemaByVersionId(-1) 
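// The -1 passed above is a "latest" sentinel: GetSchemaByVersionId swaps it for
// schemaCache.getMaxSchemaVersionId(), and a still-negative result means a blob
// topic, for which (nil, nil) is returned.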
279 | } 280 | 281 | func (pi *producerImpl) GetSchemaByVersionId(versionId int) (*RecordSchema, error) { 282 | if versionId < 0 { 283 | versionId = pi.schemaCache.getMaxSchemaVersionId() 284 | } 285 | 286 | if versionId < 0 { // blob 287 | return nil, nil 288 | } 289 | 290 | schema := pi.schemaCache.getSchemaByVersionId(versionId) 291 | if schema != nil { 292 | return schema, nil 293 | } 294 | 295 | return nil, fmt.Errorf("%s/%s schema not found, version:%d", pi.project, pi.topic, versionId) 296 | } 297 | 298 | func (pi *producerImpl) GetActiveShards() []string { 299 | pi.mutex.RLock() 300 | defer pi.mutex.RUnlock() 301 | 302 | dst := make([]string, len(pi.shards)) 303 | copy(dst, pi.shards) 304 | return dst 305 | } 306 | 307 | func (pi *producerImpl) Close() error { 308 | return nil 309 | } 310 | -------------------------------------------------------------------------------- /test/e2e/batch_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/shopspring/decimal" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/aliyun/aliyun-datahub-sdk-go/datahub" 11 | ) 12 | 13 | func TestBatch(t *testing.T) { 14 | projectName = projectName + "_batch" 15 | // try clear pre data 16 | batchClient.DeleteTopic(projectName, batchTupleTopicName) 17 | batchClient.DeleteTopic(projectName, batchBlobTopicName) 18 | batchClient.DeleteProject(projectName) 19 | 20 | var shardId = "0" 21 | 22 | cp, err := batchClient.CreateProject(projectName, "project created by go sdk batch mode") 23 | assert.Nil(t, err) 24 | assert.NotNil(t, cp) 25 | 26 | doBlobBatch(t, shardId) 27 | 28 | doTupleBatch(t, shardId) 29 | 30 | } 31 | 32 | func doBlobBatch(t *testing.T, shardId string) { 33 | // blob topic 34 | cbt, err := batchClient.CreateBlobTopic(projectName, batchBlobTopicName, "blob topic created by go sdk batch mode", 1, 1) 35 | assert.Nil(t, err) 36 | assert.NotNil(t, cbt) 37 | 38 | nowMS := time.Now().UnixNano() / 100000 39 | blobRecords := make([]datahub.IRecord, 0) 40 | blobRecord1 := datahub.NewBlobRecord([]byte("1, blob data write by batch mode."), nowMS) 41 | blobRecords = append(blobRecords, blobRecord1) 42 | 43 | blobRecord2 := datahub.NewBlobRecord([]byte("1, blob data write by batch mode."), nowMS) 44 | blobRecords = append(blobRecords, blobRecord2) 45 | 46 | blobRecord3 := datahub.NewBlobRecord([]byte("1, blob data write by batch mode."), nowMS) 47 | blobRecords = append(blobRecords, blobRecord3) 48 | 49 | result, err := batchClient.PutRecordsByShard(projectName, batchBlobTopicName, shardId, blobRecords) 50 | assert.Nil(t, err) 51 | assert.NotNil(t, result) 52 | 53 | //time.Sleep(6) 54 | nowMS = time.Now().UnixNano() / 100000 55 | blobRecords = make([]datahub.IRecord, 0) 56 | blobRecord4 := datahub.NewBlobRecord([]byte("4, blob data write by batch mode."), nowMS) 57 | blobRecords = append(blobRecords, blobRecord4) 58 | 59 | blobRecord5 := datahub.NewBlobRecord([]byte("5, blob data write by batch mode."), nowMS) 60 | blobRecords = append(blobRecords, blobRecord5) 61 | 62 | blobRecord6 := datahub.NewBlobRecord([]byte("6, blob data write by batch mode."), nowMS) 63 | blobRecords = append(blobRecords, blobRecord6) 64 | 65 | blobRecord7 := datahub.NewBlobRecord([]byte("7, blob data write by batch mode."), nowMS) 66 | blobRecords = append(blobRecords, blobRecord7) 67 | 68 | result, err = batchClient.PutRecordsByShard(projectName, batchBlobTopicName, shardId, blobRecords) 69 | assert.Nil(t, 
err) 70 | assert.NotNil(t, result) 71 | 72 | bcur, err := batchClient.GetCursor(projectName, batchBlobTopicName, shardId, datahub.OLDEST) 73 | assert.Nil(t, err) 74 | assert.NotNil(t, bcur) 75 | 76 | br, err := batchClient.GetBlobRecords(projectName, batchBlobTopicName, shardId, bcur.Cursor, 6) 77 | assert.Nil(t, err) 78 | assert.NotNil(t, br) 79 | 80 | for _, record := range br.Records { 81 | data, ok := record.(*datahub.BlobRecord) 82 | assert.True(t, ok) 83 | fmt.Println(data.String()) 84 | } 85 | } 86 | 87 | func doTupleBatch(t *testing.T, shardId string) { 88 | // tuple topic 89 | recordSchema1 := datahub.NewRecordSchema() 90 | recordSchema1.AddField(datahub.Field{Name: "f1", Type: datahub.TINYINT, AllowNull: true}). 91 | AddField(datahub.Field{Name: "f2", Type: datahub.SMALLINT, AllowNull: true}). 92 | AddField(datahub.Field{Name: "f3", Type: datahub.INTEGER, AllowNull: true}). 93 | AddField(datahub.Field{Name: "f4", Type: datahub.BIGINT, AllowNull: false}). 94 | AddField(datahub.Field{Name: "f5", Type: datahub.TIMESTAMP, AllowNull: true}). 95 | AddField(datahub.Field{Name: "f6", Type: datahub.FLOAT, AllowNull: true}). 96 | AddField(datahub.Field{Name: "f7", Type: datahub.DOUBLE, AllowNull: true}). 97 | AddField(datahub.Field{Name: "f8", Type: datahub.DECIMAL, AllowNull: false}). 98 | AddField(datahub.Field{Name: "f9", Type: datahub.BOOLEAN, AllowNull: true}). 99 | AddField(datahub.Field{Name: "f10", Type: datahub.STRING, AllowNull: true}). 100 | AddField(datahub.Field{Name: "f11", Type: datahub.STRING, AllowNull: true}) 101 | 102 | recordSchema2 := datahub.NewRecordSchema() 103 | recordSchema2.AddField(datahub.Field{Name: "field1", Type: datahub.STRING, AllowNull: true}). 104 | AddField(datahub.Field{Name: "field2", Type: datahub.BIGINT, AllowNull: false}). 
105 | AddField(datahub.Field{Name: "field3", Type: datahub.BIGINT, AllowNull: false}) 106 | 107 | ctt, err := batchClient.CreateTupleTopic(projectName, batchTupleTopicName, "tuple topic created by go sdk batch mode", 1, 1, recordSchema1) 108 | assert.Nil(t, err) 109 | assert.NotNil(t, ctt) 110 | 111 | rt, err := batchClient.RegisterTopicSchema(projectName, batchTupleTopicName, recordSchema2) 112 | assert.Nil(t, err) 113 | assert.NotNil(t, rt) 114 | assert.Equal(t, rt.StatusCode, 201) 115 | 116 | lt, err := batchClient.ListTopicSchema(projectName, batchTupleTopicName) 117 | assert.Nil(t, err) 118 | assert.NotNil(t, lt) 119 | assert.Equal(t, lt.SchemaInfoList[0].RecordSchema.String(), recordSchema1.String()) 120 | assert.Equal(t, lt.SchemaInfoList[0].VersionId, 0) 121 | assert.Equal(t, lt.SchemaInfoList[1].RecordSchema.String(), recordSchema2.String()) 122 | assert.Equal(t, lt.SchemaInfoList[1].VersionId, 1) 123 | 124 | gs, err := batchClient.GetTopicSchemaByVersion(projectName, batchTupleTopicName, 1) 125 | assert.Nil(t, err) 126 | assert.NotNil(t, gs) 127 | assert.Equal(t, gs.VersionId, 1) 128 | assert.Equal(t, gs.RecordSchema.String(), recordSchema2.String()) 129 | 130 | gs, err = batchClient.GetTopicSchemaBySchema(projectName, batchTupleTopicName, recordSchema2) 131 | assert.Nil(t, err) 132 | assert.NotNil(t, gs) 133 | assert.Equal(t, gs.VersionId, 1) 134 | assert.Equal(t, gs.RecordSchema.String(), recordSchema2.String()) 135 | 136 | records := make([]datahub.IRecord, 0) 137 | record1 := datahub.NewTupleRecord(recordSchema1, 0) 138 | record1.SetValueByName("f1", 11) 139 | record1.SetValueByName("f2", 222) 140 | record1.SetValueByName("f3", 33333) 141 | record1.SetValueByName("f4", 44444444) 142 | record1.SetValueByName("f5", 56789) 143 | record1.SetValueByName("f6", float32(3.145)) 144 | record1.SetValueByName("f7", 3.146) 145 | val, _ := decimal.NewFromString("789.123456") 146 | record1.SetValueByName("f8", val) 147 | record1.SetValueByName("f9", true) 148 | record1.SetValueByName("f10", "1234567894546asdf") 149 | record1.SetAttribute("bbbbb", "ffffffffaaaaaaa") 150 | record1.SetAttribute("aaaaa", "ffffffffbbbbbb") 151 | records = append(records, record1) 152 | 153 | record2 := datahub.NewTupleRecord(recordSchema2, 0) 154 | record2.SetValueByName("field1", "test2") 155 | record2.SetValueByName("field2", 111) 156 | record2.SetValueByName("field3", 123) 157 | records = append(records, record2) 158 | 159 | record3 := datahub.NewTupleRecord(recordSchema2, 0) 160 | record3.SetValueByName("field1", "test3") 161 | record3.SetValueByName("field2", 222) 162 | record3.SetValueByName("field3", 333) 163 | record3.SetAttribute("key1", "value1") 164 | record3.SetAttribute("key2", "value2") 165 | record3.SetAttribute("key3", "value3") 166 | records = append(records, record3) 167 | 168 | record4 := datahub.NewTupleRecord(recordSchema2, 0) 169 | record4.SetValueByName("field1", "test4") 170 | record4.SetValueByName("field2", 2222) 171 | record4.SetValueByName("field3", 3333) 172 | record4.SetAttribute("key1", "value1") 173 | record4.SetAttribute("key2", "value2") 174 | record4.SetAttribute("key3", "value3") 175 | records = append(records, record4) 176 | 177 | ret, err := batchClient.PutRecordsByShard(projectName, batchTupleTopicName, shardId, records) 178 | assert.Nil(t, err) 179 | assert.NotNil(t, ret) 180 | 181 | gc, err := batchClient.GetCursor(projectName, batchTupleTopicName, shardId, datahub.OLDEST) 182 | assert.Nil(t, err) 183 | assert.NotNil(t, gc) 184 | 185 | gb, err := 
batchClient.GetTupleRecords(projectName, batchTupleTopicName, shardId, gc.Cursor, 100, nil) 186 | assert.Nil(t, err) 187 | assert.NotNil(t, gb) 188 | assert.Equal(t, gb.StartSequence, int64(0)) 189 | assert.Equal(t, gb.LatestSequence, int64(0)) 190 | assert.Equal(t, gb.RecordCount, 4) 191 | assert.Equal(t, len(gb.Records), 4) 192 | 193 | tupleRecord, ok := gb.Records[0].(*datahub.TupleRecord) 194 | assert.True(t, ok) 195 | assert.EqualValues(t, 11, tupleRecord.GetValueByIdx(0)) 196 | assert.EqualValues(t, 222, tupleRecord.GetValueByIdx(1)) 197 | assert.EqualValues(t, 33333, tupleRecord.GetValueByIdx(2)) 198 | assert.EqualValues(t, 44444444, tupleRecord.GetValueByIdx(3)) 199 | assert.EqualValues(t, 0xddd5, tupleRecord.GetValueByIdx(4)) 200 | assert.EqualValues(t, 3.145, tupleRecord.GetValueByIdx(5)) 201 | assert.EqualValues(t, 3.146, tupleRecord.GetValueByIdx(6)) 202 | assert.EqualValues(t, "789.123456", tupleRecord.GetValueByIdx(7).String()) 203 | assert.EqualValues(t, true, tupleRecord.GetValueByIdx(8)) 204 | assert.EqualValues(t, "1234567894546asdf", tupleRecord.GetValueByIdx(9)) 205 | assert.EqualValues(t, nil, tupleRecord.GetValueByIdx(10)) 206 | assert.Equal(t, map[string]interface{}(map[string]interface{}{"aaaaa": "ffffffffbbbbbb", "bbbbb": "ffffffffaaaaaaa"}), tupleRecord.GetAttributes()) 207 | 208 | tupleRecord, ok = gb.Records[1].(*datahub.TupleRecord) 209 | assert.True(t, ok) 210 | assert.EqualValues(t, "test2", tupleRecord.GetValueByIdx(0)) 211 | assert.EqualValues(t, 111, tupleRecord.GetValueByIdx(1)) 212 | assert.EqualValues(t, 123, tupleRecord.GetValueByIdx(2)) 213 | 214 | tupleRecord, ok = gb.Records[2].(*datahub.TupleRecord) 215 | assert.True(t, ok) 216 | assert.EqualValues(t, "test3", tupleRecord.GetValueByIdx(0)) 217 | assert.EqualValues(t, 222, tupleRecord.GetValueByIdx(1)) 218 | assert.EqualValues(t, 333, tupleRecord.GetValueByIdx(2)) 219 | 220 | tupleRecord, ok = gb.Records[3].(*datahub.TupleRecord) 221 | assert.True(t, ok) 222 | assert.EqualValues(t, "test4", tupleRecord.GetValueByIdx(0)) 223 | assert.EqualValues(t, 2222, tupleRecord.GetValueByIdx(1)) 224 | assert.EqualValues(t, 3333, tupleRecord.GetValueByIdx(2)) 225 | } 226 | -------------------------------------------------------------------------------- /datahub/batch_serializer.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | ) 8 | 9 | const ( 10 | defaultBatchHeaderSize = 36 11 | currentBatchHeaderSize = 40 12 | avroDataType = 2 13 | ) 14 | 15 | var ( 16 | batchMagicBytes = []byte{'D', 'H', 'U', 'B'} 17 | batchMagicNum = int32(binary.LittleEndian.Uint32(batchMagicBytes)) 18 | ) 19 | 20 | type respMeta struct { 21 | cursor string 22 | nextCursor string 23 | sequence int64 24 | systemTime int64 25 | serial int64 26 | } 27 | 28 | type batchHeader struct { 29 | fmt.Stringer 30 | magic int32 31 | version int32 32 | length int32 33 | rawSize int32 34 | crc32 uint32 35 | attribute int16 36 | dataType int16 37 | schemaVersion int32 38 | dataOffset int32 39 | recordCount int32 40 | 41 | // extra info 42 | schemaColumnNum int32 43 | } 44 | 45 | func newBatchHeader() *batchHeader { 46 | return &batchHeader{ 47 | magic: batchMagicNum, 48 | version: 1, 49 | length: 0, 50 | rawSize: 0, 51 | crc32: 0, 52 | attribute: 0, 53 | dataType: avroDataType, 54 | schemaVersion: 0, 55 | dataOffset: currentBatchHeaderSize, 56 | recordCount: 0, 57 | schemaColumnNum: 0, 58 | } 59 | } 60 | 61 | func (bh *batchHeader) 
String() string { 62 | return fmt.Sprintf("version:%d, length:%d, rawSize:%d, crc32:%d, attribute:%d, dataType:%d, schemaVersion:%d, dataOffset:%d, recordCount:%d, schemaColumnNum:%d", 63 | bh.version, bh.length, bh.rawSize, bh.crc32, bh.attribute, bh.dataType, bh.schemaVersion, bh.dataOffset, bh.recordCount, bh.schemaColumnNum) 64 | } 65 | 66 | func setCompressType(attrbuite int16, cType CompressorType) int16 { 67 | return int16((uint16(attrbuite) & uint16(0xfffc)) | uint16(cType.toValue())) 68 | } 69 | 70 | func getCompressType(attribute int16) CompressorType { 71 | val := attribute & 0x0003 72 | return getCompressTypeFromValue(int(val)) 73 | } 74 | 75 | func (serializer *batchSerializer) serializeBatchHeader(bHeader *batchHeader) []byte { 76 | buf := make([]byte, currentBatchHeaderSize) 77 | copy(buf, batchMagicBytes) 78 | binary.LittleEndian.PutUint32(buf[4:], uint32(bHeader.version)) 79 | binary.LittleEndian.PutUint32(buf[8:], uint32(bHeader.length)) 80 | binary.LittleEndian.PutUint32(buf[12:], uint32(bHeader.rawSize)) 81 | binary.LittleEndian.PutUint32(buf[16:], uint32(bHeader.crc32)) 82 | binary.LittleEndian.PutUint16(buf[20:], uint16(bHeader.attribute)) 83 | binary.LittleEndian.PutUint16(buf[22:], uint16(bHeader.dataType)) 84 | binary.LittleEndian.PutUint32(buf[24:], uint32(bHeader.schemaVersion)) 85 | binary.LittleEndian.PutUint32(buf[28:], uint32(bHeader.dataOffset)) 86 | binary.LittleEndian.PutUint32(buf[32:], uint32(bHeader.recordCount)) 87 | binary.LittleEndian.PutUint32(buf[36:], uint32(bHeader.schemaColumnNum)) 88 | return buf 89 | } 90 | 91 | type batchSerializer struct { 92 | project string 93 | topic string 94 | cType CompressorType 95 | serializer dataSerializer 96 | cache topicSchemaCache 97 | } 98 | 99 | func newBatchSerializer(project, topic string, schemaCache topicSchemaCache, cType CompressorType) *batchSerializer { 100 | return &batchSerializer{ 101 | project: project, 102 | topic: topic, 103 | cType: cType, 104 | serializer: newDataSerializer(schemaCache), 105 | cache: schemaCache, 106 | } 107 | } 108 | 109 | func (bs *batchSerializer) serialize(records []IRecord) ([]byte, *batchHeader, error) { 110 | err := bs.preCheck(records) 111 | if err != nil { 112 | return nil, nil, err 113 | } 114 | 115 | schemaVersionId, err := bs.getSchemaVersion(records[0]) 116 | if err != nil { 117 | return nil, nil, err 118 | } 119 | 120 | rawBuf, err := bs.serializer.serialize(records) 121 | if err != nil { 122 | return nil, nil, err 123 | } 124 | 125 | var attrbuite int16 = 0 126 | buf, err := bs.compress(rawBuf, &attrbuite) 127 | if err != nil { 128 | return nil, nil, err 129 | } 130 | 131 | columnNum := 0 132 | if tr, ok := records[0].(*TupleRecord); ok { 133 | columnNum = tr.RecordSchema.Size() 134 | } 135 | 136 | header := &batchHeader{ 137 | magic: batchMagicNum, 138 | version: 1, 139 | length: int32(len(buf)) + currentBatchHeaderSize, 140 | rawSize: int32(len(rawBuf)), 141 | crc32: calculateCrc32(buf), 142 | attribute: attrbuite, 143 | dataType: avroDataType, 144 | schemaVersion: schemaVersionId, 145 | dataOffset: currentBatchHeaderSize, 146 | recordCount: int32(len(records)), 147 | schemaColumnNum: int32(columnNum), 148 | } 149 | 150 | headerBuf := bs.serializeBatchHeader(header) 151 | 152 | res := bytes.NewBuffer(headerBuf) 153 | _, err = res.Write(buf) 154 | if err != nil { 155 | return nil, nil, err 156 | } 157 | 158 | return res.Bytes(), header, nil 159 | } 160 | 161 | func (bs *batchSerializer) preCheck(record []IRecord) error { 162 | if len(record) == 0 { 163 | return 
fmt.Errorf("records is empty") 164 | } 165 | 166 | var schemaCode uint32 = 0 167 | tupleRecord, ok := record[0].(*TupleRecord) 168 | if ok { 169 | schemaCode = tupleRecord.RecordSchema.hashCode() 170 | } 171 | 172 | for _, record := range record { 173 | switch realRecord := record.(type) { 174 | case *BlobRecord: 175 | if schemaCode != 0 { 176 | return fmt.Errorf("blob record can not be mixed with tuple record in single request") 177 | } 178 | case *TupleRecord: 179 | if schemaCode == 0 { 180 | return fmt.Errorf("blob record can not be mixed with tuple record in single request") 181 | } 182 | if realRecord.RecordSchema.hashCode() != schemaCode { 183 | return fmt.Errorf("record schema is not same in single request") 184 | } 185 | } 186 | } 187 | return nil 188 | } 189 | 190 | func (bs *batchSerializer) getSchemaVersion(record IRecord) (int32, error) { 191 | var dhSchema *RecordSchema = nil 192 | tupleRecord, ok := record.(*TupleRecord) 193 | if ok { 194 | dhSchema = tupleRecord.RecordSchema 195 | } 196 | 197 | versionId := bs.cache.getVersionIdBySchema(dhSchema) 198 | if versionId == invalidSchemaVersionId { 199 | schemaStr := "nil" 200 | if dhSchema != nil { 201 | schemaStr = dhSchema.String() 202 | } 203 | return 0, fmt.Errorf("%s/%s schema not found, schema:%s", bs.project, bs.topic, schemaStr) 204 | } 205 | return int32(versionId), nil 206 | } 207 | 208 | func (bs *batchSerializer) compress(data []byte, attrbuite *int16) ([]byte, error) { 209 | *attrbuite = setCompressType(*attrbuite, bs.cType) 210 | compressor := getCompressor(bs.cType) 211 | if compressor != nil { 212 | cData, err := compressor.Compress(data) 213 | if err != nil { 214 | return nil, err 215 | } 216 | 217 | return cData, nil 218 | } 219 | 220 | return data, nil 221 | } 222 | 223 | type batchDeserializer struct { 224 | shardId string 225 | deserializer dataDeserializer 226 | } 227 | 228 | func newBatchDeserializer(shardId string, schemaCache topicSchemaCache) *batchDeserializer { 229 | return &batchDeserializer{ 230 | shardId: shardId, 231 | deserializer: newDataDeserializer(schemaCache), 232 | } 233 | } 234 | 235 | func (bd *batchDeserializer) deserialize(data []byte, meta *respMeta) ([]IRecord, error) { 236 | header, err := parseBatchHeader(data) 237 | if err != nil { 238 | return nil, err 239 | } 240 | 241 | rawBuf, err := bd.decompress(data[header.dataOffset:], header) 242 | if err != nil { 243 | return nil, err 244 | } 245 | 246 | records, err := bd.deserializer.deserialize(rawBuf, header) 247 | if err != nil { 248 | return nil, err 249 | } 250 | 251 | for idx, record := range records { 252 | record.setMetaInfo(meta.sequence, meta.systemTime, meta.serial, idx, bd.shardId, meta.cursor, meta.nextCursor) 253 | } 254 | 255 | return records, nil 256 | } 257 | 258 | func (deserializer *batchDeserializer) decompress(data []byte, header *batchHeader) ([]byte, error) { 259 | cType := getCompressType(header.attribute) 260 | compressor := getCompressor(cType) 261 | if compressor == nil { 262 | return data, nil 263 | } 264 | 265 | buf, err := compressor.DeCompress(data, int64(header.rawSize)) 266 | if err != nil { 267 | return nil, err 268 | } 269 | 270 | return buf, nil 271 | } 272 | 273 | func parseBatchHeader(data []byte) (*batchHeader, error) { 274 | if len(data) < defaultBatchHeaderSize { 275 | return nil, fmt.Errorf("read batch header fail, current length[%d] not enough", len(data)) 276 | } 277 | 278 | header := newBatchHeader() 279 | header.magic = int32(binary.LittleEndian.Uint32(data[0:])) 280 | header.version = 
int32(binary.LittleEndian.Uint32(data[4:])) 281 | header.length = int32(binary.LittleEndian.Uint32(data[8:])) 282 | header.rawSize = int32(binary.LittleEndian.Uint32(data[12:])) 283 | header.crc32 = binary.LittleEndian.Uint32(data[16:]) 284 | header.attribute = int16(binary.LittleEndian.Uint16(data[20:])) 285 | header.dataType = int16(binary.LittleEndian.Uint16(data[22:])) 286 | header.schemaVersion = int32(binary.LittleEndian.Uint32(data[24:])) 287 | header.dataOffset = int32(binary.LittleEndian.Uint32(data[28:])) 288 | header.recordCount = int32(binary.LittleEndian.Uint32(data[32:])) 289 | 290 | if header.dataOffset > int32(len(data)) { 291 | return nil, fmt.Errorf("read batch header fail, current length[%d] not enough", len(data)) 292 | } 293 | 294 | if header.dataOffset >= currentBatchHeaderSize { 295 | header.schemaColumnNum = int32(binary.LittleEndian.Uint32(data[36:])) 296 | } 297 | 298 | if header.magic != batchMagicNum { 299 | return nil, fmt.Errorf("check magic number fail") 300 | } 301 | 302 | if header.length != int32(len(data)) { 303 | return nil, fmt.Errorf("check payload length fail, expect:%d, real:%d", header.length, len(data)) 304 | } 305 | 306 | if header.crc32 != 0 { 307 | calCrc := calculateCrc32(data[header.dataOffset:]) 308 | if calCrc != header.crc32 { 309 | return nil, fmt.Errorf("check crc fail. expect:%d, real:%d", header.crc32, calCrc) 310 | } 311 | } 312 | 313 | if header.dataType != avroDataType { 314 | return nil, fmt.Errorf("only support avro data type, real:%d", header.dataType) 315 | } 316 | 317 | return header, nil 318 | } 319 | -------------------------------------------------------------------------------- /datahub/record.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "encoding/base64" 5 | "encoding/json" 6 | "fmt" 7 | "reflect" 8 | ) 9 | 10 | // BaseRecord 11 | type BaseRecord struct { 12 | ShardId string `json:"ShardId,omitempty"` 13 | PartitionKey string `json:"PartitionKey,omitempty"` 14 | HashKey string `json:"HashKey,omitempty"` 15 | SystemTime int64 `json:"SystemTime,omitempty"` 16 | Sequence int64 `json:"Sequence,omitempty"` 17 | BatchIndex int `json:"-"` 18 | Cursor string `json:"Cursor,omitempty"` 19 | NextCursor string `json:"NextCursor,omitempty"` 20 | Serial int64 `json:"Serial,omitempty"` 21 | Attributes map[string]string `json:"Attributes,omitempty"` 22 | } 23 | 24 | func (br *BaseRecord) GetSystemTime() int64 { 25 | return br.SystemTime 26 | } 27 | 28 | func (br *BaseRecord) GetSequence() int64 { 29 | return br.Sequence 30 | } 31 | 32 | func (br *BaseRecord) GetBatchIndex() int { 33 | return br.BatchIndex 34 | } 35 | 36 | // SetAttribute set or modify(if exist) attribute 37 | func (br *BaseRecord) SetAttribute(key string, val string) { 38 | if br.Attributes == nil { 39 | br.Attributes = make(map[string]string) 40 | } 41 | br.Attributes[key] = val 42 | } 43 | 44 | func (br *BaseRecord) GetAttributes() map[string]string { 45 | return br.Attributes 46 | } 47 | 48 | func (br *BaseRecord) setMetaInfo(sequence, systemTime, serial int64, index int, shardId, cursor, nextCursor string) { 49 | br.Sequence = sequence 50 | br.SystemTime = systemTime 51 | br.Serial = serial 52 | br.BatchIndex = index 53 | br.ShardId = shardId 54 | br.Cursor = cursor 55 | br.NextCursor = nextCursor 56 | } 57 | 58 | func (br *BaseRecord) SetShardId(shardId string) { 59 | br.ShardId = shardId 60 | } 61 | 62 | func (br *BaseRecord) SetPartitionKey(key string) { 63 | br.PartitionKey = key 64 | 
} 65 | 66 | // RecordEntry 67 | type RecordEntry struct { 68 | Data interface{} `json:"Data"` 69 | BaseRecord 70 | } 71 | 72 | // IRecord record interface 73 | type IRecord interface { 74 | fmt.Stringer 75 | GetSystemTime() int64 76 | GetSequence() int64 77 | GetBatchIndex() int 78 | GetData() interface{} 79 | fillData(data interface{}) error 80 | GetBaseRecord() BaseRecord 81 | SetBaseRecord(baseRecord BaseRecord) 82 | SetAttribute(key string, val string) 83 | GetAttributes() map[string]string 84 | setMetaInfo(sequence, systemTime, serial int64, index int, shardId, cursor, nextCursor string) 85 | SetShardId(shardId string) 86 | SetPartitionKey(key string) 87 | GetSize() int 88 | } 89 | 90 | // BlobRecord blob type record 91 | type BlobRecord struct { 92 | RawData []byte 93 | BaseRecord 94 | } 95 | 96 | // NewBlobRecord creates a new blob type record from the given bytes 97 | func NewBlobRecord(bytedata []byte) *BlobRecord { 98 | br := &BlobRecord{} 99 | br.RawData = bytedata 100 | br.Attributes = make(map[string]string) 101 | return br 102 | } 103 | 104 | func (br *BlobRecord) String() string { 105 | record := struct { 106 | Data string `json:"Data"` 107 | Attributes map[string]string `json:"Attributes"` 108 | }{ 109 | Data: string(br.RawData), 110 | Attributes: br.Attributes, 111 | } 112 | byts, _ := json.Marshal(record) 113 | return string(byts) 114 | } 115 | 116 | // fillData implements the IRecord interface 117 | func (br *BlobRecord) fillData(data interface{}) error { 118 | switch v := data.(type) { 119 | case string: 120 | bytedata, err := base64.StdEncoding.DecodeString(v) 121 | if err != nil { 122 | return err 123 | } 124 | br.RawData = bytedata 125 | case []byte: 126 | br.RawData = v 127 | default: 128 | return fmt.Errorf("invalid data type: %s", reflect.TypeOf(data)) 129 | } 130 | return nil 131 | } 132 | 133 | // GetData implements the IRecord interface 134 | func (br *BlobRecord) GetData() interface{} { 135 | return br.RawData 136 | } 137 | 138 | func (br *BlobRecord) GetRawData() []byte { 139 | return br.RawData 140 | } 141 | 142 | // GetBaseRecord gets the base record entry 143 | func (br *BlobRecord) GetBaseRecord() BaseRecord { 144 | return br.BaseRecord 145 | } 146 | 147 | func (br *BlobRecord) SetBaseRecord(baseRecord BaseRecord) { 148 | br.BaseRecord = baseRecord 149 | } 150 | 151 | func (br *BlobRecord) GetSize() int { 152 | size := 0 153 | for k, v := range br.Attributes { 154 | size += len(k) + len(v) 155 | } 156 | 157 | size += len(br.RawData) 158 | return size 159 | } 160 | 161 | // TupleRecord tuple type record 162 | type TupleRecord struct { 163 | RecordSchema *RecordSchema 164 | Values []DataType 165 | BaseRecord 166 | } 167 | 168 | // NewTupleRecord creates a new tuple type record from the given record schema 169 | func NewTupleRecord(schema *RecordSchema) *TupleRecord { 170 | tr := &TupleRecord{} 171 | if schema != nil { 172 | tr.RecordSchema = schema 173 | tr.Values = make([]DataType, schema.Size()) 174 | } 175 | tr.Attributes = make(map[string]string) 176 | for idx := range tr.Values { 177 | tr.Values[idx] = nil 178 | } 179 | return tr 180 | } 181 | 182 | func NewTupleRecordFromJson(schema *RecordSchema, jsonBuf []byte, opts ...JsonParseOption) (*TupleRecord, error) { 183 | record := NewTupleRecord(schema) 184 | 185 | cfg, err := getJsonParseConfig(opts...)
186 | if err != nil { 187 | return nil, err 188 | } 189 | 190 | obj, err := parseJson(jsonBuf) 191 | if err != nil { 192 | return nil, err 193 | } 194 | 195 | for k, v := range obj { 196 | err = record.SetValueByName(k, v) 197 | if err != nil { 198 | if IsFieldNotExistsError(err) && cfg.ignoreNotExistKey { 199 | continue 200 | } 201 | return nil, err 202 | } 203 | } 204 | 205 | return record, nil 206 | } 207 | 208 | func (tr *TupleRecord) String() string { 209 | record := struct { 210 | Values []DataType `json:"Values"` 211 | Attributes map[string]string `json:"Attributes"` 212 | }{ 213 | Values: tr.Values, 214 | Attributes: tr.Attributes, 215 | } 216 | byts, _ := json.Marshal(record) 217 | return string(byts) 218 | } 219 | 220 | // SetValueByIdx set a value by idx 221 | func (tr *TupleRecord) SetValueByIdx(idx int, val any) error { 222 | if idx < 0 || idx >= tr.RecordSchema.Size() { 223 | return newFieldNotExistsError(fmt.Sprintf("field index[%d] out of range", idx)) 224 | } 225 | 226 | field := tr.RecordSchema.Fields[idx] 227 | if val == nil && !field.AllowNull { 228 | return fmt.Errorf("[%s] not allow null", field.Name) 229 | } 230 | 231 | v, err := validateFieldValue(field.Type, val) 232 | if err != nil { 233 | return err 234 | } 235 | tr.Values[idx] = v 236 | return nil 237 | } 238 | 239 | // SetValueByName set a value by name 240 | func (tr *TupleRecord) SetValueByName(name string, val any) error { 241 | idx := tr.RecordSchema.GetFieldIndex(name) 242 | if idx < 0 { 243 | return newFieldNotExistsError(fmt.Sprintf("field[%s] not exist", name)) 244 | } 245 | return tr.SetValueByIdx(idx, val) 246 | } 247 | 248 | func (tr *TupleRecord) GetValueByIdx(idx int) (DataType, error) { 249 | if idx < 0 || idx >= tr.RecordSchema.Size() { 250 | return nil, newFieldNotExistsError(fmt.Sprintf("field index[%d] out of range", idx)) 251 | } 252 | return tr.Values[idx], nil 253 | } 254 | 255 | func (tr *TupleRecord) GetValueByName(name string) (DataType, error) { 256 | idx := tr.RecordSchema.GetFieldIndex(name) 257 | if idx < 0 { 258 | return nil, newFieldNotExistsError(fmt.Sprintf("field[%s] not exist", name)) 259 | } 260 | return tr.GetValueByIdx(idx) 261 | } 262 | 263 | func (tr *TupleRecord) GetValues() map[string]DataType { 264 | values := make(map[string]DataType) 265 | for i, f := range tr.RecordSchema.Fields { 266 | values[f.Name] = tr.Values[i] 267 | } 268 | return values 269 | } 270 | 271 | // SetValues batch set values 272 | func (tr *TupleRecord) SetValues(values []DataType) error { 273 | if fsize := tr.RecordSchema.Size(); fsize != len(values) { 274 | return fmt.Errorf("values size not match field size(field.size=%d, values.size=%d)", fsize, len(values)) 275 | } 276 | 277 | for idx, val := range values { 278 | v, err := validateFieldValue(tr.RecordSchema.Fields[idx].Type, val) 279 | if err != nil { 280 | return err 281 | } 282 | tr.Values[idx] = v 283 | } 284 | return nil 285 | } 286 | 287 | // FillData implement of IRecord interface 288 | func (tr *TupleRecord) fillData(data interface{}) error { 289 | datas, ok := data.([]interface{}) 290 | if !ok { 291 | return fmt.Errorf("data must be array") 292 | } 293 | //else if fsize := tr.RecordSchema.Size(); len(datas) != fsize { 294 | // return fmt.Errorf("data array size not match field size(field.size=%d, values.size=%d)", fsize, len(datas)) 295 | //} 296 | for idx, v := range datas { 297 | if v != nil { 298 | s, ok := v.(string) 299 | if !ok { 300 | return fmt.Errorf("data value type[%T] illegal", v) 301 | } 302 | tv, err := 
castValueFromString(s, tr.RecordSchema.Fields[idx].Type) 303 | if err != nil { 304 | return err 305 | } 306 | tr.Values[idx] = tv 307 | } 308 | } 309 | return nil 310 | } 311 | 312 | // GetData implement of IRecord interface 313 | func (tr *TupleRecord) GetData() interface{} { 314 | result := make([]interface{}, len(tr.Values)) 315 | for idx, val := range tr.Values { 316 | if val != nil { 317 | result[idx] = val.String() 318 | } else { 319 | result[idx] = nil 320 | } 321 | } 322 | return result 323 | } 324 | 325 | func (tr *TupleRecord) GetSize() int { 326 | size := 0 327 | for k, v := range tr.Attributes { 328 | size += len(k) + len(v) 329 | } 330 | 331 | for _, val := range tr.Values { 332 | if val != nil { 333 | size += len(val.String()) 334 | } 335 | } 336 | 337 | for _, val := range tr.Values { 338 | if val != nil { 339 | size += val.Size() 340 | } 341 | } 342 | return size 343 | } 344 | 345 | // GetBaseRecord get base record entry 346 | func (tr *TupleRecord) GetBaseRecord() BaseRecord { 347 | return tr.BaseRecord 348 | } 349 | 350 | func (tr *TupleRecord) SetBaseRecord(baseRecord BaseRecord) { 351 | tr.BaseRecord = baseRecord 352 | } 353 | 354 | type FailedRecord struct { 355 | Index int `json:"Index"` 356 | ErrorCode string `json:"ErrorCode"` 357 | ErrorMessage string `json:"ErrorMessage"` 358 | } 359 | -------------------------------------------------------------------------------- /datahub/restclient.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/hmac" 7 | "crypto/sha1" 8 | "encoding/base64" 9 | "fmt" 10 | "io" 11 | "net" 12 | "net/http" 13 | "os" 14 | "sort" 15 | "strconv" 16 | "strings" 17 | "time" 18 | 19 | log "github.com/sirupsen/logrus" 20 | ) 21 | 22 | const ( 23 | httpHeaderAcceptEncoding = "Accept-Encoding" 24 | httpHeaderAuthorization = "Authorization" 25 | httpHeadercacheControl = "Cache-Control" 26 | httpHeaderChunked = "chunked" 27 | httpHeaderClientVersion = "x-datahub-client-version" 28 | httpHeaderContentDisposition = "Content-Disposition" 29 | httpHeaderContentEncoding = "Content-Encoding" 30 | httpHeaderContentLength = "Content-Length" 31 | httpHeaderContentMD5 = "Content-MD5" 32 | httpHeaderContentType = "Content-Type" 33 | httpHeaderDate = "Date" 34 | httpHeaderETAG = "ETag" 35 | httpHeaderEXPIRES = "Expires" 36 | httpHeaderHost = "Host" 37 | httpHeaderlastModified = "Last-Modified" 38 | httpHeaderLocation = "Location" 39 | httpHeaderRange = "Range" 40 | httpHeaderRawSize = "x-datahub-content-raw-size" 41 | httpHeaderRequestAction = "x-datahub-request-action" 42 | httpHeaderRequestId = "x-datahub-request-id" 43 | httpHeaderSecurityToken = "x-datahub-security-token" 44 | httpHeaderDwarfToken = "x-datahub-dwarf-token" 45 | httpHeaderDwarfSign = "x-datahub-dwarf-sign" 46 | httpHeaderTransferEncoding = "Transfer-Encoding" 47 | httpHeaderUserAgent = "User-Agent" 48 | httpHeaderConnectorMode = "mode" 49 | ) 50 | 51 | const ( 52 | httpFilterQuery = "filter" 53 | httpJsonContent = "application/json" 54 | httpProtoContent = "application/x-protobuf" 55 | httpProtoBatchContent = "application/x-binary" 56 | httpPublistContent = "pub" 57 | httpSubscribeContent = "sub" 58 | ) 59 | 60 | const ( 61 | datahubHeadersPrefix = "x-datahub-" 62 | ) 63 | 64 | func init() { 65 | // Log as JSON instead of the default ASCII formatter. 
66 | log.SetFormatter(&log.TextFormatter{}) 67 | 68 | // Output to stdout instead of the default stderr 69 | // Can be any io.Writer, see below for File examples 70 | log.SetOutput(os.Stdout) 71 | 72 | // Only log the level severity or above. 73 | dev := strings.ToLower(os.Getenv("GODATAHUB_DEV")) 74 | switch dev { 75 | case "true": 76 | log.SetLevel(log.DebugLevel) 77 | default: 78 | log.SetLevel(log.WarnLevel) 79 | } 80 | } 81 | 82 | // DialContextFn was defined to make code more readable. 83 | type DialContextFn func(ctx context.Context, network, address string) (net.Conn, error) 84 | 85 | // TraceDialContext implements our own dialer in order to trace conn info. 86 | func TraceDialContext(ctimeout time.Duration) DialContextFn { 87 | dialer := &net.Dialer{ 88 | Timeout: ctimeout, 89 | KeepAlive: ctimeout, 90 | } 91 | return func(ctx context.Context, network, addr string) (net.Conn, error) { 92 | conn, err := dialer.DialContext(ctx, network, addr) 93 | if err != nil { 94 | return nil, err 95 | } 96 | return conn, nil 97 | } 98 | } 99 | 100 | // RestClient is the REST client for the DataHub service 101 | type RestClient struct { 102 | // Endpoint is the endpoint of the DataHub service 103 | Endpoint string 104 | // Useragent user agent 105 | Useragent string 106 | // HttpClient http client 107 | HttpClient *http.Client 108 | // Account 109 | Account Account 110 | CompressorType CompressorType 111 | Protocol Protocol 112 | } 113 | 114 | // NewRestClient creates a new rest client 115 | func NewRestClient(endpoint string, useragent string, httpClient *http.Client, account Account, cType CompressorType, protocol Protocol) *RestClient { 116 | endpoint = strings.TrimSuffix(endpoint, "/") 117 | return &RestClient{ 118 | Endpoint: endpoint, 119 | Useragent: useragent, 120 | HttpClient: httpClient, 121 | Account: account, 122 | CompressorType: cType, 123 | Protocol: protocol, 124 | } 125 | } 126 | 127 | // Get sends an HTTP GET request 128 | func (client *RestClient) Get(resource string, model RequestModel) ([]byte, *CommonResponseResult, error) { 129 | return client.request(http.MethodGet, resource, model) 130 | } 131 | 132 | // Post sends an HTTP POST request 133 | func (client *RestClient) Post(resource string, model RequestModel) ([]byte, *CommonResponseResult, error) { 134 | return client.request(http.MethodPost, resource, model) 135 | } 136 | 137 | // Put sends an HTTP PUT request 138 | func (client *RestClient) Put(resource string, model RequestModel) (interface{}, *CommonResponseResult, error) { 139 | return client.request(http.MethodPut, resource, model) 140 | } 141 | 142 | // Delete sends an HTTP DELETE request 143 | func (client *RestClient) Delete(resource string, model RequestModel) (interface{}, *CommonResponseResult, error) { 144 | return client.request(http.MethodDelete, resource, model) 145 | } 146 | 147 | func (client *RestClient) request(method, resource string, requestModel RequestModel) ([]byte, *CommonResponseResult, error) { 148 | url := fmt.Sprintf("%s%s", client.Endpoint, resource) 149 | 150 | header := map[string]string{ 151 | httpHeaderClientVersion: DATAHUB_CLIENT_VERSION, 152 | httpHeaderDate: time.Now().UTC().Format(http.TimeFormat), 153 | httpHeaderUserAgent: client.Useragent, 154 | httpHeaderContentType: httpJsonContent, 155 | } 156 | 157 | //serialization 158 | reqBody, reqInfo, err := requestModel.requestBodyEncode() 159 | if err != nil { 160 | return nil, nil, err 161 | } 162 | 163 | client.compressIfNeed(header, &reqBody) 164 | reqSize := len(reqBody) 165 | 166 | if client.Account.GetSecurityToken() != ""
{ 167 | header[httpHeaderSecurityToken] = client.Account.GetSecurityToken() 168 | } 169 | 170 | if credential, ok := client.Account.(*DwarfCredential); ok && 171 | len(credential.DwarfToken) > 0 && len(credential.DwarfSign) > 0 { 172 | header[httpHeaderDwarfToken] = credential.DwarfToken 173 | header[httpHeaderDwarfSign] = credential.DwarfSign 174 | } 175 | 176 | req, err := http.NewRequest(method, url, bytes.NewBuffer(reqBody)) 177 | if err != nil { 178 | return nil, nil, err 179 | } 180 | 181 | for k, v := range requestModel.getExtraHeader() { 182 | header[k] = v 183 | } 184 | 185 | query := req.URL.Query() 186 | for k, v := range requestModel.getExtraQuery() { 187 | query.Add(k, v) 188 | } 189 | req.URL.RawQuery = query.Encode() 190 | 191 | for k, v := range header { 192 | req.Header.Add(k, v) 193 | } 194 | 195 | client.buildSignature(&req.Header, method, req.URL.RequestURI()) 196 | 197 | resp, err := client.HttpClient.Do(req) 198 | if err != nil { 199 | if strings.Contains(err.Error(), "EOF") { 200 | return nil, nil, newNetworkError(err) 201 | } 202 | return nil, nil, err 203 | } 204 | defer resp.Body.Close() 205 | respBody, err := io.ReadAll(resp.Body) 206 | 207 | if err != nil { 208 | return nil, nil, err 209 | } 210 | 211 | //decompress 212 | if err := client.decompress(&respBody, &resp.Header); err != nil { 213 | return nil, nil, err 214 | } 215 | 216 | //detect error 217 | respResult, err := newCommonResponseResult(resp.StatusCode, &resp.Header, respBody) 218 | if log.IsLevelEnabled(log.DebugLevel) { 219 | n := len(reqBody) 220 | if n > 100 { 221 | n = 100 222 | } 223 | log.Debugf("request id: %s\nrequest url: %s\nrequest headers: %v\nrequest body: %s\nresponse headers: %v\nresponse body: %s", 224 | respResult.RequestId, url, req.Header, string(reqBody[:n]), resp.Header, string(respBody)) 225 | } 226 | 227 | if err != nil { 228 | return nil, nil, err 229 | } 230 | 231 | respResult.RawSize = reqInfo.rawSzie 232 | respResult.ReqSize = reqSize 233 | return respBody, respResult, nil 234 | } 235 | 236 | func (client *RestClient) buildSignature(header *http.Header, method, url string) { 237 | builder := make([]string, 0, 5) 238 | builder = append(builder, method) 239 | builder = append(builder, header.Get(httpHeaderContentType)) 240 | builder = append(builder, header.Get(httpHeaderDate)) 241 | 242 | headersToSign := make(map[string][]string) 243 | for k, v := range *header { 244 | lower := strings.ToLower(k) 245 | if strings.HasPrefix(lower, datahubHeadersPrefix) { 246 | headersToSign[lower] = v 247 | } 248 | } 249 | 250 | keys := make([]string, len(headersToSign)) 251 | for k := range headersToSign { 252 | keys = append(keys, k) 253 | } 254 | sort.Strings(keys) 255 | for _, k := range keys { 256 | for _, v := range headersToSign[k] { 257 | builder = append(builder, fmt.Sprintf("%s:%s", k, v)) 258 | } 259 | } 260 | 261 | builder = append(builder, url) 262 | canonString := strings.Join(builder, "\n") 263 | 264 | hash := hmac.New(sha1.New, []byte(client.Account.GetAccountKey())) 265 | hash.Write([]byte(canonString)) 266 | crypto := hash.Sum(nil) 267 | signature := base64.StdEncoding.EncodeToString(crypto) 268 | authorization := fmt.Sprintf("DATAHUB %s:%s", client.Account.GetAccountId(), signature) 269 | 270 | header.Add(httpHeaderAuthorization, authorization) 271 | } 272 | 273 | func (client *RestClient) compressIfNeed(header map[string]string, reqBody *[]byte) { 274 | if client.CompressorType == NOCOMPRESS { 275 | return 276 | } 277 | compressor := getCompressor(client.CompressorType) 
278 | if compressor != nil { 279 | header[httpHeaderAcceptEncoding] = client.CompressorType.String() 280 | compressedReqBody, err := compressor.Compress(*reqBody) 281 | if err != nil { 282 | log.Warningf("compress failed, give up compression, error:%v", err) 283 | } else if len(compressedReqBody) > len(*reqBody) { 284 | if log.IsLevelEnabled(log.DebugLevel) { 285 | log.Debugf("compress invalid, give up compression, rawSize:%d, compressSize:%d", 286 | len(*reqBody), len(compressedReqBody)) 287 | } 288 | } else { 289 | header[httpHeaderContentEncoding] = client.CompressorType.String() 290 | //header[httpHeaderAcceptEncoding] = client.CompressorType.String() 291 | header[httpHeaderRawSize] = strconv.Itoa(len(*reqBody)) 292 | *reqBody = compressedReqBody 293 | } 294 | } 295 | header[httpHeaderContentLength] = strconv.Itoa(len(*reqBody)) 296 | } 297 | 298 | func (client *RestClient) decompress(respBody *[]byte, header *http.Header) error { 299 | encoding := header.Get(httpHeaderContentEncoding) 300 | if encoding == "" { 301 | return nil 302 | } 303 | compressor := getCompressor(CompressorType(encoding)) 304 | if compressor == nil { 305 | return fmt.Errorf("not support the compress mode %s ", encoding) 306 | } 307 | rawSize := header.Get(httpHeaderRawSize) 308 | //str convert to int64 309 | size, err := strconv.ParseInt(rawSize, 10, 64) 310 | if err != nil { 311 | return err 312 | } 313 | 314 | buf, err := compressor.DeCompress(*respBody, size) 315 | if err != nil { 316 | return err 317 | } 318 | *respBody = buf 319 | return nil 320 | } 321 | -------------------------------------------------------------------------------- /datahub/rw_api_test.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | import ( 4 | "encoding/hex" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | 9 | "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestPutBlobRecordsPB(t *testing.T) { 14 | ts := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 15 | assert.Equal(t, requests.POST, request.Method) 16 | assert.Equal(t, "/projects/test_project/topics/test_topic/shards", request.URL.EscapedPath()) 17 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 18 | assert.Equal(t, "pub", request.Header.Get("x-datahub-request-action")) 19 | 20 | writer.Header().Set("x-datahub-request-id", "request_id") 21 | writer.WriteHeader(http.StatusOK) 22 | respBody, err := hex.DecodeString("444855426c75b46a000000020800") 23 | assert.Nil(t, err) 24 | _, _ = writer.Write(respBody) 25 | })) 26 | 27 | defer ts.Close() 28 | 29 | cfg := NewDefaultConfig() 30 | cfg.Protocol = Protobuf 31 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 32 | 33 | records := make([]IRecord, 0) 34 | record1 := NewBlobRecord([]byte("AAAA")) 35 | record1.ShardId = "0" 36 | record1.SetAttribute("key1", "value1") 37 | records = append(records, record1) 38 | 39 | record2 := NewBlobRecord([]byte("BBBB")) 40 | record2.ShardId = "1" 41 | record2.SetAttribute("key2", "value2") 42 | records = append(records, record2) 43 | 44 | ret, err := dh.PutRecords("test_project", "test_topic", records) 45 | assert.Nil(t, err) 46 | assert.NotNil(t, ret) 47 | assert.Equal(t, http.StatusOK, ret.StatusCode) 48 | assert.Equal(t, "request_id", ret.RequestId) 49 | assert.Equal(t, 0, ret.FailedRecordCount) 50 | assert.Equal(t, []FailedRecord(nil), ret.FailedRecords) 51 | } 52 | 
53 | func TestPutBlobRecordsByShardPB(t *testing.T) { 54 | ts := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 55 | assert.Equal(t, requests.POST, request.Method) 56 | assert.Equal(t, "/projects/test_project/topics/test_topic/shards/0", request.URL.EscapedPath()) 57 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 58 | assert.Equal(t, "pub", request.Header.Get("x-datahub-request-action")) 59 | 60 | writer.Header().Set("x-datahub-request-id", "request_id") 61 | writer.WriteHeader(http.StatusOK) 62 | respBody, err := hex.DecodeString("444855426c75b46a000000020800") 63 | assert.Nil(t, err) 64 | _, _ = writer.Write(respBody) 65 | })) 66 | 67 | defer ts.Close() 68 | 69 | cfg := NewDefaultConfig() 70 | cfg.Protocol = Protobuf 71 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 72 | 73 | records := make([]IRecord, 0) 74 | record1 := NewBlobRecord([]byte("AAAA")) 75 | record1.SetAttribute("key1", "value1") 76 | records = append(records, record1) 77 | 78 | record2 := NewBlobRecord([]byte("BBBB")) 79 | record2.SetAttribute("key2", "value2") 80 | records = append(records, record2) 81 | 82 | ret, err := dh.PutRecordsByShard("test_project", "test_topic", "0", records) 83 | assert.Nil(t, err) 84 | assert.NotNil(t, ret) 85 | assert.Equal(t, http.StatusOK, ret.StatusCode) 86 | assert.Equal(t, "request_id", ret.RequestId) 87 | } 88 | 89 | func TestPutTupleRecordsPB(t *testing.T) { 90 | ts := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 91 | assert.Equal(t, requests.POST, request.Method) 92 | assert.Equal(t, "/projects/test_project/topics/test_topic/shards", request.URL.EscapedPath()) 93 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 94 | assert.Equal(t, "pub", request.Header.Get("x-datahub-request-action")) 95 | 96 | writer.Header().Set("x-datahub-request-id", "request_id") 97 | writer.WriteHeader(http.StatusOK) 98 | respBody, err := hex.DecodeString("444855426c75b46a000000020800") 99 | assert.Nil(t, err) 100 | _, _ = writer.Write(respBody) 101 | })) 102 | 103 | defer ts.Close() 104 | 105 | cfg := NewDefaultConfig() 106 | cfg.Protocol = Protobuf 107 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 108 | 109 | recordSchema := NewRecordSchema() 110 | recordSchema.AddField(Field{Name: "f1", Type: BIGINT, AllowNull: true}) 111 | recordSchema.AddField(Field{Name: "f2", Type: STRING, AllowNull: true}) 112 | 113 | records := make([]IRecord, 0) 114 | record1 := NewTupleRecord(recordSchema) 115 | record1.ShardId = "0" 116 | record1.SetValueByName("f1", 1) 117 | record1.SetValueByName("f2", "test") 118 | record1.SetAttribute("key1", "value1") 119 | records = append(records, record1) 120 | 121 | record2 := NewTupleRecord(recordSchema) 122 | record2.ShardId = "1" 123 | record2.SetValueByName("f1", 1) 124 | records = append(records, record2) 125 | 126 | ret, err := dh.PutRecords("test_project", "test_topic", records) 127 | assert.Nil(t, err) 128 | assert.NotNil(t, ret) 129 | assert.Equal(t, http.StatusOK, ret.StatusCode) 130 | assert.Equal(t, "request_id", ret.RequestId) 131 | assert.Equal(t, 0, ret.FailedRecordCount) 132 | assert.Equal(t, []FailedRecord(nil), ret.FailedRecords) 133 | } 134 | 135 | func TestPutTupleRecordsByShardPB(t *testing.T) { 136 | ts := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 137 | assert.Equal(t, requests.POST, request.Method) 138 | 
assert.Equal(t, "/projects/test_project/topics/test_topic/shards/0", request.URL.EscapedPath()) 139 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 140 | assert.Equal(t, "pub", request.Header.Get("x-datahub-request-action")) 141 | 142 | writer.Header().Set("x-datahub-request-id", "request_id") 143 | writer.WriteHeader(http.StatusOK) 144 | respBody, err := hex.DecodeString("444855426c75b46a000000020800") 145 | assert.Nil(t, err) 146 | _, _ = writer.Write(respBody) 147 | })) 148 | 149 | defer ts.Close() 150 | 151 | cfg := NewDefaultConfig() 152 | cfg.Protocol = Protobuf 153 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 154 | 155 | recordSchema := NewRecordSchema() 156 | recordSchema.AddField(Field{Name: "f1", Type: BIGINT, AllowNull: true}) 157 | recordSchema.AddField(Field{Name: "f2", Type: STRING, AllowNull: true}) 158 | 159 | records := make([]IRecord, 0) 160 | record1 := NewTupleRecord(recordSchema) 161 | record1.SetValueByName("f1", 1) 162 | record1.SetValueByName("f2", "test") 163 | record1.SetAttribute("key1", "value1") 164 | records = append(records, record1) 165 | 166 | record2 := NewTupleRecord(recordSchema) 167 | record2.SetValueByName("f1", 1) 168 | records = append(records, record2) 169 | 170 | ret, err := dh.PutRecordsByShard("test_project", "test_topic", "0", records) 171 | assert.Nil(t, err) 172 | assert.NotNil(t, ret) 173 | assert.Equal(t, http.StatusOK, ret.StatusCode) 174 | assert.Equal(t, "request_id", ret.RequestId) 175 | } 176 | 177 | func TestGetBlobRecordsPB(t *testing.T) { 178 | ts := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 179 | assert.Equal(t, requests.POST, request.Method) 180 | assert.Equal(t, "/projects/test_project/topics/test_topic/shards/0", request.URL.EscapedPath()) 181 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 182 | assert.Equal(t, "sub", request.Header.Get("x-datahub-request-action")) 183 | 184 | writer.Header().Set("x-datahub-request-id", "request_id") 185 | writer.WriteHeader(http.StatusOK) 186 | 187 | respBody, err := hex.DecodeString("44485542ec953a350000009c0a20333030303630343232306164303030303030303030303030303030313030303010011800226b222033303030363034323230616430303030303030303030303030303030303030302a20333030303630343232306164303030303030303030303030303030313030303030003890cdbe92802f42100a0e0a046b657931120676616c7565314a080a060a04414141415000280330a1cdbe92802f") 188 | assert.Nil(t, err) 189 | _, _ = writer.Write(respBody) 190 | })) 191 | 192 | defer ts.Close() 193 | 194 | cfg := NewDefaultConfig() 195 | cfg.Protocol = Protobuf 196 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 197 | 198 | ret, err := dh.GetBlobRecords("test_project", "test_topic", "0", "30005af19b3800000000000000000000", 1) 199 | assert.Nil(t, err) 200 | assert.NotNil(t, ret) 201 | assert.Equal(t, http.StatusOK, ret.StatusCode) 202 | assert.Equal(t, "request_id", ret.RequestId) 203 | assert.Equal(t, "3000604220ad00000000000000010000", ret.NextCursor) 204 | assert.Equal(t, 1, ret.RecordCount) 205 | assert.Equal(t, int64(0), ret.StartSequence) 206 | assert.Nil(t, ret.RecordSchema) 207 | assert.Equal(t, 1, len(ret.Records)) 208 | data, ok := ret.Records[0].(*BlobRecord) 209 | assert.True(t, ok) 210 | assert.Equal(t, "AAAA", string(data.RawData)) 211 | assert.Equal(t, map[string]string(map[string]string{"key1": "value1"}), data.Attributes) 212 | } 213 | 214 | func TestGetTupleRecordsPB(t *testing.T) { 215 | ts := 
httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { 216 | assert.Equal(t, requests.POST, request.Method) 217 | assert.Equal(t, "/projects/test_project/topics/test_topic/shards/0", request.URL.EscapedPath()) 218 | assert.Equal(t, "application/x-protobuf", request.Header.Get("Content-Type")) 219 | assert.Equal(t, "sub", request.Header.Get("x-datahub-request-action")) 220 | 221 | writer.Header().Set("x-datahub-request-id", "request_id") 222 | writer.WriteHeader(http.StatusOK) 223 | 224 | respBody, err := hex.DecodeString("44485542c0c58c45000000a10a203330303036303432316437313030303030303030303030303030303130303030100118002270222033303030363034323164373130303030303030303030303030303030303030302a203330303036303432316437313030303030303030303030303030303130303030300038f8858c92802f42100a0e0a046b657931120676616c7565314a0d0a060a04746573740a030a01315000280530edb28f92802f") 225 | assert.Nil(t, err) 226 | _, _ = writer.Write(respBody) 227 | })) 228 | 229 | defer ts.Close() 230 | 231 | cfg := NewDefaultConfig() 232 | cfg.Protocol = Protobuf 233 | dh := NewClientWithConfig(ts.URL, cfg, NewAliyunAccount("a", "a")) 234 | 235 | recordSchema := NewRecordSchema() 236 | recordSchema.AddField(Field{Name: "field1", Type: STRING, AllowNull: true}) 237 | recordSchema.AddField(Field{Name: "field2", Type: BIGINT, AllowNull: false}) 238 | 239 | ret, err := dh.GetTupleRecords("test_project", "test_topic", "0", "30005af19b3800000000000000000000", 1, recordSchema) 240 | assert.Nil(t, err) 241 | assert.NotNil(t, ret) 242 | assert.Equal(t, http.StatusOK, ret.StatusCode) 243 | assert.Equal(t, "request_id", ret.RequestId) 244 | assert.Equal(t, "300060421d7100000000000000010000", ret.NextCursor) 245 | assert.Equal(t, 1, ret.RecordCount) 246 | assert.Equal(t, int64(0), ret.StartSequence) 247 | assert.NotNil(t, ret.RecordSchema) 248 | assert.Equal(t, 1, len(ret.Records)) 249 | data, ok := ret.Records[0].(*TupleRecord) 250 | assert.True(t, ok) 251 | assert.EqualValues(t, "test", data.Values[0]) 252 | assert.EqualValues(t, 1, data.Values[1]) 253 | } 254 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /datahub/datahub.go: -------------------------------------------------------------------------------- 1 | package datahub 2 | 3 | func NewClientWithConfig(endpoint string, config *Config, account Account) DataHubApi { 4 | // the final user agent is assembled below from the default agent plus any user-supplied suffix 5 | if config.HttpClient == nil { 6 | config.HttpClient = DefaultHttpClient() 7 | } 8 | if !validateCompressorType(config.CompressorType) { 9 | config.CompressorType = LZ4 10 | } 11 | 12 | userAgent := DefaultUserAgent() 13 | if config.UserAgent != "" { 14 | userAgent = userAgent + " " + config.UserAgent 15 | } 16 | 17 | dh := &DataHub{ 18 | Client: NewRestClient(endpoint, userAgent, config.HttpClient, 19 | account, config.CompressorType, config.Protocol), 20 | } 21 | 22 | if config.Protocol == Batch { 23 | // compress data in batch record, no need to compress http body 24 | if config.CompressorType != NOCOMPRESS { 25 | dh.Client.CompressorType = NOCOMPRESS 26 | } 27 | 28 | return &DataHubBatch{ 29 | DataHub: *dh, 30 | compressType: config.CompressorType, 31 | } 32 | } else { 33 | return &DataHubPB{ 34 | DataHub: *dh, 35 | } 36 | } 37 | } 38 | 39 | func New(accessId, accessKey, endpoint string) DataHubApi { 40 | config := NewDefaultConfig() 41 | return NewClientWithConfig(endpoint, config, NewAliyunAccount(accessId, accessKey)) 42 | } 43 | 44 | func NewBatchClient(accessId, accessKey, endpoint string) DataHubApi { 45 | config := NewDefaultConfig() 46 | config.Protocol = Batch 47 | return NewClientWithConfig(endpoint, config, NewAliyunAccount(accessId, accessKey)) 48 | } 49 | 50 | // DataHubApi provides the RESTful APIs for accessing the DataHub service. 51 | type DataHubApi interface { 52 | setUserAgent(userAgent string) 53 | 54 | // List all projects the user owns. 55 | ListProject() (*ListProjectResult, error) 56 | 57 | // List all projects the user owns with filter. 58 | ListProjectWithFilter(filter string) (*ListProjectResult, error) 59 | 60 | // Create a DataHub project. 61 | CreateProject(projectName, comment string) (*CreateProjectResult, error) 62 | 63 | // Update project information. Only comment is supported. 64 | UpdateProject(projectName, comment string) (*UpdateProjectResult, error) 65 | 66 | // Delete the specified project. If any topics exist in the project, the delete operation will fail. 67 | DeleteProject(projectName string) (*DeleteProjectResult, error) 68 | 69 | // Get the information of the specified project. 70 | GetProject(projectName string) (*GetProjectResult, error) 71 | 72 | // Update the project VPC whitelist. 73 | UpdateProjectVpcWhitelist(projectName, vpcIds string) (*UpdateProjectVpcWhitelistResult, error) 74 | 75 | // Wait until all shards' status of this topic is ACTIVE. Default timeout is 60s. 76 | WaitAllShardsReady(projectName, topicName string) bool 77 | 78 | // Wait until all shards' status of this topic is ACTIVE. 79 | // The unit is seconds. 80 | // If timeout < 0, it will block until all shards are ready. 81 | WaitAllShardsReadyWithTime(projectName, topicName string, timeout int64) bool 82 | 83 | // List all topics in the project. 84 | ListTopic(projectName string) (*ListTopicResult, error) 85 | 86 | // List all topics in the project with filter.
87 | ListTopicWithFilter(projectName, filter string) (*ListTopicResult, error) 88 | 89 | // Create a DataHub topic with type: BLOB 90 | CreateBlobTopic(projectName, topicName, comment string, shardCount, lifeCycle int) (*CreateBlobTopicResult, error) 91 | 92 | // Create a DataHub topic with type: TUPLE 93 | CreateTupleTopic(projectName, topicName, comment string, shardCount, lifeCycle int, recordSchema *RecordSchema) (*CreateTupleTopicResult, error) 94 | 95 | // Create a topic with specific parameters 96 | CreateTopicWithPara(projectName, topicName string, para *CreateTopicParameter) (*CreateTopicWithParaResult, error) 97 | 98 | // Update topic meta information. 99 | UpdateTopic(projectName, topicName, comment string) (*UpdateTopicResult, error) 100 | 101 | // Update topic meta information. Only comment and lifeCycle are supported now. 102 | UpdateTopicWithPara(projectName, topicName string, para *UpdateTopicParameter) (*UpdateTopicResult, error) 103 | 104 | // Delete a specified topic. 105 | DeleteTopic(projectName, topicName string) (*DeleteTopicResult, error) 106 | 107 | // Get the information of the specified topic. 108 | GetTopic(projectName, topicName string) (*GetTopicResult, error) 109 | 110 | // List shard information {ShardEntry} of a topic. 111 | ListShard(projectName, topicName string) (*ListShardResult, error) 112 | 113 | // Split a shard. In this function, the SDK will automatically compute the split key used to split the shard. 114 | SplitShard(projectName, topicName, shardId string) (*SplitShardResult, error) 115 | 116 | // Split a shard by the specified splitKey. 117 | SplitShardBySplitKey(projectName, topicName, shardId, splitKey string) (*SplitShardResult, error) 118 | 119 | // Merge the specified shard and its adjacent shard. Only adjacent shards can be merged. 120 | MergeShard(projectName, topicName, shardId, adjacentShardId string) (*MergeShardResult, error) 121 | 122 | // Extend the shard count. 123 | ExtendShard(projectName, topicName string, shardCount int) (*ExtendShardResult, error) 124 | 125 | // Get the data cursor of a shard. This function supports OLDEST, LATEST, SYSTEM_TIME and SEQUENCE. 126 | // If OLDEST or LATEST is chosen, the last parameter is not needed. 127 | // If SYSTEM_TIME or SEQUENCE is chosen, it needs a parameter as a sequence number or timestamp. 128 | GetCursor(projectName, topicName, shardId string, ctype CursorType, param ...int64) (*GetCursorResult, error) 129 | 130 | // Write data records into a DataHub topic. 131 | // The PutRecordsResult includes unsuccessfully processed records. 132 | // DataHub attempts to process all records in each request. 133 | // A single record failure does not stop the processing of subsequent records. 134 | PutRecords(projectName, topicName string, records []IRecord) (*PutRecordsResult, error) 135 | 136 | PutRecordsByShard(projectName, topicName, shardId string, records []IRecord) (*PutRecordsByShardResult, error) 137 | 138 | // Get the TUPLE records of a shard. 139 | GetTupleRecords(projectName, topicName, shardId, cursor string, limit int, recordSchema *RecordSchema) (*GetRecordsResult, error) 140 | 141 | // Get the BLOB records of a shard. 142 | GetBlobRecords(projectName, topicName, shardId, cursor string, limit int) (*GetRecordsResult, error) 143 | 144 | // Append a field to a TUPLE topic. 145 | // Field AllowNull should be true.
146 | AppendField(projectName, topicName string, field Field) (*AppendFieldResult, error) 147 | 148 | // Get metering info of the specified shard 149 | GetMeterInfo(projectName, topicName, shardId string) (*GetMeterInfoResult, error) 150 | 151 | // List the names of connectors. 152 | ListConnector(projectName, topicName string) (*ListConnectorResult, error) 153 | 154 | // Create a data connector. 155 | CreateConnector(projectName, topicName string, cType ConnectorType, columnFields []string, config interface{}) (*CreateConnectorResult, error) 156 | 157 | // Create a connector with a start time (unit: ms) 158 | CreateConnectorWithStartTime(projectName, topicName string, cType ConnectorType, 159 | columnFields []string, sinkStartTime int64, config interface{}) (*CreateConnectorResult, error) 160 | 161 | // Create a connector with parameters 162 | CreateConnectorWithPara(projectName, topicName string, para *CreateConnectorParameter) (*CreateConnectorResult, error) 163 | 164 | // Update connector config of the specified data connector. 165 | // Config should be SinkOdpsConfig, SinkOssConfig ... 166 | UpdateConnector(projectName, topicName, connectorId string, config interface{}) (*UpdateConnectorResult, error) 167 | 168 | // Update a connector with parameters 169 | UpdateConnectorWithPara(projectName, topicName, connectorId string, para *UpdateConnectorParameter) (*UpdateConnectorResult, error) 170 | 171 | // Delete a data connector. 172 | DeleteConnector(projectName, topicName, connectorId string) (*DeleteConnectorResult, error) 173 | 174 | // Get information of the specified data connector. 175 | GetConnector(projectName, topicName, connectorId string) (*GetConnectorResult, error) 176 | 177 | // Get the done time of a data connector. This method is mainly used to get the MaxCompute synchronization point. 178 | GetConnectorDoneTime(projectName, topicName, connectorId string) (*GetConnectorDoneTimeResult, error) 179 | 180 | // Get the detail information of the shard task which belongs to the specified data connector. 181 | GetConnectorShardStatus(projectName, topicName, connectorId string) (*GetConnectorShardStatusResult, error) 182 | 183 | // Get the detail information of the shard task which belongs to the specified data connector. 184 | GetConnectorShardStatusByShard(projectName, topicName, connectorId, shardId string) (*GetConnectorShardStatusByShardResult, error) 185 | 186 | // Reload a data connector. 187 | ReloadConnector(projectName, topicName, connectorId string) (*ReloadConnectorResult, error) 188 | 189 | // Reload the specified shard of the data connector. 190 | ReloadConnectorByShard(projectName, topicName, connectorId, shardId string) (*ReloadConnectorByShardResult, error) 191 | 192 | // Update the state of the data connector 193 | UpdateConnectorState(projectName, topicName, connectorId string, state ConnectorState) (*UpdateConnectorStateResult, error) 194 | 195 | // Update the connector sink offset. This operation must be performed after the connector is stopped. 196 | UpdateConnectorOffset(projectName, topicName, connectorId, shardId string, offset ConnectorOffset) (*UpdateConnectorOffsetResult, error) 197 | 198 | // Append a data connector field. 199 | // Before running this method, you should ensure that this field exists in both the topic and the connector. 200 | AppendConnectorField(projectName, topicName, connectorId, fieldName string) (*AppendConnectorFieldResult, error) 201 | 202 | // List subscriptions in the topic.
203 | ListSubscription(projectName, topicName string, pageIndex, pageSize int) (*ListSubscriptionResult, error) 204 | 205 | // Create a subscription, and then you should commit offsets with this subscription. 206 | CreateSubscription(projectName, topicName, comment string) (*CreateSubscriptionResult, error) 207 | 208 | // Update a subscription. Now only updating comment information is supported. 209 | UpdateSubscription(projectName, topicName, subId, comment string) (*UpdateSubscriptionResult, error) 210 | 211 | // Delete a subscription. 212 | DeleteSubscription(projectName, topicName, subId string) (*DeleteSubscriptionResult, error) 213 | 214 | // Get the detail information of a subscription. 215 | GetSubscription(projectName, topicName, subId string) (*GetSubscriptionResult, error) 216 | 217 | // Update a subscription's state. You can change the state of a subscription to SUB_ONLINE or SUB_OFFLINE. 218 | // When offline, you cannot commit offsets of the subscription. 219 | UpdateSubscriptionState(projectName, topicName, subId string, state SubscriptionState) (*UpdateSubscriptionStateResult, error) 220 | 221 | // Init and get a subscription session, and return offsets if any were stored before. 222 | // Subscription should be initialized before use. This operation makes sure that only one client uses this subscription. 223 | // If this function is called elsewhere, the session will become invalid and cannot commit offsets of the subscription. 224 | OpenSubscriptionSession(projectName, topicName, subId string, shardIds []string) (*OpenSubscriptionSessionResult, error) 225 | 226 | // Get offsets of a subscription. This method does not return sessionId in SubscriptionOffset. 227 | // Only a SubscriptionOffset containing a sessionId can commit offsets. 228 | GetSubscriptionOffset(projectName, topicName, subId string, shardIds []string) (*GetSubscriptionOffsetResult, error) 229 | 230 | // Update offsets of shards to the server. This operation allows you to store offsets on the server side. 231 | CommitSubscriptionOffset(projectName, topicName, subId string, offsets map[string]SubscriptionOffset) (*CommitSubscriptionOffsetResult, error) 232 | 233 | // Reset offsets of shards to the server. This operation allows you to reset offsets on the server side. 234 | ResetSubscriptionOffset(projectName, topicName, subId string, offsets map[string]SubscriptionOffset) (*ResetSubscriptionOffsetResult, error) 235 | 236 | // Heartbeat request to let the server know the consumer status. 237 | Heartbeat(projectName, topicName, consumerGroup, consumerId string, versionId int64, holdShardList, readEndShardList []string) (*HeartbeatResult, error) 238 | 239 | // Join a consumer group. 240 | JoinGroup(projectName, topicName, consumerGroup string, sessionTimeout int64) (*JoinGroupResult, error) 241 | 242 | // Sync consumer group info. 243 | SyncGroup(projectName, topicName, consumerGroup, consumerId string, versionId int64, releaseShardList, readEndShardList []string) (*SyncGroupResult, error) 244 | 245 | // Leave the consumer group. 246 | LeaveGroup(projectName, topicName, consumerGroup, consumerId string, versionId int64) (*LeaveGroupResult, error) 247 | 248 | // List topic schema. 249 | ListTopicSchema(projectName, topicName string) (*ListTopicSchemaResult, error) 250 | 251 | // Get topic schema by versionId. 252 | GetTopicSchemaByVersion(projectName, topicName string, versionId int) (*GetTopicSchemaResult, error) 253 | 254 | // Get topic schema by schema string.
255 | GetTopicSchemaBySchema(projectName, topicName string, recordSchema *RecordSchema) (*GetTopicSchemaResult, error) 256 | 257 | // Register schema to a topic. 258 | RegisterTopicSchema(projectName, topicName string, recordSchema *RecordSchema) (*RegisterTopicSchemaResult, error) 259 | 260 | // Delete topic schema by versionId 261 | DeleteTopicSchema(projectName, topicName string, versionId int) (*DeleteTopicSchemaResult, error) 262 | } 263 | --------------------------------------------------------------------------------
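A minimal usage sketch of the write-then-read flow documented by DataHubApi above: create a client, PutRecords into a topic, then GetCursor and GetTupleRecords on a shard. Endpoint, credentials, project, topic and shard id are placeholders; the OLDEST cursor constant and the Cursor field on GetCursorResult are assumed from the doc comments above, so verify the exact names against the SDK before relying on them.

package main

import (
	"fmt"

	"github.com/aliyun/aliyun-datahub-sdk-go/datahub"
)

func main() {
	// Placeholders: fill in a real endpoint and credentials before running.
	dh := datahub.New("<accessId>", "<accessKey>", "<endpoint>")

	// Schema for a TUPLE topic; the field names here are illustrative only.
	schema := datahub.NewRecordSchema()
	schema.AddField(datahub.Field{Name: "f1", Type: datahub.BIGINT, AllowNull: true})
	schema.AddField(datahub.Field{Name: "f2", Type: datahub.STRING, AllowNull: true})

	// Write one tuple record; a single failed record does not stop the others.
	record := datahub.NewTupleRecord(schema)
	record.ShardId = "0"
	_ = record.SetValueByName("f1", 1)
	_ = record.SetValueByName("f2", "hello")
	if _, err := dh.PutRecords("<project>", "<topic>", []datahub.IRecord{record}); err != nil {
		fmt.Println("put records failed:", err)
		return
	}

	// Read the shard back: fetch an OLDEST cursor, then up to 10 tuple records.
	// OLDEST and cursorRet.Cursor are assumed names; check the SDK's cursor types and result struct.
	cursorRet, err := dh.GetCursor("<project>", "<topic>", "0", datahub.OLDEST)
	if err != nil {
		fmt.Println("get cursor failed:", err)
		return
	}
	recordsRet, err := dh.GetTupleRecords("<project>", "<topic>", "0", cursorRet.Cursor, 10, schema)
	if err != nil {
		fmt.Println("get records failed:", err)
		return
	}
	for _, r := range recordsRet.Records {
		fmt.Println(r.String())
	}
}

The same flow also works with a client created by NewBatchClient, which uses the Batch protocol defined above.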