├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── affiliation ├── dto.go ├── identityprovider.go ├── identityprovider_test.go └── mocks │ └── db_connector.go ├── affs.go ├── api.go ├── bugzilla ├── README.md ├── const.go ├── dto.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── html_parser.go ├── html_parser_test.go ├── manager.go ├── mapping.go ├── mocks │ ├── activity.html │ ├── affiliation_client.go │ ├── auth0_client.go │ ├── es_client_provider.go │ ├── http_client_provider.go │ └── slack_provider.go └── raw-sample.json ├── bugzillarest ├── const.go ├── dto.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── fetcher_test.go ├── manager.go ├── manager_test.go ├── mapping.go └── mocks │ ├── affiliation_client.go │ ├── auth0_client_provider.go │ ├── auth_client_provider.go │ ├── es_client_provider.go │ └── http_client_provider.go ├── build └── options.go ├── cmd └── dads │ └── dads.go ├── compare_rocketchat.sh ├── const.go ├── content ├── context.go ├── context_test.go ├── coverage.cov ├── db └── connector.go ├── detect-removed-commits.sh ├── dockerhub ├── README.md ├── const.go ├── dto.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── fetcher_test.go ├── manager.go ├── mapping.go └── mocks │ ├── auth0_client.go │ ├── es_client_provider.go │ └── http_client_provider.go ├── ds.go ├── dsconfluence.go ├── dsgerrit.go ├── dsgit.go ├── dsgithub.go ├── dsgroupsio.go ├── dsjira.go ├── dsrocketchat.go ├── dsstub.go ├── email.go ├── email_test.go ├── error.go ├── es.go ├── exec.go ├── flag.go ├── gitops.py ├── go.mod ├── go.sum ├── googlegroups ├── cmd.sh ├── const.go ├── dto.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── fetcher_test.go ├── ggmbox.py ├── manager.go └── mocks │ ├── affiliation_client.go │ └── auth_client_provider.go ├── jenkins ├── const.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── fetcher_test.go ├── manager.go ├── mapping.go └── models.go ├── json.go ├── json.json ├── log.go ├── map.json ├── mbox.go ├── notused ├── dsgroupsio.g_ ├── mbox.g_ └── mbox_test.g_ ├── pipermail ├── const.go ├── downloader.go ├── dto.go ├── enricher.go ├── enricher_test.go ├── fetcher.go ├── fetcher_test.go ├── manager.go ├── mapping.go ├── mbox.go ├── mocks │ ├── affiliation_client.go │ ├── es_client_provider.go │ └── http_client_provider.go └── scrapper.go ├── redacted.go ├── regexp_test.go ├── scripts ├── bugzilla.sh ├── bugzillarest.sh ├── check_confluence.sh ├── compare_confluence.sh ├── compare_gerrit.sh ├── compare_git.sh ├── compare_github.sh ├── compare_groupsio.sh ├── compare_jira.sh ├── confluences.sh ├── coverage.sh ├── debug.sh ├── dockerhub.sh ├── find.sh ├── fix_loc.sh ├── for_each_go_file.sh ├── gerrits.sh ├── git-non-local.sh ├── git.sh ├── git_log.sh ├── git_trailers.sh ├── github.sh ├── github_api_calls.sh ├── googlegroups.sh ├── jenkins.sh ├── manual_gerrit.sh ├── mk.go.bak ├── pipermail.sh └── vet_files.sh ├── sds-rocketchat.sh ├── sql.go ├── sql └── update_empty_names.sql ├── test └── time.go ├── threads.go ├── threads_test.go ├── time.go ├── time_test.go ├── token.go ├── util ├── dto.go ├── failureHandler.go └── helper.go ├── utils.go ├── uuid.go ├── uuid.py └── uuid ├── Dockerfile ├── README.md ├── build.sh ├── compile.sh ├── compile_cython.sh ├── requirements.txt ├── run.sh └── uuid.pyx /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Check https://circleci.com/docs/2.0/language-go/ for more details 2 | version: 2 3 | jobs: 4 | 
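# The single build job below lints (make check), tests (make test, make test-coverage) and builds the dads binary inside a golang:1.15 container.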
build: 5 | docker: 6 | - image: golang:1.15 7 | working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} 8 | steps: 9 | - checkout 10 | 11 | - run: go get -u golang.org/x/lint/golint 12 | - run: go get golang.org/x/tools/cmd/goimports 13 | - run: go get github.com/jgautheron/usedexports 14 | - run: go get -u github.com/kisielk/errcheck 15 | - run: make check 16 | - run: make test 17 | - run: make test-coverage 18 | - run: make build 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | *.log 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # LG 18 | *.swp 19 | /dads 20 | /out 21 | /out-raw 22 | /out-rich 23 | *.secret 24 | *.mbox 25 | /data 26 | /local.sql 27 | /dads.json 28 | /dads.txt 29 | /p2o.json 30 | /p2o.txt 31 | /log.txt 32 | /DS.md 33 | /report.txt 34 | /login.json 35 | /cookies.curl 36 | /groups.txt 37 | /check.txt 38 | /uuid/uuid 39 | /uuid/duuid 40 | /git-*.json 41 | /github-*.json 42 | /scripts/git.trailers 43 | 44 | .DS_Store 45 | .idea 46 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GO_LIB_FILES=affs.go context.go const.go ds.go dsconfluence.go dsgerrit.go dsgit.go dsgithub.go dsgroupsio.go dsjira.go dsrocketchat.go dsstub.go email.go es.go error.go exec.go json.go log.go mbox.go redacted.go sql.go threads.go time.go utils.go uuid.go api.go token.go 2 | GO_BIN_FILES=cmd/dads/dads.go 3 | GO_TEST_FILES=context_test.go email_test.go regexp_test.go time_test.go threads_test.go 4 | GO_LIBTEST_FILES=test/time.go 5 | GO_BIN_CMDS=github.com/LF-Engineering/da-ds/cmd/dads 6 | # for race CGO_ENABLED=1 7 | # GO_ENV=CGO_ENABLED=1 8 | GO_ENV=CGO_ENABLED=0 9 | # for race -race 10 | # GO_BUILD=go build -ldflags '-s -w' -race 11 | GO_BUILD=go build -ldflags '-s -w' 12 | GO_INSTALL=go install -ldflags '-s' 13 | GO_FMT=gofmt -s -w 14 | GO_LINT=golint -set_exit_status 15 | GO_VET=go vet 16 | GO_IMPORTS=goimports -w 17 | GO_USEDEXPORTS=usedexports 18 | GO_ERRCHECK=errcheck -asserts -ignoretests -ignoregenerated 19 | GO_TEST=go test 20 | BINARIES=dads 21 | STRIP=strip 22 | PKG_LIST := $(shell go list ./... | grep -v mock) 23 | PRODUCT_NAME?=da-ds 24 | COMMIT=`git rev-parse --short HEAD` 25 | LDFLAGS=-ldflags "-s -w -extldflags '-static' -X build.GitCommit=$(COMMIT)" 26 | 27 | all: check build 28 | 29 | build: cmd/dads/dads.go ${GO_LIB_FILES} 30 | ${GO_ENV} ${GO_BUILD} -o dads ${LDFLAGS} cmd/dads/dads.go 31 | 32 | fmt: ${GO_BIN_FILES} ${GO_LIB_FILES} ${GO_TEST_FILES} ${GO_LIBTEST_FILES} 33 | ./scripts/for_each_go_file.sh "${GO_FMT}" 34 | 35 | lint: ## Lint the files 36 | golint -set_exit_status $(PKG_LIST) 37 | 38 | vet: ${GO_BIN_FILES} ${GO_LIB_FILES} ${GO_TEST_FILES} ${GO_LIBTEST_FILES} 39 | go vet $(PKG_LIST) 40 | 41 | imports: ${GO_BIN_FILES} ${GO_LIB_FILES} ${GO_TEST_FILES} ${GO_LIBTEST_FILES} 42 | ./scripts/for_each_go_file.sh "${GO_IMPORTS}" 43 | 44 | usedexports: ${GO_BIN_FILES} ${GO_LIB_FILES} ${GO_TEST_FILES} ${GO_LIBTEST_FILES} 45 | ${GO_USEDEXPORTS} ./... 
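# errcheck (next target) reports unhandled error return values using the GO_ERRCHECK flags defined above; together with fmt, lint, imports, vet and usedexports it forms the `check` target that CI runs as `make check`.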
46 | 47 | errcheck: ${GO_BIN_FILES} ${GO_LIB_FILES} ${GO_TEST_FILES} ${GO_LIBTEST_FILES} 48 | ${GO_ERRCHECK} ./... 49 | 50 | test: 51 | go test -v $(PKG_LIST) 52 | 53 | test-coverage: 54 | ./scripts/coverage.sh 55 | 56 | check: fmt lint imports vet usedexports errcheck 57 | 58 | install: check ${BINARIES} 59 | ${GO_ENV} ${GO_INSTALL} ${GO_BIN_CMDS} 60 | 61 | strip: ${BINARIES} 62 | ${STRIP} ${BINARIES} 63 | 64 | clean: 65 | rm -f ${BINARIES} 66 | 67 | .PHONY: test build 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dads 2 | 3 | DevAnalytics data source 4 | 5 | - Jira: implemented 6 | - Groups.io: wip 7 | -------------------------------------------------------------------------------- /affiliation/dto.go: -------------------------------------------------------------------------------- 1 | package affiliation 2 | 3 | import "database/sql" 4 | 5 | // Identity contains sortingHat user Identity 6 | type Identity struct { 7 | ID sql.NullString 8 | UUID sql.NullString 9 | Name sql.NullString 10 | Username sql.NullString 11 | Email sql.NullString 12 | Domain sql.NullString 13 | Gender sql.NullString 14 | GenderACC *int `db:"gender_acc"` 15 | OrgName sql.NullString 16 | IsBot bool `db:"is_bot"` 17 | MultiOrgNames []string 18 | } 19 | -------------------------------------------------------------------------------- /affiliation/identityprovider.go: -------------------------------------------------------------------------------- 1 | package affiliation 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | // IdentityProvider manages user identities 9 | type IdentityProvider struct { 10 | db DBConnector 11 | } 12 | 13 | // DBConnector contains dataAccess functionalities 14 | type DBConnector interface { 15 | Get(dest interface{}, query string, args ...interface{}) error 16 | Select(dest interface{}, query string, args ...interface{}) error 17 | } 18 | 19 | // NewIdentityProvider initiates a new IdentityProvider instance 20 | func NewIdentityProvider(db DBConnector) *IdentityProvider { 21 | return &IdentityProvider{db: db} 22 | } 23 | 24 | // GetIdentity ... 
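// It returns the identity row where identities.<key> equals val (key is typically "username" or "uuid"), left-joined with profiles so name, email, gender, gender_acc and is_bot are populated on the returned Identity.
// A minimal usage sketch, assuming db satisfies DBConnector (for example a *sqlx.DB from db.NewConnector):
//   provider := NewIdentityProvider(db)
//   identity, err := provider.GetIdentity("username", "someuser")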
25 | func (i *IdentityProvider) GetIdentity(key string, val string) (*Identity, error) { 26 | query := fmt.Sprintf(`SELECT 27 | identities.id, 28 | identities.uuid, 29 | profiles.name, 30 | identities.username, 31 | profiles.email, 32 | profiles.gender, 33 | profiles.gender_acc, 34 | profiles.is_bot 35 | FROM 36 | identities LEFT JOIN (profiles) 37 | ON (identities.uuid = profiles.uuid) 38 | where 39 | identities.%s='%s';`, key, val) 40 | 41 | var identity Identity 42 | err := i.db.Get(&identity, query) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | return &identity, nil 48 | } 49 | 50 | // GetOrganizations gets user's enrolled organizations until given time 51 | func (i *IdentityProvider) GetOrganizations(uuid string, date time.Time) ([]string, error) { 52 | query := fmt.Sprintf(`select distinct o.name 53 | from enrollments e, organizations o 54 | where e.organization_id = o.id and 55 | e.uuid = '%s' and 56 | '%s' between e.start and e.end order by e.id desc`, 57 | uuid, date.Format(time.RFC3339)) 58 | 59 | var multiOrg []string 60 | err := i.db.Select(&multiOrg, query) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | return multiOrg, nil 66 | } 67 | -------------------------------------------------------------------------------- /affiliation/identityprovider_test.go: -------------------------------------------------------------------------------- 1 | package affiliation 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "reflect" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/mock" 11 | 12 | "github.com/LF-Engineering/da-ds/affiliation/mocks" 13 | 14 | "github.com/stretchr/testify/assert" 15 | ) 16 | 17 | func TestGetIdentityByUsername(t *testing.T) { 18 | // Arrange 19 | dataBase := &mocks.DBConnector{} 20 | key := "username" 21 | val := "vvavrychuk" 22 | query := fmt.Sprintf(`SELECT 23 | identities.id, 24 | identities.uuid, 25 | profiles.name, 26 | identities.username, 27 | profiles.email, 28 | profiles.gender, 29 | profiles.gender_acc, 30 | profiles.is_bot 31 | FROM 32 | identities LEFT JOIN (profiles) 33 | ON (identities.uuid = profiles.uuid) 34 | where 35 | identities.%s='%s';`, key, val) 36 | 37 | ide := Identity{} 38 | dataBase.On("Get", &ide, query).Run(func(args mock.Arguments) { 39 | email := "ayman@gmail.com" 40 | o := Identity{ 41 | ID: sql.NullString{String: "5", Valid: true}, 42 | UUID: sql.NullString{String: "5", Valid: true}, 43 | Name: sql.NullString{String: "vvavrychuk", Valid: true}, 44 | Username: sql.NullString{String: "vvavrychuk", Valid: true}, 45 | Email: sql.NullString{String: email, Valid: true}, 46 | Domain: sql.NullString{String: "inc.com", Valid: true}, 47 | Gender: sql.NullString{}, 48 | GenderACC: nil, 49 | OrgName: sql.NullString{}, 50 | IsBot: false, 51 | } 52 | reflect.ValueOf(args.Get(0)).Elem().Set(reflect.ValueOf(o)) 53 | }).Return(nil) 54 | 55 | // Act 56 | srv := NewIdentityProvider(dataBase) 57 | res, err := srv.GetIdentity(key, val) 58 | // Assert 59 | assert.NoError(t, err) 60 | assert.Equal(t, res.UUID.String, "5") 61 | assert.Equal(t, res.Domain.String, "inc.com") 62 | assert.Equal(t, res.Email.String, "ayman@gmail.com") 63 | assert.Equal(t, res.IsBot, false) 64 | 65 | } 66 | 67 | func TestGetOrganizations(t *testing.T) { 68 | // Arrange 69 | dataBase := &mocks.DBConnector{} 70 | fakeUUID := "fakeUUID" 71 | date := time.Now() 72 | query := fmt.Sprintf(`select distinct o.name 73 | from enrollments e, organizations o 74 | where e.organization_id = o.id and 75 | e.uuid = '%s' and 76 | '%s' between e.start and 
e.end order by e.id desc`, 77 | fakeUUID, date.Format(time.RFC3339)) 78 | var orgs []string 79 | dataBase.On("Select", &orgs, query).Run(func(args mock.Arguments) { 80 | o := []string{ 81 | "LF", 82 | "LFX", 83 | } 84 | reflect.ValueOf(args.Get(0)).Elem().Set(reflect.ValueOf(o)) 85 | }).Return(nil) 86 | 87 | // Act 88 | srv := NewIdentityProvider(dataBase) 89 | res, err := srv.GetOrganizations(fakeUUID, date) 90 | 91 | // Assert 92 | assert.NoError(t, err) 93 | assert.Equal(t, 2, len(res)) 94 | assert.Equal(t, "LF", res[0]) 95 | assert.Equal(t, "LFX", res[1]) 96 | } 97 | -------------------------------------------------------------------------------- /affiliation/mocks/db_connector.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import ( 6 | sqlx "github.com/jmoiron/sqlx" 7 | mock "github.com/stretchr/testify/mock" 8 | ) 9 | 10 | // DBConnector is an autogenerated mock type for the DBConnector type 11 | type DBConnector struct { 12 | mock.Mock 13 | } 14 | 15 | // Get provides a mock function with given fields: dest, query, args 16 | func (_m *DBConnector) Get(dest interface{}, query string, args ...interface{}) error { 17 | var _ca []interface{} 18 | _ca = append(_ca, dest, query) 19 | _ca = append(_ca, args...) 20 | ret := _m.Called(_ca...) 21 | 22 | var r0 error 23 | if rf, ok := ret.Get(0).(func(interface{}, string, ...interface{}) error); ok { 24 | r0 = rf(dest, query, args...) 25 | } else { 26 | r0 = ret.Error(0) 27 | } 28 | 29 | return r0 30 | } 31 | 32 | // MustBegin provides a mock function with given fields: 33 | func (_m *DBConnector) MustBegin() *sqlx.Tx { 34 | ret := _m.Called() 35 | 36 | var r0 *sqlx.Tx 37 | if rf, ok := ret.Get(0).(func() *sqlx.Tx); ok { 38 | r0 = rf() 39 | } else { 40 | if ret.Get(0) != nil { 41 | r0 = ret.Get(0).(*sqlx.Tx) 42 | } 43 | } 44 | 45 | return r0 46 | } 47 | 48 | // Select provides a mock function with given fields: dest, query, args 49 | func (_m *DBConnector) Select(dest interface{}, query string, args ...interface{}) error { 50 | var _ca []interface{} 51 | _ca = append(_ca, dest, query) 52 | _ca = append(_ca, args...) 53 | ret := _m.Called(_ca...) 54 | 55 | var r0 error 56 | if rf, ok := ret.Get(0).(func(interface{}, string, ...interface{}) error); ok { 57 | r0 = rf(dest, query, args...) 58 | } else { 59 | r0 = ret.Error(0) 60 | } 61 | 62 | return r0 63 | } 64 | -------------------------------------------------------------------------------- /bugzilla/README.md: -------------------------------------------------------------------------------- 1 | Bugzilla Datasource 2 | ========= 3 | 4 | Bugzilla datasource is a package to fetch data 5 | from the Bugzilla API and save it into Elasticsearch 6 | and enrich the saved data. 7 | 8 | 9 | ### Bugzilla Running instructions 10 | 11 | To run Bugzilla datasource from dads you 12 | must set proper environment variables to 13 | select Bugzilla as an engine and other 14 | parameters that determine the intended behavior. 15 | 16 | These are the needed environment variables to run Bugzilla DA-DS: 17 | - DA_BUGZILLA_ENDPOINT={} 18 | - bugzilla origin url 19 | - DA_BUGZILLA_AFFILIATION_CONN_STRING={} 20 | - Affiliation database connection string 21 | - DA_BUGZILLA_FETCHER_BACKEND_VERSION={} 22 | - Fetcher version 23 | - DA_BUGZILLA_ENRICHER_BACKEND_VERSION={} 24 | - Enricher version 25 | - DA_BUGZILLA_FETCH={1,0} 26 | - To decide whether to fetch raw data or not. 
27 | - DA_BUGZILLA_ENRICH={1,0} 28 | - To decide whether will do enrich raw data or not. 29 | - DA_BUGZILLA_ES_URL='' 30 | - Elastic search url. 31 | - DA_BUGZILLA_ES_USERNAME='' 32 | - Elastic search credentials 33 | - DA_BUGZILLA_ES_PASSWORD='' 34 | - Elastic search credentials 35 | - DA_BUGZILLA_ES_INDEX='' 36 | - Elastic search index name . 37 | - DA_BUGZILLA_FROM_DATE='' 38 | - Optional, date to start syncing from. 39 | - DA_BUGZILLA_PROJECT='' 40 | - Slug name of a project e.g. yocto. 41 | - DA_BUGZILLA_FETCH_SIZE=25 42 | - total number of fetched items per request. 43 | - DA_BUGZILLA_ENRICH_SIZE=25 44 | - total number of enriched items per request 45 | -------------------------------------------------------------------------------- /bugzilla/const.go: -------------------------------------------------------------------------------- 1 | package bugzilla 2 | 3 | const ( 4 | // Category type 5 | Category = "bug" 6 | // Bugzilla - DS name 7 | Bugzilla string = "bugzilla" 8 | ) 9 | -------------------------------------------------------------------------------- /bugzilla/html_parser.go: -------------------------------------------------------------------------------- 1 | package bugzilla 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | 7 | "github.com/PuerkitoBio/goquery" 8 | ) 9 | 10 | // GetActivityLen gets count of searched items 11 | func GetActivityLen(query string, body []byte) (int, []Activity, error) { 12 | r := bytes.NewReader(body) 13 | doc, err := goquery.NewDocumentFromReader(r) 14 | act := make([]Activity, 0) 15 | if err != nil { 16 | return 0, act, err 17 | } 18 | activityCount := 0 19 | doc.Find(query).Each(func(i int, selection *goquery.Selection) { 20 | if len(selection.Find("td").Nodes) == 5 { 21 | var ac Activity 22 | selection.Find("td").Each(func(x int, selection *goquery.Selection) { 23 | val := strings.TrimPrefix(strings.TrimSuffix(strings.TrimSpace(selection.Text()), "\""), "\"") 24 | switch x { 25 | case 0: 26 | ac.Who = val 27 | break 28 | case 1: 29 | ac.When = val 30 | break 31 | case 2: 32 | ac.What = val 33 | break 34 | case 3: 35 | ac.Removed = val 36 | break 37 | case 4: 38 | ac.Added = val 39 | break 40 | } 41 | }) 42 | act = append(act, ac) 43 | activityCount++ 44 | } 45 | }) 46 | 47 | return activityCount, act, nil 48 | } 49 | -------------------------------------------------------------------------------- /bugzilla/html_parser_test.go: -------------------------------------------------------------------------------- 1 | package bugzilla 2 | 3 | import ( 4 | "io/ioutil" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestGetActivityLen(t *testing.T) { 11 | // Arrange 12 | body, err := ioutil.ReadFile("./mocks/activity.html") 13 | 14 | // Act 15 | count, _, err := GetActivityLen("#bugzilla-body tr", body) 16 | 17 | // Assert 18 | assert.NoError(t, err) 19 | assert.Equal(t, 6, count) 20 | } 21 | -------------------------------------------------------------------------------- /bugzilla/mapping.go: -------------------------------------------------------------------------------- 1 | package bugzilla 2 | 3 | var ( 4 | 5 | // BugzillaRawMapping - bugzilla raw index mapping 6 | BugzillaRawMapping = []byte(`{"mappings": 7 | {"dynamic":true, 8 | "properties":{ 9 | "metadata__updated_on":{"type":"date"}, 10 | "metadata__timestamp":{"type":"date"}, 11 | "creation_ts":{"type":"date"}, 12 | "changed_at":{"type":"date"}, 13 | "delta_ts":{"type":"date"}, 14 | "short_description":{"type":"text","index":true}, 15 | 
"backend_version":{"type":"keyword"}, 16 | "backend_name":{"type":"keyword"}, 17 | "bug_status":{"type":"keyword"}, 18 | "priority":{"type":"keyword"}, 19 | "severity":{"type":"keyword"} 20 | }}}`) 21 | 22 | // BugzillaEnrichMapping - bugzilla enriched index mapping 23 | BugzillaEnrichMapping = []byte(`{"mappings":{"dynamic_templates":[{"notanalyzed":{"match":"*","match_mapping_type":"string","mapping":{"type":"keyword"}}},{"int_to_float":{"match":"*","match_mapping_type":"long","mapping":{"type":"float"}}},{"formatdate":{"match":"*","match_mapping_type":"date","mapping":{"format":"strict_date_optional_time||epoch_millis","type":"date"}}}]}}`) 24 | ) 25 | -------------------------------------------------------------------------------- /bugzilla/mocks/affiliation_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import ( 6 | affiliation "github.com/LF-Engineering/dev-analytics-libraries/affiliation" 7 | 8 | mock "github.com/stretchr/testify/mock" 9 | ) 10 | 11 | // AffiliationClient is an autogenerated mock type for the AffiliationClient type 12 | type AffiliationClient struct { 13 | mock.Mock 14 | } 15 | 16 | // AddIdentity provides a mock function with given fields: identity 17 | func (_m *AffiliationClient) AddIdentity(identity *affiliation.Identity) bool { 18 | ret := _m.Called(identity) 19 | 20 | var r0 bool 21 | if rf, ok := ret.Get(0).(func(*affiliation.Identity) bool); ok { 22 | r0 = rf(identity) 23 | } else { 24 | r0 = ret.Get(0).(bool) 25 | } 26 | 27 | return r0 28 | } 29 | 30 | // GetIdentityByUser provides a mock function with given fields: key, value 31 | func (_m *AffiliationClient) GetIdentityByUser(key string, value string) (*affiliation.AffIdentity, error) { 32 | ret := _m.Called(key, value) 33 | 34 | var r0 *affiliation.AffIdentity 35 | if rf, ok := ret.Get(0).(func(string, string) *affiliation.AffIdentity); ok { 36 | r0 = rf(key, value) 37 | } else { 38 | if ret.Get(0) != nil { 39 | r0 = ret.Get(0).(*affiliation.AffIdentity) 40 | } 41 | } 42 | 43 | var r1 error 44 | if rf, ok := ret.Get(1).(func(string, string) error); ok { 45 | r1 = rf(key, value) 46 | } else { 47 | r1 = ret.Error(1) 48 | } 49 | 50 | return r0, r1 51 | } 52 | -------------------------------------------------------------------------------- /bugzilla/mocks/auth0_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // Auth0Client is an autogenerated mock type for the Auth0Client type 8 | type Auth0Client struct { 9 | mock.Mock 10 | } 11 | 12 | // GetToken provides a mock function with given fields: 13 | func (_m *Auth0Client) GetToken() (string, error) { 14 | ret := _m.Called() 15 | 16 | var r0 string 17 | if rf, ok := ret.Get(0).(func() string); ok { 18 | r0 = rf() 19 | } else { 20 | r0 = ret.Get(0).(string) 21 | } 22 | 23 | var r1 error 24 | if rf, ok := ret.Get(1).(func() error); ok { 25 | r1 = rf() 26 | } else { 27 | r1 = ret.Error(1) 28 | } 29 | 30 | return r0, r1 31 | } 32 | -------------------------------------------------------------------------------- /bugzilla/mocks/es_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import ( 6 | elastic "github.com/LF-Engineering/dev-analytics-libraries/elastic" 7 | mock "github.com/stretchr/testify/mock" 8 | 9 | time "time" 10 | ) 11 | 12 | // ESClientProvider is an autogenerated mock type for the ESClientProvider type 13 | type ESClientProvider struct { 14 | mock.Mock 15 | } 16 | 17 | // Add provides a mock function with given fields: index, documentID, body 18 | func (_m *ESClientProvider) Add(index string, documentID string, body []byte) ([]byte, error) { 19 | ret := _m.Called(index, documentID, body) 20 | 21 | var r0 []byte 22 | if rf, ok := ret.Get(0).(func(string, string, []byte) []byte); ok { 23 | r0 = rf(index, documentID, body) 24 | } else { 25 | if ret.Get(0) != nil { 26 | r0 = ret.Get(0).([]byte) 27 | } 28 | } 29 | 30 | var r1 error 31 | if rf, ok := ret.Get(1).(func(string, string, []byte) error); ok { 32 | r1 = rf(index, documentID, body) 33 | } else { 34 | r1 = ret.Error(1) 35 | } 36 | 37 | return r0, r1 38 | } 39 | 40 | // Bulk provides a mock function with given fields: body 41 | func (_m *ESClientProvider) Bulk(body []byte) ([]byte, error) { 42 | ret := _m.Called(body) 43 | 44 | var r0 []byte 45 | if rf, ok := ret.Get(0).(func([]byte) []byte); ok { 46 | r0 = rf(body) 47 | } else { 48 | if ret.Get(0) != nil { 49 | r0 = ret.Get(0).([]byte) 50 | } 51 | } 52 | 53 | var r1 error 54 | if rf, ok := ret.Get(1).(func([]byte) error); ok { 55 | r1 = rf(body) 56 | } else { 57 | r1 = ret.Error(1) 58 | } 59 | 60 | return r0, r1 61 | } 62 | 63 | // BulkInsert provides a mock function with given fields: data 64 | func (_m *ESClientProvider) BulkInsert(data []elastic.BulkData) ([]byte, error) { 65 | ret := _m.Called(data) 66 | 67 | var r0 []byte 68 | if rf, ok := ret.Get(0).(func([]elastic.BulkData) []byte); ok { 69 | r0 = rf(data) 70 | } else { 71 | if ret.Get(0) != nil { 72 | r0 = ret.Get(0).([]byte) 73 | } 74 | } 75 | 76 | var r1 error 77 | if rf, ok := ret.Get(1).(func([]elastic.BulkData) error); ok { 78 | r1 = rf(data) 79 | } else { 80 | r1 = ret.Error(1) 81 | } 82 | 83 | return r0, r1 84 | } 85 | 86 | // CreateIndex provides a mock function with given fields: index, body 87 | func (_m *ESClientProvider) CreateIndex(index string, body []byte) ([]byte, error) { 88 | ret := _m.Called(index, body) 89 | 90 | var r0 []byte 91 | if rf, ok := ret.Get(0).(func(string, []byte) []byte); ok { 92 | r0 = rf(index, body) 93 | } else { 94 | if ret.Get(0) != nil { 95 | r0 = ret.Get(0).([]byte) 96 | } 97 | } 98 | 99 | var r1 error 100 | if rf, ok := ret.Get(1).(func(string, []byte) error); ok { 101 | r1 = rf(index, body) 102 | } else { 103 | r1 = ret.Error(1) 104 | } 105 | 106 | return r0, r1 107 | } 108 | 109 | // DelayOfCreateIndex provides a mock function with given fields: ex, uin, du, index, data 110 | func (_m *ESClientProvider) DelayOfCreateIndex(ex func(string, []byte) ([]byte, error), uin uint, du time.Duration, index string, data []byte) error { 111 | ret := _m.Called(ex, uin, du, index, data) 112 | 113 | var r0 error 114 | if rf, ok := ret.Get(0).(func(func(string, []byte) ([]byte, error), uint, time.Duration, string, []byte) error); ok { 115 | r0 = rf(ex, uin, du, index, data) 116 | } else { 117 | r0 = ret.Error(0) 118 | } 119 | 120 | return r0 121 | } 122 | 123 | // Get provides a mock function with given fields: index, query, result 124 | func (_m *ESClientProvider) Get(index string, query map[string]interface{}, result interface{}) error { 125 | ret := _m.Called(index, query, result) 126 | 127 | var r0 error 128 | if rf, ok := 
ret.Get(0).(func(string, map[string]interface{}, interface{}) error); ok { 129 | r0 = rf(index, query, result) 130 | } else { 131 | r0 = ret.Error(0) 132 | } 133 | 134 | return r0 135 | } 136 | 137 | // GetStat provides a mock function with given fields: index, field, aggType, mustConditions, mustNotConditions 138 | func (_m *ESClientProvider) GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (time.Time, error) { 139 | ret := _m.Called(index, field, aggType, mustConditions, mustNotConditions) 140 | 141 | var r0 time.Time 142 | if rf, ok := ret.Get(0).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) time.Time); ok { 143 | r0 = rf(index, field, aggType, mustConditions, mustNotConditions) 144 | } else { 145 | r0 = ret.Get(0).(time.Time) 146 | } 147 | 148 | var r1 error 149 | if rf, ok := ret.Get(1).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) error); ok { 150 | r1 = rf(index, field, aggType, mustConditions, mustNotConditions) 151 | } else { 152 | r1 = ret.Error(1) 153 | } 154 | 155 | return r0, r1 156 | } 157 | -------------------------------------------------------------------------------- /bugzilla/mocks/http_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // HTTPClientProvider is an autogenerated mock type for the HTTPClientProvider type 8 | type HTTPClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // Request provides a mock function with given fields: url, method, header, body, params 13 | func (_m *HTTPClientProvider) Request(url string, method string, header map[string]string, body []byte, params map[string]string) (int, []byte, error) { 14 | ret := _m.Called(url, method, header, body, params) 15 | 16 | var r0 int 17 | if rf, ok := ret.Get(0).(func(string, string, map[string]string, []byte, map[string]string) int); ok { 18 | r0 = rf(url, method, header, body, params) 19 | } else { 20 | r0 = ret.Get(0).(int) 21 | } 22 | 23 | var r1 []byte 24 | if rf, ok := ret.Get(1).(func(string, string, map[string]string, []byte, map[string]string) []byte); ok { 25 | r1 = rf(url, method, header, body, params) 26 | } else { 27 | if ret.Get(1) != nil { 28 | r1 = ret.Get(1).([]byte) 29 | } 30 | } 31 | 32 | var r2 error 33 | if rf, ok := ret.Get(2).(func(string, string, map[string]string, []byte, map[string]string) error); ok { 34 | r2 = rf(url, method, header, body, params) 35 | } else { 36 | r2 = ret.Error(2) 37 | } 38 | 39 | return r0, r1, r2 40 | } 41 | 42 | // RequestCSV provides a mock function with given fields: url 43 | func (_m *HTTPClientProvider) RequestCSV(url string) ([][]string, error) { 44 | ret := _m.Called(url) 45 | 46 | var r0 [][]string 47 | if rf, ok := ret.Get(0).(func(string) [][]string); ok { 48 | r0 = rf(url) 49 | } else { 50 | if ret.Get(0) != nil { 51 | r0 = ret.Get(0).([][]string) 52 | } 53 | } 54 | 55 | var r1 error 56 | if rf, ok := ret.Get(1).(func(string) error); ok { 57 | r1 = rf(url) 58 | } else { 59 | r1 = ret.Error(1) 60 | } 61 | 62 | return r0, r1 63 | } 64 | -------------------------------------------------------------------------------- /bugzilla/mocks/slack_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // SlackProvider is an autogenerated mock type for the SlackProvider type 8 | type SlackProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // SendText provides a mock function with given fields: text 13 | func (_m *SlackProvider) SendText(text string) error { 14 | ret := _m.Called(text) 15 | 16 | var r0 error 17 | if rf, ok := ret.Get(0).(func(string) error); ok { 18 | r0 = rf(text) 19 | } else { 20 | r0 = ret.Error(0) 21 | } 22 | 23 | return r0 24 | } 25 | -------------------------------------------------------------------------------- /bugzillarest/const.go: -------------------------------------------------------------------------------- 1 | package bugzillarest 2 | 3 | const ( 4 | // Category type 5 | Category = "bug" 6 | // BugzillaRest - DS name 7 | BugzillaRest string = "bugzillarest" 8 | ) 9 | -------------------------------------------------------------------------------- /bugzillarest/mapping.go: -------------------------------------------------------------------------------- 1 | package bugzillarest 2 | 3 | var ( 4 | 5 | // BugzillaRestRawMapping - bugzilla raw index mapping 6 | BugzillaRestRawMapping = []byte(`{ 7 | "mappings": 8 | {"dynamic":true, 9 | "properties":{ 10 | "metadata__updated_on":{"type":"date"}, 11 | "metadata__timestamp":{"type":"date"}, 12 | "updated_on":{"type":"date"}, 13 | "timestamp":{"type":"date"}, 14 | "short_description":{"type":"text","index":true}, 15 | "backend_version":{"type":"keyword"}, 16 | "backend_name":{"type":"keyword"}, 17 | "status":{"type":"keyword"}, 18 | "priority":{"type":"keyword"}, 19 | "severity":{"type":"keyword"}, 20 | "uuid":{"type": "keyword"}, 21 | "origin":{"type":"keyword"}, 22 | "tag":{"type":"keyword"} 23 | }} 24 | }`) 25 | 26 | // BugzillaRestEnrichMapping - bugzilla rest enriched index mapping 27 | BugzillaRestEnrichMapping = []byte(`{"mappings":{"dynamic_templates":[{"notanalyzed":{"match":"*","match_mapping_type":"string","mapping":{"type":"keyword"}}},{"int_to_float":{"match":"*","match_mapping_type":"long","mapping":{"type":"float"}}},{"formatdate":{"match":"*","match_mapping_type":"date","mapping":{"format":"strict_date_optional_time||epoch_millis","type":"date"}}}]}}`) 28 | ) 29 | -------------------------------------------------------------------------------- /bugzillarest/mocks/affiliation_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import ( 6 | affiliation "github.com/LF-Engineering/dev-analytics-libraries/affiliation" 7 | 8 | mock "github.com/stretchr/testify/mock" 9 | ) 10 | 11 | // AffiliationClient is an autogenerated mock type for the AffiliationClient type 12 | type AffiliationClient struct { 13 | mock.Mock 14 | } 15 | 16 | // AddIdentity provides a mock function with given fields: identity 17 | func (_m *AffiliationClient) AddIdentity(identity *affiliation.Identity) bool { 18 | ret := _m.Called(identity) 19 | 20 | var r0 bool 21 | if rf, ok := ret.Get(0).(func(*affiliation.Identity) bool); ok { 22 | r0 = rf(identity) 23 | } else { 24 | r0 = ret.Get(0).(bool) 25 | } 26 | 27 | return r0 28 | } 29 | 30 | // GetIdentityByUser provides a mock function with given fields: key, value 31 | func (_m *AffiliationClient) GetIdentityByUser(key string, value string) (*affiliation.AffIdentity, error) { 32 | ret := _m.Called(key, value) 33 | 34 | var r0 *affiliation.AffIdentity 35 | if rf, ok := ret.Get(0).(func(string, string) *affiliation.AffIdentity); ok { 36 | r0 = rf(key, value) 37 | } else { 38 | if ret.Get(0) != nil { 39 | r0 = ret.Get(0).(*affiliation.AffIdentity) 40 | } 41 | } 42 | 43 | var r1 error 44 | if rf, ok := ret.Get(1).(func(string, string) error); ok { 45 | r1 = rf(key, value) 46 | } else { 47 | r1 = ret.Error(1) 48 | } 49 | 50 | return r0, r1 51 | } 52 | -------------------------------------------------------------------------------- /bugzillarest/mocks/auth0_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // Auth0ClientProvider is an autogenerated mock type for the Auth0ClientProvider type 8 | type Auth0ClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // GetToken provides a mock function with given fields: 13 | func (_m *Auth0ClientProvider) GetToken() (string, error) { 14 | ret := _m.Called() 15 | 16 | var r0 string 17 | if rf, ok := ret.Get(0).(func() string); ok { 18 | r0 = rf() 19 | } else { 20 | r0 = ret.Get(0).(string) 21 | } 22 | 23 | var r1 error 24 | if rf, ok := ret.Get(1).(func() error); ok { 25 | r1 = rf() 26 | } else { 27 | r1 = ret.Error(1) 28 | } 29 | 30 | return r0, r1 31 | } 32 | -------------------------------------------------------------------------------- /bugzillarest/mocks/auth_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // AuthClientProvider is an autogenerated mock type for the AuthClientProvider type 8 | type AuthClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // GetToken provides a mock function with given fields: env 13 | func (_m *AuthClientProvider) GetToken(env string) (string, error) { 14 | ret := _m.Called(env) 15 | 16 | var r0 string 17 | if rf, ok := ret.Get(0).(func(string) string); ok { 18 | r0 = rf(env) 19 | } else { 20 | r0 = ret.Get(0).(string) 21 | } 22 | 23 | var r1 error 24 | if rf, ok := ret.Get(1).(func(string) error); ok { 25 | r1 = rf(env) 26 | } else { 27 | r1 = ret.Error(1) 28 | } 29 | 30 | return r0, r1 31 | } 32 | -------------------------------------------------------------------------------- /bugzillarest/mocks/es_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import ( 6 | elastic "github.com/LF-Engineering/dev-analytics-libraries/elastic" 7 | mock "github.com/stretchr/testify/mock" 8 | 9 | time "time" 10 | ) 11 | 12 | // ESClientProvider is an autogenerated mock type for the ESClientProvider type 13 | type ESClientProvider struct { 14 | mock.Mock 15 | } 16 | 17 | // Add provides a mock function with given fields: index, documentID, body 18 | func (_m *ESClientProvider) Add(index string, documentID string, body []byte) ([]byte, error) { 19 | ret := _m.Called(index, documentID, body) 20 | 21 | var r0 []byte 22 | if rf, ok := ret.Get(0).(func(string, string, []byte) []byte); ok { 23 | r0 = rf(index, documentID, body) 24 | } else { 25 | if ret.Get(0) != nil { 26 | r0 = ret.Get(0).([]byte) 27 | } 28 | } 29 | 30 | var r1 error 31 | if rf, ok := ret.Get(1).(func(string, string, []byte) error); ok { 32 | r1 = rf(index, documentID, body) 33 | } else { 34 | r1 = ret.Error(1) 35 | } 36 | 37 | return r0, r1 38 | } 39 | 40 | // Bulk provides a mock function with given fields: body 41 | func (_m *ESClientProvider) Bulk(body []byte) ([]byte, error) { 42 | ret := _m.Called(body) 43 | 44 | var r0 []byte 45 | if rf, ok := ret.Get(0).(func([]byte) []byte); ok { 46 | r0 = rf(body) 47 | } else { 48 | if ret.Get(0) != nil { 49 | r0 = ret.Get(0).([]byte) 50 | } 51 | } 52 | 53 | var r1 error 54 | if rf, ok := ret.Get(1).(func([]byte) error); ok { 55 | r1 = rf(body) 56 | } else { 57 | r1 = ret.Error(1) 58 | } 59 | 60 | return r0, r1 61 | } 62 | 63 | // BulkInsert provides a mock function with given fields: data 64 | func (_m *ESClientProvider) BulkInsert(data []elastic.BulkData) ([]byte, error) { 65 | ret := _m.Called(data) 66 | 67 | var r0 []byte 68 | if rf, ok := ret.Get(0).(func([]elastic.BulkData) []byte); ok { 69 | r0 = rf(data) 70 | } else { 71 | if ret.Get(0) != nil { 72 | r0 = ret.Get(0).([]byte) 73 | } 74 | } 75 | 76 | var r1 error 77 | if rf, ok := ret.Get(1).(func([]elastic.BulkData) error); ok { 78 | r1 = rf(data) 79 | } else { 80 | r1 = ret.Error(1) 81 | } 82 | 83 | return r0, r1 84 | } 85 | 86 | // CreateIndex provides a mock function with given fields: index, body 87 | func (_m *ESClientProvider) CreateIndex(index string, body []byte) ([]byte, error) { 88 | ret := _m.Called(index, body) 89 | 90 | var r0 []byte 91 | if rf, ok := ret.Get(0).(func(string, []byte) []byte); ok { 92 | r0 = rf(index, body) 93 | } else { 94 | if ret.Get(0) != nil { 95 | r0 = ret.Get(0).([]byte) 96 | } 97 | } 98 | 99 | var r1 error 100 | if rf, ok := ret.Get(1).(func(string, 
[]byte) error); ok { 101 | r1 = rf(index, body) 102 | } else { 103 | r1 = ret.Error(1) 104 | } 105 | 106 | return r0, r1 107 | } 108 | 109 | // DelayOfCreateIndex provides a mock function with given fields: ex, uin, du, index, data 110 | func (_m *ESClientProvider) DelayOfCreateIndex(ex func(string, []byte) ([]byte, error), uin uint, du time.Duration, index string, data []byte) error { 111 | ret := _m.Called(ex, uin, du, index, data) 112 | 113 | var r0 error 114 | if rf, ok := ret.Get(0).(func(func(string, []byte) ([]byte, error), uint, time.Duration, string, []byte) error); ok { 115 | r0 = rf(ex, uin, du, index, data) 116 | } else { 117 | r0 = ret.Error(0) 118 | } 119 | 120 | return r0 121 | } 122 | 123 | // Get provides a mock function with given fields: index, query, result 124 | func (_m *ESClientProvider) Get(index string, query map[string]interface{}, result interface{}) error { 125 | ret := _m.Called(index, query, result) 126 | 127 | var r0 error 128 | if rf, ok := ret.Get(0).(func(string, map[string]interface{}, interface{}) error); ok { 129 | r0 = rf(index, query, result) 130 | } else { 131 | r0 = ret.Error(0) 132 | } 133 | 134 | return r0 135 | } 136 | 137 | // GetStat provides a mock function with given fields: index, field, aggType, mustConditions, mustNotConditions 138 | func (_m *ESClientProvider) GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (time.Time, error) { 139 | ret := _m.Called(index, field, aggType, mustConditions, mustNotConditions) 140 | 141 | var r0 time.Time 142 | if rf, ok := ret.Get(0).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) time.Time); ok { 143 | r0 = rf(index, field, aggType, mustConditions, mustNotConditions) 144 | } else { 145 | r0 = ret.Get(0).(time.Time) 146 | } 147 | 148 | var r1 error 149 | if rf, ok := ret.Get(1).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) error); ok { 150 | r1 = rf(index, field, aggType, mustConditions, mustNotConditions) 151 | } else { 152 | r1 = ret.Error(1) 153 | } 154 | 155 | return r0, r1 156 | } 157 | -------------------------------------------------------------------------------- /bugzillarest/mocks/http_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // HTTPClientProvider is an autogenerated mock type for the HTTPClientProvider type 8 | type HTTPClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // Request provides a mock function with given fields: url, method, header, body, params 13 | func (_m *HTTPClientProvider) Request(url string, method string, header map[string]string, body []byte, params map[string]string) (int, []byte, error) { 14 | ret := _m.Called(url, method, header, body, params) 15 | 16 | var r0 int 17 | if rf, ok := ret.Get(0).(func(string, string, map[string]string, []byte, map[string]string) int); ok { 18 | r0 = rf(url, method, header, body, params) 19 | } else { 20 | r0 = ret.Get(0).(int) 21 | } 22 | 23 | var r1 []byte 24 | if rf, ok := ret.Get(1).(func(string, string, map[string]string, []byte, map[string]string) []byte); ok { 25 | r1 = rf(url, method, header, body, params) 26 | } else { 27 | if ret.Get(1) != nil { 28 | r1 = ret.Get(1).([]byte) 29 | } 30 | } 31 | 32 | var r2 error 33 | if rf, ok := ret.Get(2).(func(string, string, map[string]string, []byte, map[string]string) error); ok { 34 | r2 = rf(url, method, header, body, params) 35 | } else { 36 | r2 = ret.Error(2) 37 | } 38 | 39 | return r0, r1, r2 40 | } 41 | -------------------------------------------------------------------------------- /build/options.go: -------------------------------------------------------------------------------- 1 | package build 2 | 3 | var ( 4 | // Version is component version 5 | Version = "latest" 6 | // GitCommit is commit hash associated with the version 7 | GitCommit = "HEAD" 8 | // AppName defines default application name 9 | AppName = "da-ds" 10 | ) 11 | -------------------------------------------------------------------------------- /compare_rocketchat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES_URL=... 3 | # _ID=955e415bcfeffb7e1e431cb625d20fa7195a7d03 4 | # curl -s "${ES_URL}/dads-confluence/_search" | jq '.hits.hits[]._source.uuid' 5 | if [ -z "${ES_URL}" ] 6 | then 7 | echo "$0: you must set ES_URL" 8 | exit 1 9 | fi 10 | if [ -z "${_ID}" ] 11 | then 12 | echo "$0: you must set _ID" 13 | exit 2 14 | fi 15 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-rocketchat-test/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 16 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-hyperledger-cactus-rocketchat/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' 
> p2o.json 17 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 18 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 19 | echo "da-ds:" > report.txt 20 | echo '-------------------------------------------' >> report.txt 21 | cat dads.txt >> report.txt 22 | echo '-------------------------------------------' >> report.txt 23 | echo "p2o:" >> report.txt 24 | echo '-------------------------------------------' >> report.txt 25 | cat p2o.txt >> report.txt 26 | echo '-------------------------------------------' >> report.txt 27 | -------------------------------------------------------------------------------- /const.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | // CacheCleanupProb - probability of cache cleanup in % 4 | const CacheCleanupProb int = 2 5 | 6 | // OK - common constant string 7 | const OK string = "ok" 8 | 9 | // DADSOrigin - common constant string 10 | const DADSOrigin string = "dads" 11 | 12 | // Groupsio - common constant string 13 | const Groupsio string = "groupsio" 14 | 15 | // Jira - common constant string 16 | const Jira string = "jira" 17 | 18 | // Git - common constant string 19 | const Git string = "git" 20 | 21 | // GitHub - common constant string 22 | const GitHub string = "github" 23 | 24 | // Gerrit - common constant string 25 | const Gerrit string = "gerrit" 26 | 27 | // Confluence - common constant string 28 | const Confluence string = "confluence" 29 | 30 | // Rocketchat - common constant string 31 | const Rocketchat string = "rocketchat" 32 | 33 | // Stub - common constant string 34 | const Stub string = "stub" 35 | 36 | // Get - common constant string 37 | const Get string = "GET" 38 | 39 | // Post - common constant string 40 | const Post string = "POST" 41 | 42 | // Put - common constant string 43 | const Put string = "PUT" 44 | 45 | // Delete - common constant string 46 | const Delete string = "DELETE" 47 | 48 | // DefaultDateField - default date field 49 | const DefaultDateField string = "metadata__updated_on" 50 | 51 | // DefaultEnrichDateField - default date field 52 | const DefaultEnrichDateField string = "metadata__enriched_on" 53 | 54 | // DefaultOffsetField - default offset field 55 | const DefaultOffsetField string = "updated_on" 56 | 57 | // DefaultTimestampField - default timestamp field 58 | const DefaultTimestampField string = "metadata__timestamp" 59 | 60 | // DefaultOriginField - default origin field 61 | const DefaultOriginField string = "origin" 62 | 63 | // DefaultTagField - default tag field 64 | const DefaultTagField string = "tag" 65 | 66 | // DefaultIDField - default id field 67 | const DefaultIDField string = "id" 68 | 69 | // DefaultAuthorField - default author field 70 | const DefaultAuthorField string = "author" 71 | 72 | // Issue - common constant string 73 | const Issue string = "issue" 74 | 75 | // Comment - common constant string 76 | const Comment string = "comment" 77 | 78 | // TooManyScrolls - this appearch in error message when too many scrolls are created 79 | const TooManyScrolls = "Trying to create too many scroll contexts" 80 | 81 | // NoSearchContextFound - this appearch when processing takes so long, that the scroll expires 82 | const NoSearchContextFound = "No search context found for id" 83 | 84 | // Nil - common constant string 85 | const Nil = "none" 86 | 87 | // None - common constant string 88 | const None = "None" 89 | 90 | // Offset - common constant string 91 | const Offset = "offset" 92 | 93 | // UUID - common constant string 94 | const 
UUID = "uuid" 95 | 96 | // ID - common constant string 97 | const ID = "id" 98 | 99 | // Unknown - common constant string 100 | const Unknown = "Unknown" 101 | 102 | // Wait59m - common constant string 103 | const Wait59m = "59m" 104 | 105 | // Redacted - [redacted] 106 | const Redacted string = "[redacted]" 107 | 108 | // Message - common constant string 109 | const Message = "message" 110 | 111 | // ContentType - common constant string 112 | const ContentType = "Content-Type" 113 | 114 | // LowerContentType - common constant string 115 | const LowerContentType = "content-type" 116 | 117 | // Author - default author field 118 | const Author string = "author" 119 | 120 | // Recipient - default author field 121 | const Recipient string = "recipient" 122 | 123 | // From - common constant string 124 | const From = "from" 125 | 126 | // Commit - common constant string 127 | const Commit = "commit" 128 | 129 | // Review - common constant string 130 | const Review = "review" 131 | 132 | // Patchset - common constant string 133 | const Patchset = "patchset" 134 | 135 | // Changeset - common constant string 136 | const Changeset = "changeset" 137 | 138 | // Approval - common constant string 139 | const Approval = "approval" 140 | 141 | // HistoricalContent - common constant string 142 | const HistoricalContent = "historical content" 143 | 144 | // ProjectSlug - common constant string 145 | const ProjectSlug = "project_slug" 146 | 147 | // DadsWarning - common constant string 148 | const DadsWarning = "da-ds WARNING" 149 | 150 | // MissingName - common constant string 151 | const MissingName = "-MISSING-NAME" 152 | 153 | // RedactedEmail - common constant string 154 | const RedactedEmail = "-REDACTED-EMAIL" 155 | -------------------------------------------------------------------------------- /db/connector.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | _ "github.com/go-sql-driver/mysql" // blank import for mysql driver 5 | "github.com/jmoiron/sqlx" 6 | ) 7 | 8 | // NewConnector creates new db instance with given db 9 | func NewConnector(driverName string, connString string) (*sqlx.DB, error) { 10 | return sqlx.Connect(driverName, connString) 11 | } 12 | -------------------------------------------------------------------------------- /detect-removed-commits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #for f in `find .git/objects/??/ -type f | sed 's/\(.*\)\/\([[:xdigit:]]\{2\}\)\/\([[:xdigit:]]\+\)$/\2\3/g'` 3 | declare -A glog 4 | declare -A gfile 5 | set -o pipefail 6 | set -e 7 | if [ -z "`git rev-list -n 1 --all`" ] 8 | then 9 | exit 0 10 | fi 11 | allcommits=`git cat-file --unordered --batch-all-objects --buffer --batch-check | grep ' commit ' | awk '{print $1}'` 12 | for f in $allcommits 13 | do 14 | gfile[$f]=1 15 | done 16 | commits=`git rev-list --all --remotes` 17 | for f in $commits 18 | do 19 | glog[$f]=1 20 | done 21 | missing='' 22 | for f in "${!gfile[@]}" 23 | do 24 | got=${glog[$f]} 25 | if [ ! "$got" = "1" ] 26 | then 27 | if [ -z "${missing}" ] 28 | then 29 | missing="$f" 30 | else 31 | missing="${missing} ${f}" 32 | fi 33 | fi 34 | done 35 | if [ ! 
-z "${missing}" ] 36 | then 37 | echo -n "${missing}" 38 | fi 39 | -------------------------------------------------------------------------------- /dockerhub/README.md: -------------------------------------------------------------------------------- 1 | Dockerhub Datasource 2 | ========= 3 | 4 | Dockerhub datasource is a package to fetch data 5 | from dockerhub API and save it into Elasticsearch 6 | and Enrich saved data. 7 | 8 | 9 | ### Docker Running instructions 10 | 11 | To run dockerhub datasource from dads you 12 | must set proper environment variables to 13 | select dockerhub as an engine and other 14 | parameters that determine the intended behavior. 15 | 16 | These are the needed environment variables to run dockerhub: 17 | - DA_DOCKERHUB_ENRICH={1,0} 18 | - To decide whether will do enrichment step or not. 19 | - DA_DOCKERHUB_ES_URL=http://{ES_USERNAME}:{ES_PASSWORD}@{URL}:{PORT} 20 | - Elasticsearch url included username, password, host and port 21 | - DA_DOCKERHUB_NO_INCREMENTAL={1,0} 22 | - Starts from the beginning if 1 is selected and will not use date to continue enriching 23 | - DA_DOCKERHUB_USERNAME='' 24 | - Optional, for dockerhub repository credentials 25 | - DA_DOCKERHUB_PASSWORD='' 26 | - Optional, for dockerhub repository credentials 27 | - DA_DOCKERHUB_PROJECT_SLUG='{SLUG}' 28 | - Slug name e.g. yocto 29 | - DA_DOCKERHUB_REPOSITORIES_JSON='[{"Owner":'{OWNER}',"Repository":"{REPOSITORY}","Project":"{PROJECT}","ESIndex":"{INDEX_NAME}"}]' 30 | - JSON e.g. '[{"Owner":"crops","Repository":"yocto-eol","Project":"yocto","ESIndex":"sds-yocto-dockerhub"}]' 31 | - DA_DS='{DATASOURCE}' 32 | - Datasource name should be 'dockerhub' 33 | - DA_DOCKERHUB_HTTP_TIMEOUT=60s 34 | - HTTP timeout duration. 35 | 36 | Example of running dads at 37 | `./scripts/dockerhub.sh` 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /dockerhub/const.go: -------------------------------------------------------------------------------- 1 | package dockerhub 2 | 3 | const ( 4 | // APIURL dockerhub base url 5 | APIURL = "https://hub.docker.com" 6 | // APIVersion dockerhub API version 7 | APIVersion = "v2" 8 | // APILogin url 9 | APILogin = "users/login" 10 | // APIRepositories dockerhub repositories API 11 | APIRepositories = "repositories" 12 | // Category ... 
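// It names the kind of item this datasource produces; RepositoryRaw has a matching Category field (see dto.go).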
13 | Category = "dockerhub-data" 14 | // Dockerhub - DS name 15 | Dockerhub string = "dockerhub" 16 | ) 17 | -------------------------------------------------------------------------------- /dockerhub/dto.go: -------------------------------------------------------------------------------- 1 | package dockerhub 2 | 3 | import "time" 4 | 5 | // RepositoryResponse data model represents dockerhub get repository results 6 | type RepositoryResponse struct { 7 | User string `json:"user"` 8 | Name string `json:"name"` 9 | Namespace string `json:"namespace"` 10 | RepositoryType string `json:"repository_type"` 11 | Status *int `json:"status"` 12 | Description string `json:"description"` 13 | IsPrivate *bool `json:"is_private"` 14 | IsAutomated bool `json:"is_automated"` 15 | CanEdit bool `json:"can_edit"` 16 | StarCount *int `json:"star_count"` 17 | PullCount *int `json:"pull_count"` 18 | LastUpdated time.Time `json:"last_updated"` 19 | IsMigrated bool `json:"is_migrated"` 20 | HasStarred bool `json:"has_starred"` 21 | FullDescription string `json:"full_description"` 22 | Affiliation string `json:"affiliation"` 23 | Permissions Permissions `json:"permissions"` 24 | FetchedOn float64 `json:"fetched_on"` 25 | } 26 | 27 | // Permissions response 28 | type Permissions struct { 29 | Read bool `json:"read"` 30 | Write bool `json:"write"` 31 | Admin bool `json:"admin"` 32 | } 33 | 34 | // RepositorySearchFields ... 35 | type RepositorySearchFields struct { 36 | Name string `json:"name"` 37 | ItemID string `json:"item_id"` 38 | Namespace string `json:"namespace"` 39 | } 40 | 41 | // RepositoryRaw represents dockerhub repository raw model 42 | type RepositoryRaw struct { 43 | BackendVersion string `json:"backend_version"` 44 | Data *RepositoryResponse `json:"data"` 45 | Tag string `json:"tag"` 46 | UUID string `json:"uuid"` 47 | SearchFields *RepositorySearchFields `json:"search_fields"` 48 | Origin string `json:"origin"` 49 | UpdatedOn float64 `json:"updated_on"` 50 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 51 | BackendName string `json:"backend_name"` 52 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 53 | Timestamp float64 `json:"timestamp"` 54 | Category string `json:"category"` 55 | ClassifiedFieldsFiltered *string `json:"classified_fields_filtered"` 56 | } 57 | 58 | // RepositoryEnrich represents dockerhub repository enriched model 59 | type RepositoryEnrich struct { 60 | ID string `json:"id"` 61 | Project string `json:"project"` 62 | Affiliation string `json:"affiliation"` 63 | Description string `json:"description"` 64 | IsPrivate bool `json:"is_private"` 65 | IsAutomated bool `json:"is_automated"` 66 | PullCount int `json:"pull_count"` 67 | RepositoryType string `json:"repository_type"` 68 | User string `json:"user"` 69 | Status int `json:"status"` 70 | StarCount int `json:"star_count"` 71 | 72 | IsEvent int `json:"is_event"` 73 | IsDockerImage int `json:"is_docker_image"` 74 | DescriptionAnalyzed string `json:"description_analyzed"` 75 | FullDescriptionAnalyzed string `json:"full_description_analyzed"` 76 | 77 | CreationDate time.Time `json:"creation_date"` 78 | IsDockerhubDockerhub int `json:"is_dockerhub_dockerhub"` 79 | RepositoryLabels *[]string `json:"repository_labels"` 80 | MetadataFilterRaw *string `json:"metadata__filter_raw"` 81 | 82 | LastUpdated time.Time `json:"last_updated"` 83 | Offset *string `json:"offset"` 84 | MetadataEnrichedOn time.Time `json:"metadata__enriched_on"` 85 | 86 | BackendVersion string `json:"backend_version"` 87 | Tag string 
`json:"tag"` 88 | UUID string `json:"uuid"` 89 | Origin string `json:"origin"` 90 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 91 | MetadataBackendName string `json:"metadata__backend_name"` 92 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 93 | BuildOnCloud *string `json:"build_on_cloud"` 94 | ProjectTS int64 `json:"project_ts"` 95 | } 96 | 97 | // LoginResponse from login dockerhub web API 98 | type LoginResponse struct { 99 | Token string `json:"token"` 100 | } 101 | -------------------------------------------------------------------------------- /dockerhub/enricher.go: -------------------------------------------------------------------------------- 1 | package dockerhub 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "strings" 7 | "time" 8 | 9 | dads "github.com/LF-Engineering/da-ds" 10 | ) 11 | 12 | // Enricher contains dockerhub datasource enrich logic 13 | type Enricher struct { 14 | DSName string // Datasource will be used as key for ES 15 | ElasticSearchProvider ESClientProvider 16 | BackendVersion string 17 | } 18 | 19 | // TopHits result 20 | type TopHits struct { 21 | Took int `json:"took"` 22 | Hits Hits `json:"hits"` 23 | Aggregations Aggregations `json:"aggregations"` 24 | } 25 | 26 | // Hits result 27 | type Hits struct { 28 | Total Total `json:"total"` 29 | MaxScore float32 `json:"max_score"` 30 | Hits []NestedHits `json:"hits"` 31 | } 32 | 33 | // Total result 34 | type Total struct { 35 | Value int `json:"value"` 36 | Relation string `json:"relation"` 37 | } 38 | 39 | // NestedHits result 40 | type NestedHits struct { 41 | Index string `json:"_index"` 42 | Type string `json:"_type"` 43 | ID string `json:"_id"` 44 | Score float64 `json:"_score"` 45 | Source *RepositoryRaw `json:"_source"` 46 | } 47 | 48 | // Aggregations result 49 | type Aggregations struct { 50 | LastDate LastDate `json:"last_date"` 51 | } 52 | 53 | // LastDate result 54 | type LastDate struct { 55 | Value float64 `json:"value"` 56 | ValueAsString string `json:"value_as_string"` 57 | } 58 | 59 | // NewEnricher initiates a new Enricher 60 | func NewEnricher(backendVersion string, esClientProvider ESClientProvider) *Enricher { 61 | return &Enricher{ 62 | DSName: Dockerhub, 63 | ElasticSearchProvider: esClientProvider, 64 | BackendVersion: backendVersion, 65 | } 66 | } 67 | 68 | // EnrichItem enriches raw item 69 | func (e *Enricher) EnrichItem(rawItem RepositoryRaw, project string, now time.Time) (*RepositoryEnrich, error) { 70 | 71 | enriched := RepositoryEnrich{} 72 | 73 | enriched.ID = fmt.Sprintf("%s-%s", rawItem.Data.Name, rawItem.Data.Namespace) 74 | enriched.IsEvent = 0 75 | enriched.IsDockerImage = 1 76 | enriched.IsDockerhubDockerhub = 1 77 | enriched.Description = rawItem.Data.Description 78 | enriched.DescriptionAnalyzed = rawItem.Data.Description 79 | 80 | // todo: in python description is used ?? 
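// Fall back to the short description when the repository has no full description, so full_description_analyzed is never left empty.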
81 | if rawItem.Data.FullDescription == "" { 82 | enriched.FullDescriptionAnalyzed = rawItem.Data.Description 83 | } else { 84 | enriched.FullDescriptionAnalyzed = rawItem.Data.FullDescription 85 | } 86 | 87 | enriched.Affiliation = rawItem.Data.Affiliation 88 | enriched.IsAutomated = rawItem.Data.IsAutomated 89 | enriched.RepositoryType = rawItem.Data.RepositoryType 90 | enriched.User = rawItem.Data.User 91 | 92 | if rawItem.Data.IsPrivate == nil { 93 | enriched.IsPrivate = false 94 | } else { 95 | enriched.IsPrivate = *rawItem.Data.IsPrivate 96 | } 97 | 98 | if rawItem.Data.PullCount == nil { 99 | enriched.PullCount = 0 100 | } else { 101 | enriched.PullCount = *rawItem.Data.PullCount 102 | } 103 | 104 | if rawItem.Data.Status == nil { 105 | enriched.Status = 0 106 | } else { 107 | enriched.Status = *rawItem.Data.Status 108 | } 109 | 110 | if rawItem.Data.StarCount == nil { 111 | enriched.StarCount = 0 112 | } else { 113 | enriched.StarCount = *rawItem.Data.StarCount 114 | } 115 | 116 | enriched.LastUpdated = rawItem.Data.LastUpdated 117 | enriched.Project = project 118 | 119 | enriched.MetadataBackendName = fmt.Sprintf("%sEnrich", strings.Title(e.DSName)) 120 | enriched.BackendVersion = e.BackendVersion 121 | 122 | enriched.MetadataTimestamp = rawItem.MetadataTimestamp 123 | if rawItem.MetadataTimestamp.IsZero() { 124 | enriched.MetadataTimestamp = rawItem.MetadataUpdatedOn.UTC() 125 | } 126 | 127 | enriched.MetadataUpdatedOn = rawItem.Data.LastUpdated 128 | enriched.MetadataEnrichedOn = rawItem.MetadataUpdatedOn.UTC() 129 | enriched.CreationDate = rawItem.Data.LastUpdated 130 | 131 | // todo: the 3 following fields filling is vague 132 | enriched.RepositoryLabels = nil 133 | enriched.MetadataFilterRaw = nil 134 | enriched.Offset = nil 135 | 136 | enriched.Origin = rawItem.Origin 137 | enriched.Tag = rawItem.Origin 138 | enriched.UUID = rawItem.UUID 139 | 140 | return &enriched, nil 141 | } 142 | 143 | // HandleMapping creates rich mapping 144 | func (e *Enricher) HandleMapping(index string) error { 145 | _, err := e.ElasticSearchProvider.CreateIndex(index, DockerhubRichMapping) 146 | return err 147 | } 148 | 149 | // GetFetchedDataItem gets fetched data items starting from lastDate 150 | func (e *Enricher) GetFetchedDataItem(repo *Repository, cmdLastDate *time.Time, lastDate *time.Time, noIncremental bool) (result *TopHits, err error) { 151 | rawIndex := fmt.Sprintf("%s-raw", repo.ESIndex) 152 | 153 | var lastEnrichDate *time.Time 154 | 155 | if noIncremental == false { 156 | if cmdLastDate != nil && !cmdLastDate.IsZero() { 157 | lastEnrichDate = cmdLastDate 158 | } else if lastDate != nil { 159 | lastEnrichDate = lastDate 160 | 161 | enrichLastDate, err := e.ElasticSearchProvider.GetStat(repo.ESIndex, "metadata__enriched_on", "max", nil, nil) 162 | if err != nil { 163 | log.Printf("Warning: %v", err) 164 | } else { 165 | if lastDate.After(enrichLastDate) { 166 | lastEnrichDate = &enrichLastDate 167 | } 168 | } 169 | } 170 | } 171 | 172 | url := fmt.Sprintf("%s/%s/%s", APIURL, repo.Owner, repo.Repository) 173 | 174 | hits := &TopHits{} 175 | 176 | query := map[string]interface{}{ 177 | "size": 10000, 178 | "query": map[string]interface{}{ 179 | "bool": map[string]interface{}{ 180 | "must": []map[string]interface{}{}, 181 | }, 182 | }, 183 | "collapse": map[string]string{ 184 | "field": "origin.keyword", 185 | }, 186 | "sort": []map[string]interface{}{ 187 | { 188 | "metadata__updated_on": map[string]string{ 189 | "order": "desc", 190 | }, 191 | }, 192 | }, 193 | } 194 | 195 | 
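// For reference, after the conditions below are appended, the body sent to the
// "<index>-raw" index has roughly this shape (illustrative values, not output
// from a real run):
//
//   {
//     "size": 10000,
//     "query": {"bool": {"must": [
//       {"term": {"origin.keyword": "<APIURL>/<owner>/<repository>"}},
//       {"range": {"metadata__updated_on": {"gte": "2021-01-01T00:00:00Z"}}}
//     ]}},
//     "collapse": {"field": "origin.keyword"},
//     "sort": [{"metadata__updated_on": {"order": "desc"}}]
//   }
//
// The range clause is only added when an incremental date could be resolved
// (lastEnrichDate != nil); otherwise only the origin term filter is used.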
conditions := []map[string]interface{}{ 196 | { 197 | "term": map[string]interface{}{ 198 | "origin.keyword": url, 199 | }, 200 | }, 201 | } 202 | 203 | if lastEnrichDate != nil { 204 | conditions = append(conditions, 205 | map[string]interface{}{ 206 | "range": map[string]interface{}{ 207 | "metadata__updated_on": map[string]interface{}{ 208 | "gte": (*lastEnrichDate).Format(time.RFC3339), 209 | }, 210 | }, 211 | }, 212 | ) 213 | } 214 | 215 | query["query"].(map[string]interface{})["bool"].(map[string]interface{})["must"] = conditions 216 | 217 | err = e.ElasticSearchProvider.Get(rawIndex, query, hits) 218 | if err != nil { 219 | dads.Printf("[dads-dockerhub] GetFetchedDataItem get elastic data error : %+v\n", err) 220 | return nil, err 221 | } 222 | 223 | return hits, nil 224 | } 225 | -------------------------------------------------------------------------------- /dockerhub/fetcher.go: -------------------------------------------------------------------------------- 1 | package dockerhub 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/LF-Engineering/dev-analytics-libraries/elastic" 8 | timeLib "github.com/LF-Engineering/dev-analytics-libraries/time" 9 | 10 | "net/http" 11 | "strings" 12 | "time" 13 | 14 | dads "github.com/LF-Engineering/da-ds" 15 | "github.com/LF-Engineering/dev-analytics-libraries/uuid" 16 | jsoniter "github.com/json-iterator/go" 17 | ) 18 | 19 | // Fetcher contains dockerhub datasource fetch logic 20 | type Fetcher struct { 21 | DSName string // Datasource will be used as key for ES 22 | IncludeArchived bool 23 | MultiOrigin bool // can we store multiple endpoints in a single index? 24 | HTTPClientProvider HTTPClientProvider 25 | ElasticSearchProvider ESClientProvider 26 | Username string 27 | Password string 28 | Token string 29 | BackendVersion string 30 | } 31 | 32 | // Params required parameters for dockerhub fetcher 33 | type Params struct { 34 | Username string 35 | Password string 36 | BackendVersion string 37 | } 38 | 39 | // HTTPClientProvider used in connecting to remote http server 40 | type HTTPClientProvider interface { 41 | Request(url string, method string, header map[string]string, body []byte, params map[string]string) (statusCode int, resBody []byte, err error) 42 | } 43 | 44 | // ESClientProvider used in connecting to ES Client server 45 | type ESClientProvider interface { 46 | Add(index string, documentID string, body []byte) ([]byte, error) 47 | CreateIndex(index string, body []byte) ([]byte, error) 48 | Bulk(body []byte) ([]byte, error) 49 | Get(index string, query map[string]interface{}, result interface{}) (err error) 50 | GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (result time.Time, err error) 51 | BulkInsert(data []elastic.BulkData) ([]byte, error) 52 | DelayOfCreateIndex(ex func(str string, b []byte) ([]byte, error), uin uint, du time.Duration, index string, data []byte) error 53 | BulkUpdate(data []elastic.BulkData) ([]byte, error) 54 | } 55 | 56 | // NewFetcher initiates a new dockerhub fetcher 57 | func NewFetcher(params *Params, httpClientProvider HTTPClientProvider, esClientProvider ESClientProvider) *Fetcher { 58 | return &Fetcher{ 59 | DSName: Dockerhub, 60 | HTTPClientProvider: httpClientProvider, 61 | ElasticSearchProvider: esClientProvider, 62 | Username: params.Username, 63 | Password: params.Password, 64 | BackendVersion: params.BackendVersion, 65 | } 66 | } 67 | 68 | // Login dockerhub in order to obtain access token for 
fetching private repositories 69 | func (f *Fetcher) Login(username string, password string) (string, error) { 70 | url := fmt.Sprintf("%s/%s/%s/%s", APIURL, APIVersion, APIRepositories, APILogin) 71 | 72 | payload := make(map[string]interface{}) 73 | payload["username"] = username 74 | payload["password"] = password 75 | 76 | p, err := jsoniter.Marshal(payload) 77 | if err != nil { 78 | return "", err 79 | } 80 | 81 | dads.Printf("dockerhub login via: %s\n", url) 82 | 83 | statusCode, resBody, err := f.HTTPClientProvider.Request(url, "Post", nil, p, nil) 84 | 85 | if err == nil && statusCode == http.StatusOK { 86 | res := LoginResponse{} 87 | err = jsoniter.Unmarshal(resBody, &res) 88 | if err != nil { 89 | return "", fmt.Errorf("cannot unmarshal result from %s", string(resBody)) 90 | } 91 | 92 | // Set token on the fetcher object 93 | f.Token = res.Token 94 | 95 | return res.Token, nil 96 | } 97 | 98 | return "", fmt.Errorf("dockerhub login failed: status code %d, error: %v", statusCode, err) 99 | } 100 |
101 | // FetchItem pulls image data 102 | func (f *Fetcher) FetchItem(owner string, repository string, now time.Time) (*RepositoryRaw, error) { 103 | org := owner 104 | if org == "_" { 105 | org = "library" 106 | } 107 | requestURL := fmt.Sprintf("%s/%s/%s/%s/%s", APIURL, APIVersion, APIRepositories, org, repository) 108 | url := fmt.Sprintf("%s/%s/%s", APIURL, owner, repository) 109 | headers := map[string]string{} 110 | if f.Token != "" { 111 | headers["Authorization"] = fmt.Sprintf("JWT %s", f.Token) 112 | } 113 | 114 | statusCode, resBody, err := f.HTTPClientProvider.Request(requestURL, "GET", headers, nil, nil) 115 | if err != nil { 116 | dads.Printf("[dads-dockerhub] FetchItem get repository error : %+v, status code : %v, response body : %s\n", err, statusCode, resBody) 117 | return nil, err 118 | } 119 | 120 | if statusCode != http.StatusOK { 121 | dads.Printf("[dads-dockerhub] FetchItem get repository error, status code : %v, response body : %s\n", statusCode, resBody) 122 | return nil, fmt.Errorf("status code error %+v", statusCode) 123 | } 124 | 125 | repoRes := &RepositoryResponse{} 126 | if err := jsoniter.Unmarshal(resBody, &repoRes); err != nil { 127 | dads.Printf("[dads-dockerhub] FetchItem unmarshal error : %+v\n", err) 128 | return nil, errors.New("unable to resolve json request") 129 | } 130 | 131 | raw := &RepositoryRaw{} 132 | raw.Data = repoRes 133 | raw.BackendName = strings.Title(f.DSName) 134 | raw.BackendVersion = f.BackendVersion 135 | raw.Category = Category 136 | raw.ClassifiedFieldsFiltered = nil 137 | now = now.UTC() 138 | raw.Timestamp = timeLib.ConvertTimeToFloat(now) 139 | raw.Data.FetchedOn = raw.Timestamp 140 | raw.MetadataTimestamp = now 141 | raw.Origin = url 142 | raw.SearchFields = &RepositorySearchFields{repository, fmt.Sprintf("%f", raw.Timestamp), owner} 143 | raw.Tag = url 144 | raw.UpdatedOn = raw.Timestamp 145 | raw.MetadataUpdatedOn = now 146 | 147 | // generate UUID 148 | dStr := now.Format("02-01-2006") 149 | uid, err := uuid.Generate(raw.Origin, dStr) 150 | if err != nil { 151 | dads.Printf("[dads-dockerhub] FetchItem Generate uuid error : %+v\n", err) 152 | return nil, err 153 | } 154 | raw.UUID = uid 155 | 156 | return raw, nil 157 | } 158 |
159 | // GetLastDate gets fetching lastDate 160 | func (f *Fetcher) GetLastDate(repo *Repository, now time.Time) (time.Time, error) { 161 | lastDate, err := f.ElasticSearchProvider.GetStat(fmt.Sprintf("%s-raw", repo.ESIndex), "metadata__updated_on", "max", nil, nil) 162 | if err != nil { 163 | return now.UTC(), err 164 | } 165 | 166 | return lastDate, nil 167 | } 168 |
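The fetcher and enricher above are normally driven by the dockerhub manager (manager.go). The sketch below only illustrates how the two pieces fit together if called directly from inside the dockerhub package: httpProvider and esProvider stand for any implementations of the HTTPClientProvider and ESClientProvider interfaces declared in fetcher.go, the repository names are placeholders, and error handling is abbreviated.

// exampleRun is a minimal wiring sketch, not the actual manager flow.
func exampleRun(httpProvider HTTPClientProvider, esProvider ESClientProvider) error {
	fetcher := NewFetcher(&Params{BackendVersion: "0.0.1"}, httpProvider, esProvider)
	enricher := NewEnricher("0.0.1", esProvider)

	now := time.Now().UTC()

	// Fetch the raw repository document; an owner of "_" is mapped to the "library" namespace.
	raw, err := fetcher.FetchItem("library", "alpine", now)
	if err != nil {
		return err
	}

	// Turn the raw document into the rich-index document.
	rich, err := enricher.EnrichItem(*raw, "example-project", now)
	if err != nil {
		return err
	}

	fmt.Printf("enriched %s (origin %s)\n", rich.ID, rich.Origin)
	return nil
}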
-------------------------------------------------------------------------------- /dockerhub/mapping.go: -------------------------------------------------------------------------------- 1 | package dockerhub 2 | 3 | var ( 4 | // DockerhubSearchFields - extra search fields 5 | DockerhubSearchFields = map[string][]string{ 6 | "name": {"name"}, 7 | "namespace": {"namespace"}, 8 | } 9 | 10 | // DockerhubRawMapping - Dockerhub raw index mapping 11 | DockerhubRawMapping = []byte(`{"mappings": {"dynamic":true,"properties":{"metadata__updated_on":{"type":"date"},"data":{"properties":{"description":{"type":"text","index":true},"full_description":{"type":"text","index":true}}}}}}`) 12 | 13 | // DockerhubRichMapping - Dockerhub rich index mapping 14 | DockerhubRichMapping = []byte(`{"mappings": {"properties":{"metadata__updated_on":{"type":"date"},"description":{"type":"text","index":true},"description_analyzed":{"type":"text","index":true},"full_description_analyzed":{"type":"text","index":true},"origin":{"type":"keyword"},"repository_type":{"type":"keyword"},"tag":{"type":"keyword"},"id":{"type":"keyword"},"metadata__backend_name":{"type":"keyword"},"user":{"type":"keyword"},"uuid":{"type":"keyword"},"project":{"type":"keyword"},"meta_title":{"type":"keyword"},"meta_type":{"type":"keyword"},"meta_state":{"type":"keyword"},"meta_program":{"type":"keyword"},"status":{"type":"keyword"}}}}`) 15 | ) 16 | -------------------------------------------------------------------------------- /dockerhub/mocks/auth0_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // Auth0Client is an autogenerated mock type for the Auth0Client type 8 | type Auth0Client struct { 9 | mock.Mock 10 | } 11 | 12 | // GetToken provides a mock function with given fields: 13 | func (_m *Auth0Client) GetToken() (string, error) { 14 | ret := _m.Called() 15 | 16 | var r0 string 17 | if rf, ok := ret.Get(0).(func() string); ok { 18 | r0 = rf() 19 | } else { 20 | r0 = ret.Get(0).(string) 21 | } 22 | 23 | var r1 error 24 | if rf, ok := ret.Get(1).(func() error); ok { 25 | r1 = rf() 26 | } else { 27 | r1 = ret.Error(1) 28 | } 29 | 30 | return r0, r1 31 | } 32 | -------------------------------------------------------------------------------- /dockerhub/mocks/es_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import ( 6 | elastic "github.com/LF-Engineering/dev-analytics-libraries/elastic" 7 | mock "github.com/stretchr/testify/mock" 8 | 9 | time "time" 10 | ) 11 | 12 | // ESClientProvider is an autogenerated mock type for the ESClientProvider type 13 | type ESClientProvider struct { 14 | mock.Mock 15 | } 16 | 17 | // Add provides a mock function with given fields: index, documentID, body 18 | func (_m *ESClientProvider) Add(index string, documentID string, body []byte) ([]byte, error) { 19 | ret := _m.Called(index, documentID, body) 20 | 21 | var r0 []byte 22 | if rf, ok := ret.Get(0).(func(string, string, []byte) []byte); ok { 23 | r0 = rf(index, documentID, body) 24 | } else { 25 | if ret.Get(0) != nil { 26 | r0 = ret.Get(0).([]byte) 27 | } 28 | } 29 | 30 | var r1 error 31 | if rf, ok := ret.Get(1).(func(string, string, []byte) error); ok { 32 | r1 = rf(index, documentID, body) 33 | } else { 34 | r1 = ret.Error(1) 35 | } 36 | 37 | return r0, r1 38 | } 39 | 40 | // Bulk provides a mock function with given fields: body 41 | func (_m *ESClientProvider) Bulk(body []byte) ([]byte, error) { 42 | ret := _m.Called(body) 43 | 44 | var r0 []byte 45 | if rf, ok := ret.Get(0).(func([]byte) []byte); ok { 46 | r0 = rf(body) 47 | } else { 48 | if ret.Get(0) != nil { 49 | r0 = ret.Get(0).([]byte) 50 | } 51 | } 52 | 53 | var r1 error 54 | if rf, ok := ret.Get(1).(func([]byte) error); ok { 55 | r1 = rf(body) 56 | } else { 57 | r1 = ret.Error(1) 58 | } 59 | 60 | return r0, r1 61 | } 62 | 63 | // BulkInsert provides a mock function with given fields: data 64 | func (_m *ESClientProvider) BulkInsert(data []elastic.BulkData) ([]byte, error) { 65 | ret := _m.Called(data) 66 | 67 | var r0 []byte 68 | if rf, ok := ret.Get(0).(func([]elastic.BulkData) []byte); ok { 69 | r0 = rf(data) 70 | } else { 71 | if ret.Get(0) != nil { 72 | r0 = ret.Get(0).([]byte) 73 | } 74 | } 75 | 76 | var r1 error 77 | if rf, ok := ret.Get(1).(func([]elastic.BulkData) error); ok { 78 | r1 = rf(data) 79 | } else { 80 | r1 = ret.Error(1) 81 | } 82 | 83 | return r0, r1 84 | } 85 | 86 | // BulkUpdate provides a mock function with given fields: data 87 | func (_m *ESClientProvider) BulkUpdate(data []elastic.BulkData) ([]byte, error) { 88 | ret := _m.Called(data) 89 | 90 | var r0 []byte 91 | if rf, ok := ret.Get(0).(func([]elastic.BulkData) []byte); ok { 92 | r0 = rf(data) 93 | } else { 94 | if ret.Get(0) != nil { 95 | r0 = ret.Get(0).([]byte) 96 | } 97 | } 98 | 99 | var r1 error 100 | if rf, ok := ret.Get(1).(func([]elastic.BulkData) error); ok { 101 | r1 = rf(data) 102 | } else { 103 | r1 = ret.Error(1) 104 | } 105 | 106 | return r0, r1 107 | } 108 | 109 | // CreateIndex provides a mock function with given fields: index, body 110 | func (_m *ESClientProvider) CreateIndex(index string, body []byte) ([]byte, error) { 111 | ret := _m.Called(index, body) 112 | 113 | var r0 []byte 114 | if rf, ok := ret.Get(0).(func(string, []byte) []byte); ok { 115 | r0 = rf(index, body) 116 | } else { 117 | if ret.Get(0) != nil { 118 | r0 = ret.Get(0).([]byte) 119 | } 120 | } 121 | 122 | var r1 error 123 | if rf, ok := ret.Get(1).(func(string, []byte) error); ok { 124 | r1 = rf(index, body) 125 | } else { 126 | r1 = ret.Error(1) 127 | } 128 | 129 | return r0, r1 130 | } 131 | 132 | // DelayOfCreateIndex provides a mock function with given fields: ex, uin, du, index, data 133 | func (_m *ESClientProvider) DelayOfCreateIndex(ex func(string, []byte) ([]byte, error), uin uint, du time.Duration, index string, data []byte) error { 134 | ret 
:= _m.Called(ex, uin, du, index, data) 135 | 136 | var r0 error 137 | if rf, ok := ret.Get(0).(func(func(string, []byte) ([]byte, error), uint, time.Duration, string, []byte) error); ok { 138 | r0 = rf(ex, uin, du, index, data) 139 | } else { 140 | r0 = ret.Error(0) 141 | } 142 | 143 | return r0 144 | } 145 | 146 | // Get provides a mock function with given fields: index, query, result 147 | func (_m *ESClientProvider) Get(index string, query map[string]interface{}, result interface{}) error { 148 | ret := _m.Called(index, query, result) 149 | 150 | var r0 error 151 | if rf, ok := ret.Get(0).(func(string, map[string]interface{}, interface{}) error); ok { 152 | r0 = rf(index, query, result) 153 | } else { 154 | r0 = ret.Error(0) 155 | } 156 | 157 | return r0 158 | } 159 | 160 | // GetStat provides a mock function with given fields: index, field, aggType, mustConditions, mustNotConditions 161 | func (_m *ESClientProvider) GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (time.Time, error) { 162 | ret := _m.Called(index, field, aggType, mustConditions, mustNotConditions) 163 | 164 | var r0 time.Time 165 | if rf, ok := ret.Get(0).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) time.Time); ok { 166 | r0 = rf(index, field, aggType, mustConditions, mustNotConditions) 167 | } else { 168 | r0 = ret.Get(0).(time.Time) 169 | } 170 | 171 | var r1 error 172 | if rf, ok := ret.Get(1).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) error); ok { 173 | r1 = rf(index, field, aggType, mustConditions, mustNotConditions) 174 | } else { 175 | r1 = ret.Error(1) 176 | } 177 | 178 | return r0, r1 179 | } 180 | 181 | // UpdateByQueryWithMaxDocs provides a mock function with given fields: index, query, fields, max 182 | func (_m *ESClientProvider) UpdateByQueryWithMaxDocs(index string, query string, fields string, max int) ([]byte, error) { 183 | ret := _m.Called(index, query, fields, max) 184 | 185 | var r0 []byte 186 | if rf, ok := ret.Get(0).(func(string, string, string, int) []byte); ok { 187 | r0 = rf(index, query, fields, max) 188 | } else { 189 | if ret.Get(0) != nil { 190 | r0 = ret.Get(0).([]byte) 191 | } 192 | } 193 | 194 | var r1 error 195 | if rf, ok := ret.Get(1).(func(string, string, string, int) error); ok { 196 | r1 = rf(index, query, fields, max) 197 | } else { 198 | r1 = ret.Error(1) 199 | } 200 | 201 | return r0, r1 202 | } 203 | -------------------------------------------------------------------------------- /dockerhub/mocks/http_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.3.0. DO NOT EDIT. 
2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // HTTPClientProvider is an autogenerated mock type for the HTTPClientProvider type 8 | type HTTPClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // Request provides a mock function with given fields: url, method, header, body, params 13 | func (_m *HTTPClientProvider) Request(url string, method string, header map[string]string, body []byte, params map[string]string) (int, []byte, error) { 14 | ret := _m.Called(url, method, header, body, params) 15 | 16 | var r0 int 17 | if rf, ok := ret.Get(0).(func(string, string, map[string]string, []byte, map[string]string) int); ok { 18 | r0 = rf(url, method, header, body, params) 19 | } else { 20 | r0 = ret.Get(0).(int) 21 | } 22 | 23 | var r1 []byte 24 | if rf, ok := ret.Get(1).(func(string, string, map[string]string, []byte, map[string]string) []byte); ok { 25 | r1 = rf(url, method, header, body, params) 26 | } else { 27 | if ret.Get(1) != nil { 28 | r1 = ret.Get(1).([]byte) 29 | } 30 | } 31 | 32 | var r2 error 33 | if rf, ok := ret.Get(2).(func(string, string, map[string]string, []byte, map[string]string) error); ok { 34 | r2 = rf(url, method, header, body, params) 35 | } else { 36 | r2 = ret.Error(2) 37 | } 38 | 39 | return r0, r1, r2 40 | } 41 | -------------------------------------------------------------------------------- /email.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "net" 5 | "net/mail" 6 | "regexp" 7 | "strings" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | var ( 13 | // EmailRegex - regexp to match email address 14 | EmailRegex = regexp.MustCompile("^[][a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$") 15 | // EmailReplacer - replacer for some email buggy characters 16 | EmailReplacer = strings.NewReplacer(" at ", "@", " AT ", "@", " At ", "@", " dot ", ".", " DOT ", ".", " Dot ", ".", "<", "", ">", "", "`", "") 17 | // emailsCache validation cache 18 | emailsCache = map[string]string{} 19 | // emailsCacheMtx - emails validation cache mutex 20 | emailsCacheMtx *sync.RWMutex 21 | // OpenAddrRE - '<...' -> '<' (... = whitespace) 22 | OpenAddrRE = regexp.MustCompile(`<\s+`) 23 | // CloseAddrRE - '...>' -> '>' (... = whitespace) 24 | CloseAddrRE = regexp.MustCompile(`\s+>`) 25 | // WhiteSpace - one or more whitespace characters 26 | WhiteSpace = regexp.MustCompile(`\s+`) 27 | ) 28 | 29 | // IsValidDomain - is MX domain valid? 
30 | // uses internal cache 31 | func IsValidDomain(domain string) (valid bool) { 32 | l := len(domain) 33 | if l < 4 || l > 254 { 34 | return 35 | } 36 | if MT { 37 | emailsCacheMtx.RLock() 38 | } 39 | dom, ok := emailsCache[domain] 40 | if MT { 41 | emailsCacheMtx.RUnlock() 42 | } 43 | valid = dom != "" 44 | if ok { 45 | // fmt.Printf("domain cache hit: '%s' -> %v\n", domain, valid) 46 | return 47 | } 48 | defer func() { 49 | var dom string 50 | if valid { 51 | dom = domain 52 | } 53 | if MT { 54 | emailsCacheMtx.Lock() 55 | } 56 | emailsCache[domain] = dom 57 | if MT { 58 | emailsCacheMtx.Unlock() 59 | } 60 | }() 61 | for i := 0; i < 10; i++ { 62 | mx, err := net.LookupMX(domain) 63 | if err == nil && len(mx) > 0 { 64 | valid = true 65 | return 66 | } 67 | } 68 | for i := 1; i <= 3; i++ { 69 | mx, err := net.LookupMX(domain) 70 | if err == nil && len(mx) > 0 { 71 | valid = true 72 | return 73 | } 74 | time.Sleep(time.Duration(i) * time.Second) 75 | } 76 | return 77 | } 78 |
79 | // IsValidEmail - is email correct: len, regexp, MX domain 80 | // uses internal cache 81 | func IsValidEmail(email string, validateDomain, guess bool) (valid bool, newEmail string) { 82 | l := len(email) 83 | if l < 6 || l > 254 { 84 | return 85 | } 86 | if MT { 87 | emailsCacheMtx.RLock() 88 | } 89 | nEmail, ok := emailsCache[email] 90 | if MT { 91 | emailsCacheMtx.RUnlock() 92 | } 93 | if ok { 94 | newEmail = nEmail 95 | valid = newEmail != "" 96 | return 97 | } 98 | defer func() { 99 | if MT { 100 | emailsCacheMtx.Lock() 101 | } 102 | emailsCache[email] = newEmail 103 | if MT { 104 | emailsCacheMtx.Unlock() 105 | } 106 | }() 107 | if guess { 108 | email = WhiteSpace.ReplaceAllString(email, " ") 109 | email = strings.TrimSpace(EmailReplacer.Replace(email)) 110 | email = strings.Split(email, " ")[0] 111 | } 112 | if !EmailRegex.MatchString(email) { 113 | return 114 | } 115 | if validateDomain { 116 | parts := strings.Split(email, "@") 117 | if len(parts) <= 1 || !IsValidDomain(parts[1]) { 118 | return 119 | } 120 | } 121 | newEmail = email 122 | valid = true 123 | return 124 | } 125 |
126 | // ParseAddresses - parse address string into one or more name/email pairs 127 | func ParseAddresses(ctx *Ctx, addrs string, maxAddrs int) (emails []*mail.Address, ok bool) { 128 | defer func() { 129 | if len(emails) > maxAddrs { 130 | emails = emails[:maxAddrs] 131 | } 132 | }() 133 | var e error 134 | patterns := []string{" at ", "_at_", " en "} 135 | addrs = strings.TrimSpace(addrs) 136 | addrs = SpacesRE.ReplaceAllString(addrs, " ") 137 | addrs = OpenAddrRE.ReplaceAllString(addrs, "<") 138 | addrs = CloseAddrRE.ReplaceAllString(addrs, ">") 139 | for _, pattern := range patterns { 140 | addrs = strings.Replace(addrs, pattern, "@", -1) 141 | } 142 | emails, e = mail.ParseAddressList(addrs) 143 | if e != nil { 144 | addrs2 := strings.Replace(addrs, `"`, "", -1) 145 | emails, e = mail.ParseAddressList(addrs2) 146 | if e != nil { 147 | emails = []*mail.Address{} 148 | ary := strings.Split(addrs2, ",") 149 | for _, f := range ary { 150 | f = strings.TrimSpace(f) 151 | email, e := mail.ParseAddress(f) 152 | if e == nil { 153 | emails = append(emails, email) 154 | if ctx.Debug > 1 { 155 | Printf("unable to parse '%s' but '%s' parsed to %v ('%s','%s')\n", addrs, f, email, email.Name, email.Address) 156 | } 157 | if len(emails) >= maxAddrs { 158 | break 159 | } 160 | continue 161 | } 162 | a := strings.Split(f, "@") 163 | if len(a) == 3 { 164 | // name@domain -> ['name', 'domain '] 165 | // name@domain name@domain -> ['name',
'domain name', 'domain'] 166 | name := a[0] 167 | domain := strings.Replace(a[2], ">", "", -1) 168 | nf := name + " <" + name + "@" + domain + ">" 169 | email, e := mail.ParseAddress(nf) 170 | if e == nil { 171 | emails = append(emails, email) 172 | if ctx.Debug > 1 { 173 | Printf("unable to parse '%s' but '%s' -> '%s' parsed to %v ('%s','%s')\n", addrs, f, nf, email, email.Name, email.Address) 174 | } 175 | if len(emails) > maxAddrs { 176 | break 177 | } 178 | } 179 | } 180 | } 181 | if len(emails) == 0 { 182 | if ctx.Debug > 1 { 183 | Printf("cannot get identities: cannot read email address(es) from %s\n", addrs) 184 | } 185 | return 186 | } 187 | } 188 | } 189 | for i, obj := range emails { 190 | // remove leading/trailing ' " 191 | // skip if starts with =? 192 | // should we allow empty name? 193 | obj.Name = strings.TrimSpace(strings.Trim(obj.Name, `"'`)) 194 | obj.Address = strings.TrimSpace(strings.Trim(obj.Address, `"'`)) 195 | if strings.HasPrefix(obj.Name, "=?") { 196 | if ctx.Debug > 0 { 197 | Printf("clearing buggy name '%s'\n", obj.Name) 198 | } 199 | obj.Name = "" 200 | } 201 | if obj.Name == "" || obj.Name == obj.Address { 202 | ary := strings.Split(obj.Address, "@") 203 | obj.Name = ary[0] 204 | if ctx.Debug > 1 { 205 | Printf("set name '%s' based on address '%s'\n", obj.Name, obj.Address) 206 | } 207 | } 208 | emails[i].Name = obj.Name 209 | emails[i].Address = obj.Address 210 | } 211 | ok = true 212 | return 213 | } 214 | -------------------------------------------------------------------------------- /email_test.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "net/mail" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | func TestParseAddresses(t *testing.T) { 10 | var ctx Ctx 11 | FatalOnError(os.Setenv("DA_DS", "ds")) 12 | ctx.Init() 13 | ctx.Debug = 2 14 | sameResult := func(a1, a2 []*mail.Address) bool { 15 | m1 := make(map[[2]string]struct{}) 16 | m2 := make(map[[2]string]struct{}) 17 | for _, a := range a1 { 18 | m1[[2]string{a.Name, a.Address}] = struct{}{} 19 | } 20 | for _, a := range a2 { 21 | m2[[2]string{a.Name, a.Address}] = struct{}{} 22 | } 23 | for k := range m1 { 24 | _, ok := m2[k] 25 | if !ok { 26 | return false 27 | } 28 | } 29 | for k := range m2 { 30 | _, ok := m1[k] 31 | if !ok { 32 | return false 33 | } 34 | } 35 | return true 36 | } 37 | var testCases = []struct { 38 | addr string 39 | expectedEmails []*mail.Address 40 | expectedOK bool 41 | }{ 42 | {addr: "Lukasz Gryglicki ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 43 | {addr: "Lukasz Gryglicki lgryglicki@cncf.io", expectedOK: false, expectedEmails: []*mail.Address{}}, 44 | {addr: `"Lukasz Gryglicki" `, expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 45 | {addr: " Lukasz Gryglicki\t ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 46 | {addr: " Lukasz Gryglicki\t ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 47 | {addr: "Lukasz Gryglicki ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 48 | {addr: "Lukasz Gryglicki ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}}}, 49 | { 50 | addr: "Lukasz Gryglicki,Justyna Gryglicka", 51 | expectedOK: true, 52 | 
expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}, {Name: "Justyna Gryglicka", Address: "jgryglicka@cncf.io"}}, 53 | }, 54 | { 55 | addr: "Lukasz Gryglicki\t , \tJustyna Gryglicka", 56 | expectedOK: true, 57 | expectedEmails: []*mail.Address{{Name: "Lukasz Gryglicki", Address: "lgryglicki@cncf.io"}, {Name: "Justyna Gryglicka", Address: "jgryglicka@cncf.io"}}, 58 | }, 59 | {addr: "a,d", expectedOK: true, expectedEmails: []*mail.Address{{Name: "d", Address: "e@f"}, {Name: "a", Address: "b@c"}}}, 60 | {addr: `a"b`, expectedOK: true, expectedEmails: []*mail.Address{{Name: "ab", Address: "ab@my.com"}}}, 61 | {addr: "me@domain.com", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}}}, 62 | {addr: `'"mia"' <'me@domain.com'>`, expectedOK: true, expectedEmails: []*mail.Address{{Name: "mia", Address: "me@domain.com"}}}, 63 | {addr: " luke\t \t \t", expectedOK: true, expectedEmails: []*mail.Address{{Name: "luke", Address: "me@domain.com"}}}, 64 | {addr: " luke\t \t < me@domain.com\t>\t", expectedOK: true, expectedEmails: []*mail.Address{{Name: "luke", Address: "me@domain.com"}}}, 65 | {addr: "\t i have\twhitespace \t < \t me@domain.com\t \t>\t \t", expectedOK: true, expectedEmails: []*mail.Address{{Name: "i have whitespace", Address: "me@domain.com"}}}, 66 | {addr: "", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}}}, 67 | {addr: "=?76dea4628?&", expectedOK: true, expectedEmails: []*mail.Address{{Name: "mail", Address: "mail@domain.com"}}}, 68 | // Jeremy Selan - some groups cut email addrss - we cannot parse this because we cannot guess what the cut value is, example group: SF+ocio-dev 69 | {addr: "bsloan ", expectedOK: false, expectedEmails: []*mail.Address{}}, 70 | {addr: "a, z, d, y", expectedOK: true, expectedEmails: []*mail.Address{{Name: "d, y", Address: "e@f"}, {Name: "a, z", Address: "b@c"}}}, 71 | {addr: " , ", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}, {Name: "you", Address: "you@domain.com"}}}, 72 | {addr: "me@domain.com,", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}, {Name: "you", Address: "you@domain.com"}}}, 73 | {addr: "< me@domain.com >, you@domain.com", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}, {Name: "you", Address: "you@domain.com"}}}, 74 | {addr: "me@domain.com,you@domain.com", expectedOK: true, expectedEmails: []*mail.Address{{Name: "me", Address: "me@domain.com"}, {Name: "you", Address: "you@domain.com"}}}, 75 | // we don't support such messy addresses, original code didn't support this neither 76 | {addr: "=?iso-8859-2?Q?Michal_=C8marada?= ", expectedOK: false, expectedEmails: []*mail.Address{}}, 77 | {addr: "=?Windows-1252?Q?Ivan_Hra=9Ako?= ", expectedOK: false, expectedEmails: []*mail.Address{}}, 78 | {addr: "=?iso-8859-2?Q?Radek_Krej=E8a?= ", expectedOK: false, expectedEmails: []*mail.Address{}}, 79 | {addr: `robert.konc@controlmatik.eu `, expectedOK: true, expectedEmails: []*mail.Address{{Name: "robert.konc", Address: "robert.konc@controlmatik.eu"}}}, 80 | { 81 | addr: ` =?windows-1257?Q?B=B8e=2C_Sebastian?= ,"robert.konc@controlmatik.eu" ,"devel@lists.zephyrproject.org" `, 82 | expectedOK: true, 83 | expectedEmails: []*mail.Address{{Name: "robert.konc", Address: "robert.konc@controlmatik.eu"}, {Name: "devel", Address: "devel@lists.zephyrproject.org"}}, 84 | }, 85 | } 86 | for index, test := range testCases { 87 | 
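// Each raw address string is parsed with the GroupsioMaxRecipients cap; sameResult
// compares expected and returned addresses as unordered sets, so result ordering
// does not affect the outcome.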
gotEmails, gotOK := ParseAddresses(&ctx, test.addr, GroupsioMaxRecipients) 88 | if gotOK != test.expectedOK { 89 | t.Errorf("test number %d, expected '%s' ok %v, got %v", index+1, test.addr, test.expectedOK, gotOK) 90 | } else { 91 | if !sameResult(gotEmails, test.expectedEmails) { 92 | t.Errorf("test number %d, expected '%s' to parse to %+v, got %+v", index+1, test.addr, test.expectedEmails, gotEmails) 93 | } 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /error.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime/debug" 7 | "time" 8 | ) 9 | 10 | // FatalOnError displays error message (if error present) and exits program 11 | func FatalOnError(err error) string { 12 | if err != nil { 13 | tm := time.Now() 14 | msg := fmt.Sprintf("DA_DS_ERROR(time=%+v):\nError: '%s'\nStacktrace:\n%s\n", tm, err.Error(), string(debug.Stack())) 15 | Printf("%s", msg) 16 | fmt.Fprintf(os.Stderr, "%s", msg) 17 | panic("stacktrace") 18 | } 19 | return OK 20 | } 21 | 22 | // Fatalf - it will call FatalOnError using fmt.Errorf with args provided 23 | func Fatalf(f string, a ...interface{}) { 24 | FatalOnError(fmt.Errorf(f, a...)) 25 | } 26 | -------------------------------------------------------------------------------- /exec.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "os/exec" 8 | ) 9 | 10 | // ExecCommand - execute command given by array of strings with eventual environment map 11 | func ExecCommand(ctx *Ctx, cmdAndArgs []string, cwd string, env map[string]string) (sout, serr string, err error) { 12 | command := cmdAndArgs[0] 13 | arguments := cmdAndArgs[1:] 14 | if ctx.Debug > 1 { 15 | Printf("executing command %s:%v:%+v\n", cwd, env, cmdAndArgs) 16 | } 17 | cmd := exec.Command(command, arguments...) 18 | if len(env) > 0 { 19 | newEnv := os.Environ() 20 | for key, value := range env { 21 | newEnv = append(newEnv, key+"="+value) 22 | } 23 | cmd.Env = newEnv 24 | } 25 | var ( 26 | stdOut bytes.Buffer 27 | stdErr bytes.Buffer 28 | ) 29 | cmd.Stdout = &stdOut 30 | cmd.Stderr = &stdErr 31 | if cwd != "" { 32 | cmd.Dir = cwd 33 | } 34 | err = cmd.Start() 35 | if err != nil { 36 | return 37 | } 38 | err = cmd.Wait() 39 | sout = stdOut.String() 40 | serr = stdErr.String() 41 | if ctx.Debug > 1 { 42 | Printf("executed command %s:%v:%+v -> (%v,%s,%s)\n", cwd, env, cmdAndArgs, err, StringTrunc(sout, 0x400, true), StringTrunc(serr, 0x400, true)) 43 | } 44 | return 45 | } 46 | 47 | // ExecCommandPipe - execute command given by array of strings with eventual environment map, return STDOUT pipe to read from 48 | func ExecCommandPipe(ctx *Ctx, cmdAndArgs []string, cwd string, env map[string]string) (stdOutPipe io.ReadCloser, cmd *exec.Cmd, err error) { 49 | command := cmdAndArgs[0] 50 | arguments := cmdAndArgs[1:] 51 | if ctx.Debug > 1 { 52 | Printf("executing non-blocking command %s:%v:%+v\n", cwd, env, cmdAndArgs) 53 | } 54 | cmd = exec.Command(command, arguments...) 
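// As in ExecCommand above, any caller-supplied env entries are appended on top of
// the current process environment before the command is started.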
55 | if len(env) > 0 { 56 | newEnv := os.Environ() 57 | for key, value := range env { 58 | newEnv = append(newEnv, key+"="+value) 59 | } 60 | cmd.Env = newEnv 61 | } 62 | if cwd != "" { 63 | cmd.Dir = cwd 64 | } 65 | stdOutPipe, err = cmd.StdoutPipe() 66 | if err != nil { 67 | return 68 | } 69 | err = cmd.Start() 70 | if err != nil { 71 | return 72 | } 73 | return 74 | } 75 | -------------------------------------------------------------------------------- /flag.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "strconv" 5 | "time" 6 | ) 7 | 8 | // Flag gets CLI flag values 9 | type Flag string 10 | 11 | // NewFlag ... 12 | func NewFlag() *Flag { 13 | s := Flag("") 14 | return &s 15 | } 16 | 17 | // String gets string value 18 | func (f *Flag) String() string { 19 | if f != nil { 20 | return string(*f) 21 | } 22 | 23 | return "" 24 | } 25 | 26 | // Set flag value 27 | func (f *Flag) Set(val string) error { 28 | *f = Flag(val) 29 | return nil 30 | } 31 | 32 | // Bool gets flag bool value 33 | func (f *Flag) Bool() bool { 34 | if f != nil { 35 | val, err := strconv.ParseBool(f.String()) 36 | if err != nil { 37 | return false 38 | } 39 | 40 | return val 41 | } 42 | 43 | return false 44 | } 45 | 46 | // Int gets flag int value 47 | func (f *Flag) Int() int { 48 | if f != nil { 49 | val, err := strconv.Atoi(f.String()) 50 | if err != nil { 51 | return 0 52 | } 53 | return val 54 | } 55 | 56 | return 0 57 | } 58 | 59 | // Date gets flag date value 60 | func (f *Flag) Date() *time.Time { 61 | if f != nil { 62 | date, err := time.Parse("2006-01-02 15:04:05", f.String()) 63 | if err != nil { 64 | return nil 65 | } 66 | return &date 67 | } 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/LF-Engineering/da-ds 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/LF-Engineering/dev-analytics-libraries v1.1.28 7 | github.com/PuerkitoBio/goquery v1.6.0 8 | github.com/andybalholm/cascadia v1.2.0 // indirect 9 | github.com/araddon/dateparse v0.0.0-20210207001429-0eec95c9db7e 10 | github.com/go-sql-driver/mysql v1.5.0 11 | github.com/google/go-github/v38 v38.1.0 12 | github.com/jmoiron/sqlx v1.2.0 13 | github.com/json-iterator/go v1.1.10 14 | github.com/lib/pq v1.9.0 // indirect 15 | github.com/mattn/go-sqlite3 v1.14.5 // indirect 16 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 17 | github.com/modern-go/reflect2 v1.0.1 // indirect 18 | github.com/stretchr/objx v0.3.0 // indirect 19 | github.com/stretchr/testify v1.7.0 20 | golang.org/x/net v0.0.0-20201216054612-986b41b23924 21 | golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5 22 | google.golang.org/api v0.30.0 23 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 24 | gopkg.in/h2non/gock.v1 v1.0.16 25 | gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect 26 | ) 27 | -------------------------------------------------------------------------------- /googlegroups/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | setUp(){ 4 | 5 | venv=/Users/code/da-ds/googlegroups/venv 6 | workDir=/Users/code/da-ds/googlegroups/archives 7 | jsonFilesDir=/Users/code/da-ds/googlegroups/jsonfiles 8 | mboxScript=/Users/code/da-ds/googlegroups/ggmbox.py 9 | logsDir=/Users/code/da-ds/googlegroups/logs/ 10 | 11 | cd 
$workDir || exit 12 | 13 | googleGroup="$1" 14 | jsonFile=${jsonFilesDir}"/""$1".json 15 | current_time=$(date "+%Y.%m.%d-%H.%M.%S") 16 | # shellcheck disable=SC2001 17 | sanitizeLogsFile=$(echo "${googleGroup}" |sed 's#/#-#g') 18 | logfile=${logsDir}${sanitizeLogsFile}.${current_time}.txt 19 | 20 | # delete existing json file 21 | rm -rf "${jsonFile}" 22 | 23 | if [ -e "$venv" ]; then 24 | source $venv/bin/activate 25 | scrapy runspider -a name="${googleGroup}" -o "${jsonFile}" -t json ${mboxScript} > "${logfile}" 2>&1 26 | else 27 | virtualenv --python=python3 $venv 28 | source $venv/bin/activate 29 | pip install scrapy 30 | scrapy runspider -a name="${googleGroup}" -o "${jsonFile}" -t json ${mboxScript} > "${logfile}" 2>&1 31 | fi 32 | } 33 | 34 | main(){ 35 | setUp "$1" 36 | } 37 | 38 | main "$@" 39 | -------------------------------------------------------------------------------- /googlegroups/const.go: -------------------------------------------------------------------------------- 1 | package googlegroups 2 | 3 | import ( 4 | "regexp" 5 | "time" 6 | ) 7 | 8 | const ( 9 | // GoogleGroups ... 10 | GoogleGroups = "googlegroups" 11 | // malformedMIMEHeaderLineErrorMessage 12 | malformedMIMEHeaderLineErrorMessage = "malformed MIME header line: " 13 | // Unknown ... 14 | Unknown = "Unknown" 15 | // CredentialsSSMParamName from ssm 16 | CredentialsSSMParamName = "insights_googlegroups_credentials" 17 | // TokenSSMParamName from ssm 18 | TokenSSMParamName = "insights_googlegroups_token" 19 | // MaxNumberOfMessages from gmail 20 | MaxNumberOfMessages = 10000000 21 | // MaxConcurrentRequests ... 22 | MaxConcurrentRequests = 10000 23 | ) 24 | 25 | var ( 26 | // GoogleGroupRichMapping ... 27 | GoogleGroupRichMapping = []byte(`{"mappings":{"dynamic_templates":[{"notanalyzed":{"match":"*","match_mapping_type":"string","mapping":{"type":"keyword"}}},{"int_to_float":{"match":"*","match_mapping_type":"long","mapping":{"type":"float"}}},{"formatdate":{"match":"*","match_mapping_type":"date","mapping":{"format":"strict_date_optional_time||epoch_millis","type":"date"}}}]}}`) 28 | // GoogleGroupRawMapping ... 29 | GoogleGroupRawMapping = []byte(`{"mappings":{"dynamic":true,"properties":{"metadata__updated_on":{"type":"date"},"data":{"properties":{"body":{"dynamic":false,"properties":{}}}}}}}`) 30 | // base64RE ... 31 | base64RE = regexp.MustCompile("^([a-zA-Z0-9+/]+\\r\\n)+[a-zA-Z0-9+/]+={0,2}$") 32 | // DefaultDateTime ... 33 | DefaultDateTime = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 34 | ) 35 | -------------------------------------------------------------------------------- /googlegroups/dto.go: -------------------------------------------------------------------------------- 1 | package googlegroups 2 | 3 | import "time" 4 | 5 | // GoogleGroupMessages ... 6 | type GoogleGroupMessages struct { 7 | Messages []*GoogleGroupMessageThread 8 | } 9 | 10 | // GoogleGroupMessageThread ... 11 | type GoogleGroupMessageThread struct { 12 | Topic string `json:"topic"` 13 | ID string `json:"id"` 14 | Messages []*GoogleGroupMessage `json:"messages"` 15 | } 16 | 17 | // GoogleGroupMessage ... 18 | type GoogleGroupMessage struct { 19 | ID string `json:"id"` 20 | Author string `json:"author"` 21 | Date string `json:"date"` 22 | File string `json:"file"` 23 | Message string `json:"message"` 24 | } 25 | 26 | // EnrichedMessage ... 
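// EnrichedMessage is the document written to the googlegroups rich index: the raw
// message fields plus the identity/affiliation data (author_id, author_uuid,
// author_org_name, author_multi_org_names, ...) resolved through the affiliations client.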
27 | type EnrichedMessage struct { 28 | From string `json:"from"` 29 | Date time.Time `json:"date"` 30 | To []string `json:"to"` 31 | MessageID string `json:"message_id"` 32 | InReplyTo string `json:"in_reply_to"` 33 | References string `json:"references"` 34 | Subject string `json:"subject"` 35 | Topic string `json:"topic"` 36 | MessageBody string `json:"message_body"` 37 | TopicID string `json:"topic_id"` 38 | BackendVersion string `json:"backend_version"` 39 | UUID string `json:"uuid"` 40 | Origin string `json:"origin"` 41 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 42 | BackendName string `json:"backend_name"` 43 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 44 | MetadataEnrichedOn time.Time `json:"metadata__enriched_on"` 45 | ProjectSlug string `json:"project_slug"` 46 | GroupName string `json:"group_name"` 47 | Project string `json:"project"` 48 | Root bool `json:"root"` 49 | FromBot bool `json:"from_bot"` 50 | ChangedAt time.Time `json:"changed_at"` 51 | AuthorName string `json:"author_name"` 52 | AuthorID string `json:"author_id"` 53 | AuthorUUID string `json:"author_uuid"` 54 | AuthorOrgName string `json:"author_org_name"` 55 | AuthorUserName string `json:"author_user_name"` 56 | AuthorBot bool `json:"author_bot"` 57 | AuthorMultiOrgNames []string `json:"author_multi_org_names"` 58 | MboxAuthorDomain string `json:"mbox_author_domain"` 59 | IsGoogleGroupMessage int `json:"is_google_group_message"` 60 | Timezone int `json:"timezone"` 61 | ViaCommunityGroup bool `json:"via_community_group"` 62 | } 63 | 64 | // RawMessage represents GoogleGroups raw message 65 | type RawMessage struct { 66 | From string `json:"from"` 67 | Date time.Time `json:"date"` 68 | To []string `json:"to"` 69 | MessageID string `json:"message_id"` 70 | InReplyTo string `json:"in_reply_to"` 71 | References string `json:"references"` 72 | Subject string `json:"subject"` 73 | MessageBody string `json:"message_body"` 74 | TopicID string `json:"topic_id"` 75 | Topic string `json:"topic"` 76 | BackendVersion string `json:"backend_version"` 77 | UUID string `json:"uuid"` 78 | Origin string `json:"origin"` 79 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 80 | BackendName string `json:"backend_name"` 81 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 82 | ProjectSlug string `json:"project_slug"` 83 | GroupName string `json:"group_name"` 84 | Project string `json:"project"` 85 | ChangedAt time.Time `json:"changed_at"` 86 | Timezone int `json:"timezone"` 87 | } 88 | 89 | // RawHits result 90 | type RawHits struct { 91 | Hits NHits `json:"hits"` 92 | } 93 | 94 | // NHits result 95 | type NHits struct { 96 | Hits []NestedRawHits `json:"hits"` 97 | } 98 | 99 | // NestedRawHits is the actual hit data 100 | type NestedRawHits struct { 101 | ID string `json:"_id"` 102 | Source RawMessage `json:"_source"` 103 | } 104 | 105 | // HeadersData struct 106 | type HeadersData struct { 107 | // Date is the date the message was originally sent 108 | Date string 109 | // MessageID is the message id 110 | MessageID string 111 | // InReplyTo is who the email was sent to. This can contain multiple 112 | // addresses if the email was forwarded. 113 | InReplyTo string 114 | // References 115 | References string 116 | // Sender is the entity that originally created and sent the message 117 | Sender string 118 | // From is the name - email address combo of the email author 119 | From string 120 | // Subject is the subject of the email 121 | Subject string 122 | // To is the email recipient. 
123 | To []string 124 | // DeliveredTo is to whom the email was sent to. This can contain multiple 125 | // addresses if the email was forwarded. 126 | DeliveredTo []string 127 | MailingList string 128 | } 129 | -------------------------------------------------------------------------------- /googlegroups/enricher_test.go: -------------------------------------------------------------------------------- 1 | package googlegroups 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/LF-Engineering/da-ds/googlegroups/mocks" 8 | "github.com/LF-Engineering/dev-analytics-libraries/affiliation" 9 | "github.com/LF-Engineering/dev-analytics-libraries/elastic" 10 | jsoniter "github.com/json-iterator/go" 11 | "github.com/stretchr/testify/assert" 12 | _ "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestEnrichMessage(t *testing.T) { 16 | type fields struct { 17 | DSName string 18 | ElasticSearchProvider *elastic.ClientProvider 19 | BackendVersion string 20 | } 21 | type args struct { 22 | rawItem RawMessage 23 | project string 24 | now time.Time 25 | } 26 | 27 | rawItem1String := `{"from":"Jone Dow ","date":"2021-03-05T03:35:19-08:00","to":["GAM for Google Workspace "],"message_id":"<000621f3-1eac-4c3a-b61f-2a785169e4d4n@googlegroups.com>","in_reply_to":"","references":"<272d9c9b-f0ec-418d-becf-89d3d29f14c0n@googlegroups.com> ","subject":"Re: [GAM] Cannot update some users info","message_body":"Hi Jay, One more point to add, I can update the user account info I created on Gsuite admin portal manually. But I cannot edit user account which is existing on Gsuite already even it is student or","topic_id":"","topic":"","backend_version":"0.0.1","uuid":"f9fb3a591ee4b9050f3e7e2a03b09c876e2e8413","origin":"https://groups.google.com/g/google-apps-manager","updated_on":0,"metadata__updated_on":"2021-03-05T03:35:19-08:00","backend_name":"GoogleGroupsFetch","metadata__timestamp":"2021-03-05T11:38:43.3631Z","timestamp":0,"project_slug":"project1","group_name":"google-apps-manager","project":"project1","changed_at":"2021-03-05T11:38:43.3631Z","timezone":-8}` 28 | googleGroupsRaw1, err := toGoogleGroupsRaw(rawItem1String) 29 | if err != nil { 30 | t.Error(err) 31 | } 32 | 33 | enrichItem1String := `{"from":"Jone Dow","date":"2021-03-05T03:35:19-08:00","to":["GAM for Google Workspace "],"message_id":"<000621f3-1eac-4c3a-b61f-2a785169e4d4n@googlegroups.com>","in_reply_to":"","references":"<272d9c9b-f0ec-418d-becf-89d3d29f14c0n@googlegroups.com> ","subject":"Re: [GAM] Cannot update some users info","topic":"","message_body":"Hi Jay, One more point to add, I can update the user account info I created on Gsuite admin portal manually. 
But I cannot edit user account which is existing on Gsuite already even it is student or","topic_id":"","backend_version":"0.0.1","uuid":"f9fb3a591ee4b9050f3e7e2a03b09c876e2e8413","origin":"https://groups.google.com/g/google-apps-manager","updated_on":0,"metadata__updated_on":"2021-03-05T03:35:19-08:00","backend_name":"GoogleGroupsEnrich","metadata__timestamp":"2021-03-05T11:38:43.3631Z","metadata__enriched_on":"2021-03-05T11:40:45.22395Z","timestamp":0,"project_slug":"project1","group_name":"google-apps-manager","project":"project1","root":false,"from_bot":false,"changed_at":"2021-03-05T11:38:43.3631Z","author_name":"Jone Dow","author_id":"20328dba9d970328af607179cd21b25039d85340","author_uuid":"20328dba9d970328af607179cd21b25039d85340","author_gender":"Unknown","author_org_name":"Unknown","author_user_name":"","author_bot":false,"author_gender_acc":0,"author_multi_org_names":["Unknown"],"mbox_author_domain":"g.lfis.edu.hk","is_google_group_message":1,"timezone":-8}` 34 | googleGroupsEnrich1, err := toGoogleGroupsEnrich(enrichItem1String) 35 | if err != nil { 36 | t.Error(err) 37 | } 38 | 39 | tests := []struct { 40 | name string 41 | fields fields 42 | args args 43 | want *EnrichedMessage 44 | wantErr bool 45 | }{ 46 | { 47 | name: "Test Case #1", 48 | fields: fields{ 49 | DSName: "GoogleGroups", 50 | ElasticSearchProvider: nil, 51 | BackendVersion: "0.0.1", 52 | }, 53 | args: args{ 54 | rawItem: googleGroupsRaw1, 55 | project: "project1", 56 | now: time.Time{}, 57 | }, 58 | want: &googleGroupsEnrich1, 59 | wantErr: false, 60 | }, 61 | } 62 | for _, tt := range tests { 63 | t.Run(tt.name, func(t *testing.T) { 64 | zero := int64(0) 65 | aff1UUID := "20328dba9d970328af607179cd21b25039d85340" 66 | fakeAff1 := &affiliation.AffIdentity{ID: &aff1UUID, 67 | UUID: &aff1UUID, Name: "Qian", IsBot: &zero, 68 | Domain: "", OrgName: nil, Username: "", 69 | MultiOrgNames: []string{}, 70 | } 71 | 72 | fakeOrganizations1 := &[]affiliation.Enrollment{ 73 | { 74 | ID: 1, 75 | Organization: struct { 76 | ID int `json:"id"` 77 | Name string `json:"name"` 78 | }{ 79 | ID: 1, 80 | Name: "Org1", 81 | }, 82 | OrganizationID: 1, 83 | Role: "Contributor", 84 | UUID: "20328dba9d970328af607179cd21b25039d85340", 85 | }, 86 | } 87 | userIdentity := affiliation.Identity{ 88 | Name: "Jone Dow", 89 | Source: "googlegroups", 90 | Email: "jone.dow@gmail.com", 91 | ID: "894b751382341e2d958ba48f235c37b75690b194", 92 | } 93 | affProviderMock := &mocks.AffiliationClient{} 94 | affProviderMock.On("GetIdentityByUser", "id", "894b751382341e2d958ba48f235c37b75690b194").Return(fakeAff1, nil) 95 | affProviderMock.On("GetOrganizations", "20328dba9d970328af607179cd21b25039d85340", "project1").Return(fakeOrganizations1, nil) 96 | affProviderMock.On("AddIdentity", &userIdentity).Return(true) 97 | 98 | e := &Enricher{ 99 | DSName: tt.fields.DSName, 100 | ElasticSearchProvider: tt.fields.ElasticSearchProvider, 101 | affiliationsClientProvider: affProviderMock, 102 | } 103 | // 104 | got, err := e.EnrichMessage(&tt.args.rawItem, tt.args.now) 105 | if (err != nil) != tt.wantErr { 106 | t.Errorf("EnrichItem() error = %v, wantErr %v", err, tt.wantErr) 107 | return 108 | } 109 | assert.NotEqual(t, got, nil) 110 | assert.Equal(t, tt.want.UUID, got.UUID) 111 | assert.Equal(t, tt.want.MessageID, got.MessageID) 112 | assert.Equal(t, tt.want.AuthorUUID, got.AuthorUUID) 113 | assert.Equal(t, tt.want.Origin, got.Origin) 114 | }) 115 | } 116 | } 117 | 118 | func toGoogleGroupsEnrich(b string) (EnrichedMessage, error) { 119 | expectedEnrich := 
EnrichedMessage{} 120 | err := jsoniter.Unmarshal([]byte(b), &expectedEnrich) 121 | return expectedEnrich, err 122 | } 123 | 124 | func toGoogleGroupsRaw(b string) (RawMessage, error) { 125 | expectedRaw := RawMessage{} 126 | err := jsoniter.Unmarshal([]byte(b), &expectedRaw) 127 | return expectedRaw, err 128 | } 129 | -------------------------------------------------------------------------------- /googlegroups/fetcher_test.go: -------------------------------------------------------------------------------- 1 | package googlegroups 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/LF-Engineering/dev-analytics-libraries/elastic" 8 | JSONiter "github.com/json-iterator/go" 9 | "github.com/stretchr/testify/assert" 10 | "google.golang.org/api/gmail/v1" 11 | ) 12 | 13 | // TestFetcherSimpleFetchItem tests the working of the fetchItem function 14 | func TestFetchMessage(t *testing.T) { 15 | type fields struct { 16 | DSName string 17 | ElasticSearchProvider *elastic.ClientProvider 18 | BackendVersion string 19 | GroupName string 20 | } 21 | type args struct { 22 | gmailMessage *gmail.Message 23 | project string 24 | now time.Time 25 | } 26 | 27 | rawItem1String := `{"from":"Jone Dow ","date":"2021-03-05T03:35:19-08:00","to":["GAM for Google Workspace "],"message_id":"<000621f3-1eac-4c3a-b61f-2a785169e4d4n@googlegroups.com>","in_reply_to":"","references":"<272d9c9b-f0ec-418d-becf-89d3d29f14c0n@googlegroups.com> ","subject":"Re: [GAM] Cannot update some users info","message_body":"Hi Jay, One more point to add, I can update the user account info I created on GSuite admin portal manually. But I cannot edit user account which is existing on GSuite already even it is student or","topic_id":"","topic":"","backend_version":"0.0.1","uuid":"f9fb3a591ee4b9050f3e7e2a03b09c876e2e8413","origin":"https://groups.google.com/g/google-apps-manager","updated_on":0,"metadata__updated_on":"2021-03-05T03:35:19-08:00","backend_name":"GoogleGroupsFetch","metadata__timestamp":"2021-03-05T11:38:43.3631Z","timestamp":0,"project_slug":"project1","group_name":"google-apps-manager","project":"project1","changed_at":"2021-03-05T11:38:43.3631Z","timezone":-8}` 28 | googleGroupsRaw1, err := toGoogleGroupsRaw(rawItem1String) 29 | if err != nil { 30 | t.Error(err) 31 | } 32 | 33 | gmailItem1String := `{"id":"1698138e68ca","threadId":"1698138e68ca","labelIds":["UNREAD","IMPORTANT","CATEGORY_PERSONAL","INBOX"],"snippet":"Hi Jay, One more point to add, I can update the user account info I created on GSuite admin portal manually. 
But I cannot edit user account which is existing on GSuite already even it is student or","historyId":"270427","internalDate":"1554492714000","payload":{"partId":"","mimeType":"text/plain","filename":"","headers":[{"name":"Delivered-To","value":"google-apps-manager@googlegroups.com"},{"name":"Return-Path","value":""},{"name":"From","value":"Jone Dow "},{"name":"To","value":"GAM for Google Workspace "},{"name":"Subject","value":"Re: [GAM] Cannot update some users info"},{"name":"Thread-Topic","value":"Plain text sample email"},{"name":"Thread-Index","value":"AdTr5jkL493BeKJkSt2I+4R5TWw=="},{"name":"Date","value":"Fri, 5 Apr 2019 19:31:54 +0000"},{"name":"Message-ID","value":"<000621f3-1eac-4c3a-b61f-2a785169e4d4n@googlegroups.com>"},{"name":"Accept-Language","value":"en-US"},{"name":"Content-Language","value":"en-US"},{"name":"authentication-results","value":"spf=none (sender IP is ) smtp.mail.from=outlook.tester@salesforceemail.com;"},{"name":"Content-Type","value":"text/plain; charset=\"us-ascii\""},{"name":"Content-Transfer-Encoding","value":"quoted-printable"}],"body":{"size":146,"data":"DQoNCG4gU21pdGgNCkNFTyBvZiBCaWdDbw0KQ2VsbCAtIDYxOS0zNDQtMzMyMg0KT2ZmaWNlIC0gNjE5LTM0NS0yMzMzDQpTYW"}},"sizeEstimate":6978}` 34 | gmailMessage1, err := toGmailMessage(gmailItem1String) 35 | if err != nil { 36 | t.Error(err) 37 | } 38 | 39 | tests := []struct { 40 | name string 41 | fields fields 42 | args args 43 | want *RawMessage 44 | wantErr bool 45 | }{ 46 | { 47 | name: "Test Case #1", 48 | fields: fields{ 49 | DSName: "GoogleGroups", 50 | ElasticSearchProvider: nil, 51 | BackendVersion: "0.0.1", 52 | GroupName: "google-apps-manager@googlegroups.com", 53 | }, 54 | args: args{ 55 | gmailMessage: gmailMessage1, 56 | project: "project1", 57 | now: time.Time{}, 58 | }, 59 | want: &googleGroupsRaw1, 60 | wantErr: false, 61 | }, 62 | } 63 | 64 | for _, tt := range tests { 65 | t.Run(tt.name, func(t *testing.T) { 66 | f := &Fetcher{ 67 | DSName: tt.fields.DSName, 68 | HTTPClientProvider: nil, 69 | ElasticSearchProvider: tt.fields.ElasticSearchProvider, 70 | BackendVersion: tt.fields.BackendVersion, 71 | GroupName: tt.fields.GroupName, 72 | } 73 | now := time.Now() 74 | defaultDate := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 75 | got, err := f.getMessage(tt.args.gmailMessage, &defaultDate, &now) 76 | 77 | if (err != nil) != tt.wantErr { 78 | t.Errorf("FetchItem() error = %v, wantErr %v", err, tt.wantErr) 79 | return 80 | } 81 | assert.NotEqual(t, got, nil) 82 | assert.Equal(t, tt.want.MessageID, got.MessageID) 83 | assert.Equal(t, tt.want.From, got.From) 84 | assert.Equal(t, tt.want.Subject, got.Subject) 85 | }) 86 | } 87 | } 88 | 89 | func toGmailMessage(b string) (*gmail.Message, error) { 90 | gmailMessage := gmail.Message{} 91 | err := JSONiter.Unmarshal([]byte(b), &gmailMessage) 92 | return &gmailMessage, err 93 | } 94 | -------------------------------------------------------------------------------- /googlegroups/ggmbox.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | 4 | import scrapy 5 | import scrapy.http.response.html 6 | import scrapy.http.response.text 7 | 8 | 9 | class GoogleGroupMBoxSpider(scrapy.Spider): 10 | """ 11 | We use "?_escaped_fragment_=forum" trick to fetch plain HTML pages. 12 | 13 | Usage: 14 | 15 | scrapy runspider -a name=? 
ggmbox.py 16 | """ 17 | name = "ggmbox" 18 | 19 | def __init__(self, name: str, template="{topic}/{index:03d}_{message}.email", output="{name}", 20 | root="https://groups.google.com", prefix="", **kwargs): 21 | """ 22 | Initializes a new instance of GoogleGroupMBoxSpider class. 23 | 24 | :param name: group name, e.g. "golang-nuts" or "finos.org/dav" for organization groups. 25 | :param template: `str.format()` raw email file name template. Supported keys: \ 26 | topic - topic identifier, \ 27 | index - message index in the thread, \ 28 | message - message identifier. \ 29 | The directories are automatically created. 30 | :param output: output directory. 31 | :param root: common root of all the URLs. 32 | :param kwargs: scrapy internal. 33 | """ 34 | super().__init__(**kwargs) 35 | self.name = name 36 | self.output = output.format(name=name) 37 | self.template = template 38 | 39 | """ 40 | handle organization groups 41 | """ 42 | is_org = name.find('/') 43 | if is_org == -1: 44 | self.root = root 45 | else: 46 | split_string = name.split("/", 1) 47 | org = split_string[0] 48 | new_root = "%s/a/%s" % (root, org) 49 | self.name = split_string[1] 50 | name = split_string[1] 51 | self.root = new_root 52 | 53 | # if not prefix.endswith("/"): 54 | # prefix += "/" 55 | self.prefix = "forum" 56 | self.start_urls = ["%s/%s/?_escaped_fragment_=forum/%s" % (self.root, self.prefix, name)] 57 | 58 | def parse(self, response: scrapy.http.response.html.HtmlResponse): 59 | for topic in response.css("tr a::attr(href)"): 60 | topic_url = "%s/%s/?_escaped_fragment_=topic/%s/%s" % ( 61 | self.root, self.prefix, self.name, self.last_part(topic.extract())) 62 | yield response.follow(topic_url, self.parse_topic) 63 | 64 | for next_page in response.css("body > a"): 65 | self.log("Page: %s -> %s" % ( 66 | self.last_part(response.url), 67 | self.last_part(next_page.css("::attr(href)").extract_first()))) 68 | yield response.follow(next_page, self.parse) 69 | 70 | def parse_topic(self, response: scrapy.http.response.html.HtmlResponse): 71 | messages = [] 72 | topic_id = self.last_part(response.url) 73 | for i, message in enumerate(response.css("tr")): 74 | topic_url = message.css("td[class=subject] > a::attr(href)").extract_first() 75 | if topic_url is None: 76 | continue 77 | message_id = self.last_part(topic_url) 78 | messages.append({ 79 | "id": message_id, 80 | "author": message.css("td[class=author] ::text").extract_first(), 81 | "date": message.css("td[class=lastPostDate] ::text").extract_first(), 82 | "file": self.locate_email_file(topic_id, i, message_id, False) 83 | }) 84 | file_name = self.locate_email_file(topic_id, i, message_id, True) 85 | if os.path.exists(file_name): 86 | self.log("Skipped %s/%s - already fetched" % (topic_id, message_id)) 87 | continue 88 | yield response.follow( 89 | "%s/%s/message/raw?msg=%s/%s/%s" % (self.root, self.prefix, self.name, 90 | topic_id, message_id), 91 | functools.partial(self.save_email, file_name=file_name)) 92 | yield {"topic": response.css("h2 ::text").extract_first(), 93 | "id": topic_id, 94 | "messages": messages} 95 | 96 | def save_email(self, response: scrapy.http.response.text.TextResponse, file_name: str): 97 | with open(file_name, "wb") as fout: 98 | fout.write(response.body) 99 | 100 | @staticmethod 101 | def last_part(url): 102 | return url.rsplit("/", 1)[1] 103 | 104 | def locate_email_file(self, topic: str, index: int, message: str, full: bool): 105 | file_name = self.template.format(topic=topic, index=index, message=message) 106 | if full: 107 | 
file_name = os.path.join(self.output, file_name) 108 | file_dir = os.path.dirname(file_name) 109 | if not os.path.isdir(file_dir): 110 | os.makedirs(file_dir, exist_ok=True) 111 | return file_name 112 | -------------------------------------------------------------------------------- /googlegroups/mocks/affiliation_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import ( 6 | affiliation "github.com/LF-Engineering/dev-analytics-libraries/affiliation" 7 | 8 | mock "github.com/stretchr/testify/mock" 9 | ) 10 | 11 | // AffiliationClient is an autogenerated mock type for the AffiliationClient type 12 | type AffiliationClient struct { 13 | mock.Mock 14 | } 15 | 16 | // AddIdentity provides a mock function with given fields: identity 17 | func (_m *AffiliationClient) AddIdentity(identity *affiliation.Identity) bool { 18 | ret := _m.Called(identity) 19 | 20 | var r0 bool 21 | if rf, ok := ret.Get(0).(func(*affiliation.Identity) bool); ok { 22 | r0 = rf(identity) 23 | } else { 24 | r0 = ret.Get(0).(bool) 25 | } 26 | 27 | return r0 28 | } 29 | 30 | // GetIdentityByUser provides a mock function with given fields: key, value 31 | func (_m *AffiliationClient) GetIdentityByUser(key string, value string) (*affiliation.AffIdentity, error) { 32 | ret := _m.Called(key, value) 33 | 34 | var r0 *affiliation.AffIdentity 35 | if rf, ok := ret.Get(0).(func(string, string) *affiliation.AffIdentity); ok { 36 | r0 = rf(key, value) 37 | } else { 38 | if ret.Get(0) != nil { 39 | r0 = ret.Get(0).(*affiliation.AffIdentity) 40 | } 41 | } 42 | 43 | var r1 error 44 | if rf, ok := ret.Get(1).(func(string, string) error); ok { 45 | r1 = rf(key, value) 46 | } else { 47 | r1 = ret.Error(1) 48 | } 49 | 50 | return r0, r1 51 | } 52 | 53 | // GetOrganizations provides a mock function with given fields: uuid, projectSlug 54 | func (_m *AffiliationClient) GetOrganizations(uuid string, projectSlug string) *[]affiliation.Enrollment { 55 | ret := _m.Called(uuid, projectSlug) 56 | 57 | var r0 *[]affiliation.Enrollment 58 | if rf, ok := ret.Get(0).(func(string, string) *[]affiliation.Enrollment); ok { 59 | r0 = rf(uuid, projectSlug) 60 | } else { 61 | if ret.Get(0) != nil { 62 | r0 = ret.Get(0).(*[]affiliation.Enrollment) 63 | } 64 | } 65 | 66 | return r0 67 | } 68 | -------------------------------------------------------------------------------- /googlegroups/mocks/auth_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
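// A minimal usage sketch for the generated mocks in this package (illustrative values, not part of the generated code):
//   authMock := &AuthClientProvider{}
//   authMock.On("GetToken", "dev").Return("some-token", nil)
//   token, err := authMock.GetToken("dev") // yields "some-token", nil
// Each interface method is stubbed with testify's On(...).Return(...) before the mock is handed to the code under test.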
2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // AuthClientProvider is an autogenerated mock type for the AuthClientProvider type 8 | type AuthClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // GetToken provides a mock function with given fields: env 13 | func (_m *AuthClientProvider) GetToken(env string) (string, error) { 14 | ret := _m.Called(env) 15 | 16 | var r0 string 17 | if rf, ok := ret.Get(0).(func(string) string); ok { 18 | r0 = rf(env) 19 | } else { 20 | r0 = ret.Get(0).(string) 21 | } 22 | 23 | var r1 error 24 | if rf, ok := ret.Get(1).(func(string) error); ok { 25 | r1 = rf(env) 26 | } else { 27 | r1 = ret.Error(1) 28 | } 29 | 30 | return r0, r1 31 | } 32 | -------------------------------------------------------------------------------- /jenkins/const.go: -------------------------------------------------------------------------------- 1 | package jenkins 2 | 3 | import "time" 4 | 5 | var ( 6 | // Jenkins represents the name of data source 7 | Jenkins = "jenkins" 8 | // Depth is the attribute to be passed onto the /api/json 9 | Depth = 1 10 | // BuildCategory is the default category for jenkins build 11 | BuildCategory = "build" 12 | // DefaultDateTime is the default time used when no time is provided 13 | DefaultDateTime = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 14 | // ListView represents the classname of the jenkins jobs that are views 15 | ListView = "hudson.model.ListView" 16 | // UndefinedCategory is used for jobs without categories 17 | UndefinedCategory = "UNDEFINED_CATEGORY" 18 | ) 19 | -------------------------------------------------------------------------------- /json.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | jsoniter "github.com/json-iterator/go" 5 | ) 6 | 7 | // PrettyPrintJSON - pretty formats raw JSON bytes 8 | func PrettyPrintJSON(jsonBytes []byte) []byte { 9 | var jsonObj interface{} 10 | FatalOnError(jsoniter.Unmarshal(jsonBytes, &jsonObj)) 11 | pretty, err := jsoniter.MarshalIndent(jsonObj, "", " ") 12 | FatalOnError(err) 13 | return pretty 14 | } 15 | 16 | // JSONEscape - escape string for JSON to avoid injections 17 | func JSONEscape(str string) string { 18 | b, _ := jsoniter.Marshal(str) 19 | return string(b[1 : len(b)-1]) 20 | } 21 | -------------------------------------------------------------------------------- /json.json: -------------------------------------------------------------------------------- 1 | { 2 | "dynamic": true, 3 | "properties": { 4 | "file_data": { 5 | "type": "nested" 6 | }, 7 | "authors_signed": { 8 | "type": "nested" 9 | }, 10 | "authors_co_authored": { 11 | "type": "nested" 12 | }, 13 | "authors_tested": { 14 | "type": "nested" 15 | }, 16 | "authors_approved": { 17 | "type": "nested" 18 | }, 19 | "authors_reviewed": { 20 | "type": "nested" 21 | }, 22 | "authors_reported": { 23 | "type": "nested" 24 | }, 25 | "authors_informed": { 26 | "type": "nested" 27 | }, 28 | "authors_resolved": { 29 | "type": "nested" 30 | }, 31 | "authors_influenced": { 32 | "type": "nested" 33 | }, 34 | "author_name": { 35 | "type": "keyword" 36 | }, 37 | "metadata__updated_on": { 38 | "type": "date", 39 | "format": "strict_date_optional_time||epoch_millis" 40 | }, 41 | "message_analyzed": { 42 | "type": "text", 43 | "index": true 44 | } 45 | }, 46 | "dynamic_templates": [ 47 | { 48 | "notanalyzed": { 49 | "match": "*", 50 | "unmatch": "message_analyzed", 51 | "match_mapping_type": "string", 52 | "mapping": { 53 | "type": 
"keyword" 54 | } 55 | } 56 | }, 57 | { 58 | "formatdate": { 59 | "match": "*", 60 | "match_mapping_type": "date", 61 | "mapping": { 62 | "format": "strict_date_optional_time||epoch_millis", 63 | "type": "date" 64 | } 65 | } 66 | } 67 | ] 68 | } 69 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | ) 8 | 9 | // Printf is a wrapper around Printf(...) that supports logging and removes redacted data. 10 | func Printf(format string, args ...interface{}) { 11 | // Actual logging to stdout & DB 12 | now := time.Now() 13 | msg := FilterRedacted(fmt.Sprintf("%s: "+format, append([]interface{}{ToYMDHMSDate(now)}, args...)...)) 14 | _, err := fmt.Printf("%s", msg) 15 | if err != nil { 16 | log.Printf("Err: %s", err.Error()) 17 | } 18 | } 19 | 20 | // PrintfNoRedacted is a wrapper around Printf(...) that supports logging and don't removes redacted data 21 | func PrintfNoRedacted(format string, args ...interface{}) { 22 | // Actual logging to stdout & DB 23 | now := time.Now() 24 | msg := fmt.Sprintf("%s: "+format, append([]interface{}{ToYMDHMSDate(now)}, args...)...) 25 | _, err := fmt.Printf("%s", msg) 26 | if err != nil { 27 | log.Printf("Err: %s", err.Error()) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /map.json: -------------------------------------------------------------------------------- 1 | {"dynamic":true,"properties":{"metadata__updated_on":{"type":"date","format":"strict_date_optional_time||epoch_millis"},"merge_author_geolocation":{"type":"geo_point"},"assignee_geolocation":{"type":"geo_point"},"state":{"type":"keyword"},"user_geolocation":{"type":"geo_point"},"title_analyzed":{"type":"text","index":true},"body_analyzed":{"type":"text","index":true}},"dynamic_templates":[{"notanalyzed":{"match":"*","unmatch":"body","match_mapping_type":"string","mapping":{"type":"keyword"}}},{"formatdate":{"match":"*","match_mapping_type":"date","mapping":{"format":"strict_date_optional_time||epoch_millis","type":"date"}}}]} 2 | -------------------------------------------------------------------------------- /notused/dsgroupsio.g_: -------------------------------------------------------------------------------- 1 | // Printf("Result %d\nCookies %s\n", len(res.(map[string]interface{})), cookies) 2 | // We do have cookies now (from either real request or from the L2 cache) 3 | // we *could* call getsubs now, but login already returns that data 4 | // so I will restructur this to make use of login result to find Group ID/Name 5 | // and store cookies for future/other requests that require them 6 | /* 7 | url = GroupsioAPIURL + GroupsioAPIGetsubs 8 | res, _, _, err = Request( 9 | ctx, 10 | url, 11 | method, 12 | nil, 13 | []byte{}, 14 | cookies, 15 | map[[2]int]struct{}{{200, 200}: {}}, // JSON statuses: 200 16 | nil, // Error statuses 17 | map[[2]int]struct{}{{200, 200}: {}}, // OK statuses: 200 18 | false, // retry 19 | nil, // cache duration 20 | false, // skip in dry-run mode 21 | ) 22 | if err != nil { 23 | Printf("Result %d\n", len(res.([]byte))) 24 | } else { 25 | Printf("Result %d\n", len(res.(map[string]interface{}))) 26 | } 27 | */ 28 | -------------------------------------------------------------------------------- /notused/mbox.g_: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "io" 
5 | "os" 6 | "sync" 7 | 8 | "github.com/blabber/mbox" 9 | "github.com/spf13/afero" 10 | ) 11 | 12 | // ParseMBoxMsg - parse a raw MBox message into object to be inserte dinto raw ES 13 | func ParseMBoxMsg(ctx *Ctx, mfs *afero.Fs, mtx *sync.Mutex, msg []byte) (item map[string]interface{}, valid bool, err error) { 14 | var f afero.File 15 | //var f *os.File 16 | if mtx != nil { 17 | mtx.Lock() 18 | defer mtx.Unlock() 19 | } 20 | f, err = afero.TempFile(*mfs, "groupsio", "mbox") 21 | //f, err = ioutil.TempFile("", "mbox") 22 | if err != nil { 23 | Printf("tempfile error: %v\n", err) 24 | return 25 | } 26 | defer func() { _ = f.Close() }() 27 | _, err = f.Write(msg) 28 | if err != nil { 29 | Printf("write error: %v\n", err) 30 | return 31 | } 32 | _, err = f.Seek(0, io.SeekStart) 33 | if err != nil { 34 | Printf("seek error: %v\n", err) 35 | return 36 | } 37 | if ctx.Debug > 1 { 38 | Printf("message length %d, file: %s\n", len(msg), f.Name()) 39 | } 40 | reader := mbox.NewReader(f) 41 | for { 42 | var msgReader io.Reader 43 | msgReader, err = reader.NextMessage() 44 | if err == io.EOF { 45 | break 46 | } 47 | if err != nil { 48 | Printf("fetch next message error: %v\n", err) 49 | return 50 | } 51 | Printf("msgReader: %v\n", msgReader) 52 | } 53 | os.Exit(1) 54 | /* 55 | if len(mbox.Messages) == 0 { 56 | var st os.FileInfo 57 | st, err = f.Stat() 58 | if err != nil { 59 | printf("stat failed: %v\n", err) 60 | return 61 | } 62 | data := make([]byte, st.Size()) 63 | _, err = f.Read(data) 64 | if err != nil { 65 | printf("read failed: %v\n", err) 66 | return 67 | } 68 | Printf("nothing parsed from:\n%s\n", string(data)) 69 | os.Exit(1) 70 | } 71 | */ 72 | // FIXME: continue 73 | // valid = true 74 | return 75 | } 76 | -------------------------------------------------------------------------------- /notused/mbox_test.g_: -------------------------------------------------------------------------------- 1 | data, _ := ioutil.ReadFile("yocto+meta-arm_3753.mbox") 2 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 3 | data, _ = ioutil.ReadFile("yocto+meta-arm_4915.mbox") 4 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 5 | data, _ = ioutil.ReadFile("3212.mbox") 6 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 7 | data, _ = ioutil.ReadFile("8201.mbox") 8 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 9 | data, _ = ioutil.ReadFile("1426647.mbox") 10 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 11 | data, _ = ioutil.ReadFile("62454.mbox") 12 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 13 | data, _ = ioutil.ReadFile("yocto+meta-arm_2742.mbox") 14 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 15 | data, _ = ioutil.ReadFile("risc-v+tech-virt-mem_77768.mbox") 16 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 17 | data, _ = ioutil.ReadFile("spdx+Spdx-tech_12382.mbox") 18 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 19 | data, _ = ioutil.ReadFile("spdx+Spdx-tech_11160.mbox") 20 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 21 | data, _ = ioutil.ReadFile("tungsten+marketing_66343.mbox") 22 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 23 | data, _ = ioutil.ReadFile("openswitch+dev_479180.mbox") 24 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 25 | data, _ = ioutil.ReadFile("openswitch+dev_164359.mbox") 26 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 27 | os.Exit(1) 28 | 29 | dbg := ctx.Debug 30 | ctx.Debug = 0 31 | fns, err := lib.ExecCommand(&ctx, []string{"find", ".", "-iname", "*.mbox"}) 32 | lib.FatalOnError(err) 33 | ctx.Debug = dbg 34 | for _, fn := range strings.Split(fns, "\n") { 35 
| if fn == "" { 36 | continue 37 | } 38 | if strings.HasPrefix(fn, "xxx") { 39 | continue 40 | } 41 | data, err := ioutil.ReadFile(fn) 42 | lib.FatalOnError(err) 43 | fmt.Printf("%s\n", fn) 44 | _, _, _ = lib.ParseMBoxMsg(&ctx, "xxx", data) 45 | } 46 | os.Exit(1) 47 | 48 | sdts := []string{ 49 | "Mon, 30 Sep 2019 15:15:39 +0000", 50 | "TUE, 1 oCt 2019 15:15:39 -1200", 51 | "23 Dec 2013 14:51:30 gmt", 52 | "> Tue, 02 Jul 2013 02:28:30 GMT", 53 | "2017-04-03 09:52:03 -0700", 54 | "2017-11-19 09:52:03 -1000", 55 | ">>\t Wed, 29 Jan 2003 16:55 +0000 (Pacific Standard Time)", 56 | } 57 | for _, sdt := range sdts { 58 | dt, ok := lib.ParseMBoxDate(sdt) 59 | if !ok { 60 | lib.Printf("unable to parse date from '%s'\n", sdt) 61 | } else { 62 | lib.Printf("'%s' parsed to %v\n", sdt, dt) 63 | } 64 | } 65 | os.Exit(1) 66 | 67 | // FIXME: start 68 | dbg := ctx.Debug 69 | ctx.Debug = 0 70 | fns, err := lib.ExecCommand(&ctx, []string{"find", "/root/mbox/", "-iname", "*.mbox"}) 71 | lib.FatalOnError(err) 72 | ctx.Debug = dbg 73 | for _, fn := range strings.Split(fns, "\n") { 74 | if fn == "" { 75 | continue 76 | } 77 | if strings.HasPrefix(fn, "xxx") { 78 | continue 79 | } 80 | data, err := ioutil.ReadFile(fn) 81 | lib.FatalOnError(err) 82 | _, _, _ = lib.ParseMBoxMsg(&ctx, fn, data) 83 | } 84 | os.Exit(1) 85 | // FIXME: end 86 | /* 87 | rich["Received"], ok = getIValue("received") 88 | rich["Received"], ok = getIValue("RECEIVED") 89 | rich["Received"], ok = getIValue("xxx") 90 | rich["Received"], ok = getIValue("date") 91 | rich["Received"], ok = getIValue("MBox-Valid") 92 | rich["Received"], ok = getIValue("mbox-n-lines") 93 | */ 94 | -------------------------------------------------------------------------------- /pipermail/const.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | const ( 10 | // ModMboxThreadStr ... 11 | ModMboxThreadStr = "/thread" 12 | // Pipermail datasource 13 | Pipermail = "pipermail" 14 | // PiperBackendVersion ... 15 | PiperBackendVersion = "0.0.1" 16 | // MessageDateField ... 17 | MessageDateField = "date" 18 | // Message ... 19 | Message = "message" 20 | // MessageIDField ... 21 | MessageIDField = "Message-ID" 22 | // Unknown ... 23 | Unknown = "Unknown" 24 | // MaxConcurrentRequests ... 25 | MaxConcurrentRequests = 100000 26 | ) 27 | 28 | var ( 29 | // CompressedTypes ... 30 | CompressedTypes = []string{".gz", ".bz2", ".zip", ".tar", ".tar.gz", ".tar.bz2", ".tgz", ".tbz"} 31 | // AcceptedTypes ... 32 | AcceptedTypes = []string{".mbox", ".txt"} 33 | // CombinedTypes ... 34 | CombinedTypes []string 35 | // MONTHS ... 36 | MONTHS = map[string]int{"January": 1, "February": 2, "March": 3, "April": 4, "May": 5, "June": 6, "July": 7, "August": 8, "September": 9, "October": 10, "November": 11, "December": 12} 37 | // DefaultDateTime ... 38 | DefaultDateTime = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 39 | // MessageSeparator ... 40 | MessageSeparator = []byte("\nFrom") 41 | // PiperRawMapping ... 42 | PiperRawMapping = []byte(`{"mappings":{"dynamic":true,"properties":{"metadata__updated_on":{"type":"date"},"data":{"properties":{"body":{"dynamic":false,"properties":{}}}}}}}`) 43 | // PiperRichMapping ... 
44 | PiperRichMapping = []byte(`{"mappings":{"dynamic_templates":[{"notanalyzed":{"match":"*","match_mapping_type":"string","mapping":{"type":"keyword"}}},{"int_to_float":{"match":"*","match_mapping_type":"long","mapping":{"type":"float"}}},{"formatdate":{"match":"*","match_mapping_type":"date","mapping":{"format":"strict_date_optional_time||epoch_millis","type":"date"}}}]}}`) 45 | // EmailObfuscationPatterns ... 46 | EmailObfuscationPatterns = []string{" at ", "_at_", " en "} 47 | // ArchiveDownloadsPath ... 48 | ArchiveDownloadsPath = strings.TrimSpace(os.Getenv("HOME") + "/.perceval/mailinglists/") 49 | // DumpsPath ... 50 | DumpsPath = strings.TrimSpace(os.Getenv("HOME") + "/.perceval/dumps/") 51 | ) 52 | -------------------------------------------------------------------------------- /pipermail/downloader.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | ) 12 | 13 | // TrimFirstDot ... 14 | func TrimFirstDot(s string) string { 15 | st := strings.Split(s, ".") 16 | return st[0] 17 | } 18 | 19 | // TrimFirstDash ... 20 | func TrimFirstDash(s string) (year, month string) { 21 | m := strings.Split(s, "-") 22 | return m[0], m[1] 23 | } 24 | 25 | // ParseDateFromFilePath ... 26 | func ParseDateFromFilePath(path string) time.Time { 27 | layoutISO := "2006-1-2" 28 | baseName := filepath.Base(path) 29 | year, month := TrimFirstDash(TrimFirstDot(baseName)) 30 | monthVal := MONTHS[month] 31 | date := fmt.Sprintf("%s-%+v-1", year, monthVal) 32 | t, err := time.Parse(layoutISO, date) 33 | if err != nil { 34 | fmt.Println(baseName) 35 | fmt.Println(err) 36 | } 37 | return t 38 | } 39 | 40 | // DateTimeToUTC ... 41 | func DateTimeToUTC(date string) time.Time { 42 | layout := "2006-01-02T15:04:05.000Z" 43 | t, err := time.Parse(layout, date) 44 | if err != nil { 45 | fmt.Println(err) 46 | } 47 | return t 48 | } 49 | 50 | // DownloadFile will download a url to a local file. It's efficient because it will 51 | // write as it downloads and not load the whole file into memory. 52 | func DownloadFile(url, filepath string) error { 53 | // Get the data 54 | resp, err := http.Get(url) 55 | if err != nil { 56 | return err 57 | } 58 | defer func() { _ = resp.Body.Close() }() 59 | 60 | // Create the file 61 | out, err := os.Create(filepath) 62 | if err != nil { 63 | return err 64 | } 65 | defer func() { _ = out.Close() }() 66 | 67 | // Write the body to file 68 | _, err = io.Copy(out, resp.Body) 69 | return err 70 | } 71 | -------------------------------------------------------------------------------- /pipermail/dto.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | import "time" 4 | 5 | // MessageSearchFields ... 
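// MessageSearchFields carries the search metadata stored with a raw message: the mailing list (group) name and the item identifier.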
6 | type MessageSearchFields struct { 7 | Name string `json:"name"` 8 | ItemID string `json:"item_id"` 9 | } 10 | 11 | // RawMessage represents piper mail raw message 12 | type RawMessage struct { 13 | BackendVersion string `json:"backend_version"` 14 | Data *RawMessageData `json:"data"` 15 | Tag string `json:"tag"` 16 | UUID string `json:"uuid"` 17 | SearchFields *MessageSearchFields `json:"search_fields"` 18 | Origin string `json:"origin"` 19 | UpdatedOn float64 `json:"updated_on"` 20 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 21 | BackendName string `json:"backend_name"` 22 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 23 | Timestamp float64 `json:"timestamp"` 24 | Category string `json:"category"` 25 | ProjectSlug string `json:"project_slug"` 26 | GroupName string `json:"group_name"` 27 | Project string `json:"project"` 28 | ChangedAt time.Time `json:"changed_at"` 29 | } 30 | 31 | // RawMessageData ... 32 | type RawMessageData struct { 33 | ContentType string `json:"Content-Type"` 34 | Date string `json:"Date"` 35 | From string `json:"From"` 36 | InReplyTo string `json:"In-Reply-To"` 37 | MboxByteLength int64 `json:"MBox-Bytes-Length"` 38 | MboxNBodies int `json:"MBox-N-Bodies"` 39 | MboxNLines int64 `json:"MBox-N-Lines"` 40 | MboxProjectName string `json:"MBox-Project-Name"` 41 | MboxValid bool `json:"MBox-Valid"` 42 | MboxWarn bool `json:"MBox-Warn"` 43 | MessageID string `json:"Message-ID"` 44 | References string `json:"References"` 45 | Subject string `json:"Subject"` 46 | Data struct { 47 | Text struct { 48 | Plain []struct { 49 | Data string `json:"data"` 50 | } `json:"plain"` 51 | } `json:"text"` 52 | } `json:"data"` 53 | DateInTZ string `json:"date_in_tz"` 54 | DateTZ float64 `json:"date_tz"` 55 | } 56 | 57 | // EnrichedMessage represents piper mail enriched message 58 | type EnrichedMessage struct { 59 | ID string `json:"id"` 60 | TZ float64 `json:"tz"` 61 | MessageID string `json:"Message-ID"` 62 | UUID string `json:"uuid"` 63 | AuthorName string `json:"author_name"` 64 | Root bool `json:"root"` 65 | AuthorOrgName string `json:"author_org_name"` 66 | AuthorBot bool `json:"author_bot"` 67 | BodyExtract string `json:"body_extract"` 68 | AuthorID string `json:"author_id"` 69 | SubjectAnalyzed string `json:"subject_analyzed"` 70 | Project string `json:"project"` 71 | MboxAuthorDomain string `json:"mbox_author_domain"` 72 | Date time.Time `json:"date"` 73 | IsPipermailMessage int `json:"is_pipermail_message"` 74 | List string `json:"list"` 75 | AuthorUUID string `json:"author_uuid"` 76 | AuthorMultiOrgNames []string `json:"author_multi_org_names"` 77 | Origin string `json:"origin"` 78 | Size int64 `json:"size"` 79 | Tag string `json:"tag"` 80 | Subject string `json:"subject"` 81 | FromID string `json:"from_id"` 82 | EmailDate time.Time `json:"email_date"` 83 | MetadataTimestamp time.Time `json:"metadata__timestamp"` 84 | MetadataBackendName string `json:"metadata__backend_name"` 85 | MetadataUpdatedOn time.Time `json:"metadata__updated_on"` 86 | MetadataEnrichedOn time.Time `json:"metadata__enriched_on"` 87 | ProjectSlug string `json:"project_slug"` 88 | ChangedAt time.Time `json:"changed_at"` 89 | GroupName string `json:"group_name"` 90 | Slug string `json:"slug"` 91 | References string `json:"references"` 92 | } 93 | 94 | // RawHits result 95 | type RawHits struct { 96 | Hits NHits `json:"hits"` 97 | } 98 | 99 | // NHits result 100 | type NHits struct { 101 | Hits []NestedRawHits `json:"hits"` 102 | } 103 | 104 | // NestedRawHits is the actual hit 
data 105 | type NestedRawHits struct { 106 | ID string `json:"_id"` 107 | Source RawMessage `json:"_source"` 108 | } 109 | -------------------------------------------------------------------------------- /pipermail/fetcher_test.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/LF-Engineering/da-ds/pipermail/mocks" 9 | "github.com/LF-Engineering/dev-analytics-libraries/http" 10 | jsoniter "github.com/json-iterator/go" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/mock" 13 | ) 14 | 15 | func TestFetchAll(t *testing.T) { 16 | httpClientProviderMock := &mocks.HTTPClientProvider{} 17 | from, err := time.Parse("2006-01-02 15:04:05", "2020-01-01 03:00:00") 18 | if err != nil { 19 | fmt.Println(err) 20 | } 21 | 22 | url := "https://www.openembedded.org/pipermail/openembedded-architecture/" 23 | 24 | httpClient := http.NewClientProvider(time.Second * 600) 25 | httpClientProviderMock.On("Request", url, "GET", 26 | mock.Anything, mock.Anything, mock.Anything).Return( 27 | 200, rawMessageBytes, nil) 28 | tt := []struct { 29 | name string 30 | fields fields 31 | expected []byte 32 | err bool 33 | }{ 34 | { 35 | name: "ok message", 36 | fields: fields{ 37 | DSName: "pipermail", 38 | IncludeArchived: false, 39 | MultiOrigin: false, 40 | HTTPClientProvider: nil, 41 | ElasticSearchProvider: nil, 42 | BackendVersion: "", 43 | }, 44 | expected: rawMessageBytes, 45 | err: false, 46 | }, 47 | } 48 | 49 | for _, tc := range tt { 50 | t.Run(tc.name, func(t *testing.T) { 51 | expecRaw, err := toMessageRaw(tc.expected) 52 | if err != nil { 53 | t.Error(err) 54 | } 55 | 56 | params := &Params{ 57 | FromDate: from, 58 | BackendVersion: "0.0.1", 59 | Project: "yocto", 60 | Debug: 2, 61 | ProjectSlug: "yocto", 62 | GroupName: "openembedded-architecture", 63 | } 64 | srv := NewFetcher(params, httpClient, nil) 65 | var rawMessage interface{} 66 | err = jsoniter.Unmarshal(rawMessageBytes, &rawMessage) 67 | if err != nil { 68 | t.Error(err) 69 | } 70 | var message *RawMessage 71 | message = srv.AddMetadata(rawMessage, url, params.ProjectSlug, params.GroupName) 72 | if err != nil { 73 | t.Error(err) 74 | } 75 | 76 | assert.NoError(t, err) 77 | assert.Equal(t, expecRaw.BackendVersion, message.BackendVersion) 78 | assert.Equal(t, expecRaw.Origin, message.Origin) 79 | assert.Equal(t, expecRaw.Data.MessageID, message.Data.MessageID) 80 | }) 81 | } 82 | 83 | } 84 | 85 | func toMessageRaw(b []byte) (output RawMessage, err error) { 86 | err = jsoniter.Unmarshal(b, &output) 87 | return 88 | } 89 | 90 | var rawMessageBytes = []byte(` 91 | { 92 | "backend_version":"0.0.1", 93 | "data":{ 94 | "Content-Type":"text/plain", 95 | "Date":"2016-02-26T19:15:43Z", 96 | "From":"jon.doe at gmail.com (Jon Doe)", 97 | "In-Reply-To":"", 98 | "MBox-Bytes-Length":742, 99 | "MBox-N-Bodies":1, 100 | "MBox-N-Lines":19, 101 | "MBox-Project-Name":"openembedded-architecture", 102 | "MBox-Valid":true, 103 | "MBox-Warn":false, 104 | "Message-ID":"", 105 | "References":"<2956615.y8hyrZheM7@peggleto-mobl.m.m.try.com>", 106 | "Subject":"[Openembedded-architecture] Removing Hob for 2.1", 107 | "data":{ 108 | "text":{ 109 | "plain":[ 110 | { 111 | "data":"On Fri, 26 Feb 2016, 03:54 Jon Doe wrote: Go ahead Yes, go ahead." 
112 | } 113 | ] 114 | } 115 | }, 116 | "date_in_tz":"2016-02-26T19:15:43Z", 117 | "date_tz":0 118 | }, 119 | "tag":"https://www.openembedded.org/pipermail/openembedded-architecture/", 120 | "uuid":"acb13ade6f1540ceb6b72b085c94c32c7a6a540b", 121 | "search_fields":{ 122 | "name":"openembedded-architecture", 123 | "item_id":"" 124 | }, 125 | "origin":"https://www.openembedded.org/pipermail/openembedded-architecture/", 126 | "updated_on":1456514143, 127 | "metadata__updated_on":"2016-02-26T19:15:43.000000+00:00", 128 | "backend_name":"pipermail", 129 | "metadata__timestamp":"2020-12-24T20:15:09.322548+00:00", 130 | "timestamp":1.608840909322548E9, 131 | "category":"message", 132 | "project_slug":"yocto", 133 | "group_name":"openembedded-architecture", 134 | "project":"yocto", 135 | "changed_at":"0001-01-01T00:00:00Z", 136 | "Message-ID":"", 137 | "date":"2016-02-26T19:15:43Z" 138 | } 139 | `) 140 | 141 | type fields struct { 142 | DSName string 143 | IncludeArchived bool 144 | MultiOrigin bool 145 | HTTPClientProvider HTTPClientProvider 146 | ElasticSearchProvider ESClientProvider 147 | BackendVersion string 148 | Debug int 149 | DateFrom time.Time 150 | } 151 | -------------------------------------------------------------------------------- /pipermail/mapping.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | var ( 4 | // PipermailRawMapping - Pipeermail raw index mapping 5 | PipermailRawMapping = []byte(`{"mappings": {"dynamic":true,"properties":{"metadata__updated_on":{"type":"date"},"data":{"properties":{"description":{"type":"text","index":true},"full_description":{"type":"text","index":true}}}}}}`) 6 | ) 7 | -------------------------------------------------------------------------------- /pipermail/mocks/affiliation_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
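// The AffiliationClient interface mocked here mirrors the affiliation service used during enrichment: AddIdentity registers an identity, GetIdentityByUser looks an identity up by a key/value pair, and GetOrganizations returns a contributor's enrollments for a given UUID and project slug.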
2 | 3 | package mocks 4 | 5 | import ( 6 | affiliation "github.com/LF-Engineering/dev-analytics-libraries/affiliation" 7 | mock "github.com/stretchr/testify/mock" 8 | ) 9 | 10 | // AffiliationClient is an autogenerated mock type for the AffiliationClient type 11 | type AffiliationClient struct { 12 | mock.Mock 13 | } 14 | 15 | // AddIdentity provides a mock function with given fields: identity 16 | func (_m *AffiliationClient) AddIdentity(identity *affiliation.Identity) bool { 17 | ret := _m.Called(identity) 18 | 19 | var r0 bool 20 | if rf, ok := ret.Get(0).(func(*affiliation.Identity) bool); ok { 21 | r0 = rf(identity) 22 | } else { 23 | r0 = ret.Get(0).(bool) 24 | } 25 | 26 | return r0 27 | } 28 | 29 | // GetIdentityByUser provides a mock function with given fields: key, value 30 | func (_m *AffiliationClient) GetIdentityByUser(key string, value string) (*affiliation.AffIdentity, error) { 31 | ret := _m.Called(key, value) 32 | 33 | var r0 *affiliation.AffIdentity 34 | if rf, ok := ret.Get(0).(func(string, string) *affiliation.AffIdentity); ok { 35 | r0 = rf(key, value) 36 | } else { 37 | if ret.Get(0) != nil { 38 | r0 = ret.Get(0).(*affiliation.AffIdentity) 39 | } 40 | } 41 | 42 | var r1 error 43 | if rf, ok := ret.Get(1).(func(string, string) error); ok { 44 | r1 = rf(key, value) 45 | } else { 46 | r1 = ret.Error(1) 47 | } 48 | 49 | return r0, r1 50 | } 51 | 52 | // GetOrganizations provides a mock function with given fields: uuid, projectSlug 53 | func (_m *AffiliationClient) GetOrganizations(uuid string, projectSlug string) *[]affiliation.Enrollment { 54 | ret := _m.Called(uuid, projectSlug) 55 | 56 | var r0 *[]affiliation.Enrollment 57 | if rf, ok := ret.Get(0).(func(string, string) *[]affiliation.Enrollment); ok { 58 | r0 = rf(uuid, projectSlug) 59 | } else { 60 | if ret.Get(0) != nil { 61 | r0 = ret.Get(0).(*[]affiliation.Enrollment) 62 | } 63 | } 64 | 65 | return r0 66 | } 67 | -------------------------------------------------------------------------------- /pipermail/mocks/es_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 
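// Typical stubbing for this mock in tests looks roughly like the following (index name and payloads are illustrative):
//   esMock := &ESClientProvider{}
//   esMock.On("CreateIndex", "sds-pipermail-raw", mock.Anything).Return([]byte(`{"acknowledged":true}`), nil)
//   esMock.On("BulkInsert", mock.Anything).Return([]byte(`{}`), nil)
// Any method that is called without a matching On(...) expectation panics, which surfaces unexpected ElasticSearch traffic in tests.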
2 | 3 | package mocks 4 | 5 | import ( 6 | elastic "github.com/LF-Engineering/dev-analytics-libraries/elastic" 7 | mock "github.com/stretchr/testify/mock" 8 | 9 | time "time" 10 | ) 11 | 12 | // ESClientProvider is an autogenerated mock type for the ESClientProvider type 13 | type ESClientProvider struct { 14 | mock.Mock 15 | } 16 | 17 | // Add provides a mock function with given fields: index, documentID, body 18 | func (_m *ESClientProvider) Add(index string, documentID string, body []byte) ([]byte, error) { 19 | ret := _m.Called(index, documentID, body) 20 | 21 | var r0 []byte 22 | if rf, ok := ret.Get(0).(func(string, string, []byte) []byte); ok { 23 | r0 = rf(index, documentID, body) 24 | } else { 25 | if ret.Get(0) != nil { 26 | r0 = ret.Get(0).([]byte) 27 | } 28 | } 29 | 30 | var r1 error 31 | if rf, ok := ret.Get(1).(func(string, string, []byte) error); ok { 32 | r1 = rf(index, documentID, body) 33 | } else { 34 | r1 = ret.Error(1) 35 | } 36 | 37 | return r0, r1 38 | } 39 | 40 | // Bulk provides a mock function with given fields: body 41 | func (_m *ESClientProvider) Bulk(body []byte) ([]byte, error) { 42 | ret := _m.Called(body) 43 | 44 | var r0 []byte 45 | if rf, ok := ret.Get(0).(func([]byte) []byte); ok { 46 | r0 = rf(body) 47 | } else { 48 | if ret.Get(0) != nil { 49 | r0 = ret.Get(0).([]byte) 50 | } 51 | } 52 | 53 | var r1 error 54 | if rf, ok := ret.Get(1).(func([]byte) error); ok { 55 | r1 = rf(body) 56 | } else { 57 | r1 = ret.Error(1) 58 | } 59 | 60 | return r0, r1 61 | } 62 | 63 | // BulkInsert provides a mock function with given fields: data 64 | func (_m *ESClientProvider) BulkInsert(data []elastic.BulkData) ([]byte, error) { 65 | ret := _m.Called(data) 66 | 67 | var r0 []byte 68 | if rf, ok := ret.Get(0).(func([]elastic.BulkData) []byte); ok { 69 | r0 = rf(data) 70 | } else { 71 | if ret.Get(0) != nil { 72 | r0 = ret.Get(0).([]byte) 73 | } 74 | } 75 | 76 | var r1 error 77 | if rf, ok := ret.Get(1).(func([]elastic.BulkData) error); ok { 78 | r1 = rf(data) 79 | } else { 80 | r1 = ret.Error(1) 81 | } 82 | 83 | return r0, r1 84 | } 85 | 86 | // CreateIndex provides a mock function with given fields: index, body 87 | func (_m *ESClientProvider) CreateIndex(index string, body []byte) ([]byte, error) { 88 | ret := _m.Called(index, body) 89 | 90 | var r0 []byte 91 | if rf, ok := ret.Get(0).(func(string, []byte) []byte); ok { 92 | r0 = rf(index, body) 93 | } else { 94 | if ret.Get(0) != nil { 95 | r0 = ret.Get(0).([]byte) 96 | } 97 | } 98 | 99 | var r1 error 100 | if rf, ok := ret.Get(1).(func(string, []byte) error); ok { 101 | r1 = rf(index, body) 102 | } else { 103 | r1 = ret.Error(1) 104 | } 105 | 106 | return r0, r1 107 | } 108 | 109 | // Get provides a mock function with given fields: index, query, result 110 | func (_m *ESClientProvider) Get(index string, query map[string]interface{}, result interface{}) error { 111 | ret := _m.Called(index, query, result) 112 | 113 | var r0 error 114 | if rf, ok := ret.Get(0).(func(string, map[string]interface{}, interface{}) error); ok { 115 | r0 = rf(index, query, result) 116 | } else { 117 | r0 = ret.Error(0) 118 | } 119 | 120 | return r0 121 | } 122 | 123 | // GetStat provides a mock function with given fields: index, field, aggType, mustConditions, mustNotConditions 124 | func (_m *ESClientProvider) GetStat(index string, field string, aggType string, mustConditions []map[string]interface{}, mustNotConditions []map[string]interface{}) (time.Time, error) { 125 | ret := _m.Called(index, field, aggType, mustConditions, 
mustNotConditions) 126 | 127 | var r0 time.Time 128 | if rf, ok := ret.Get(0).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) time.Time); ok { 129 | r0 = rf(index, field, aggType, mustConditions, mustNotConditions) 130 | } else { 131 | r0 = ret.Get(0).(time.Time) 132 | } 133 | 134 | var r1 error 135 | if rf, ok := ret.Get(1).(func(string, string, string, []map[string]interface{}, []map[string]interface{}) error); ok { 136 | r1 = rf(index, field, aggType, mustConditions, mustNotConditions) 137 | } else { 138 | r1 = ret.Error(1) 139 | } 140 | 141 | return r0, r1 142 | } 143 | -------------------------------------------------------------------------------- /pipermail/mocks/http_client_provider.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v0.0.0-dev. DO NOT EDIT. 2 | 3 | package mocks 4 | 5 | import mock "github.com/stretchr/testify/mock" 6 | 7 | // HTTPClientProvider is an autogenerated mock type for the HTTPClientProvider type 8 | type HTTPClientProvider struct { 9 | mock.Mock 10 | } 11 | 12 | // Request provides a mock function with given fields: url, method, header, body, params 13 | func (_m *HTTPClientProvider) Request(url string, method string, header map[string]string, body []byte, params map[string]string) (int, []byte, error) { 14 | ret := _m.Called(url, method, header, body, params) 15 | 16 | var r0 int 17 | if rf, ok := ret.Get(0).(func(string, string, map[string]string, []byte, map[string]string) int); ok { 18 | r0 = rf(url, method, header, body, params) 19 | } else { 20 | r0 = ret.Get(0).(int) 21 | } 22 | 23 | var r1 []byte 24 | if rf, ok := ret.Get(1).(func(string, string, map[string]string, []byte, map[string]string) []byte); ok { 25 | r1 = rf(url, method, header, body, params) 26 | } else { 27 | if ret.Get(1) != nil { 28 | r1 = ret.Get(1).([]byte) 29 | } 30 | } 31 | 32 | var r2 error 33 | if rf, ok := ret.Get(2).(func(string, string, map[string]string, []byte, map[string]string) error); ok { 34 | r2 = rf(url, method, header, body, params) 35 | } else { 36 | r2 = ret.Error(2) 37 | } 38 | 39 | return r0, r1, r2 40 | } 41 | -------------------------------------------------------------------------------- /pipermail/scrapper.go: -------------------------------------------------------------------------------- 1 | package pipermail 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net/http" 7 | "path/filepath" 8 | "strings" 9 | "time" 10 | 11 | "github.com/PuerkitoBio/goquery" 12 | ) 13 | 14 | // ParseArchiveLinks scraps the contents of a given url to extract compressed files 15 | // download links 16 | func (f *Fetcher) ParseArchiveLinks(archivesURL string, fromDate *time.Time) ([]string, error) { 17 | // get all accepted & compressed types into one list 18 | CombinedTypes = append(CombinedTypes, CompressedTypes...) 19 | CombinedTypes = append(CombinedTypes, AcceptedTypes...) 
20 | 21 | headers := map[string]string{} 22 | headers["pragma"] = "no-cache" 23 | headers["cache-control"] = "no-cache" 24 | headers["dnt"] = "1" 25 | headers["upgrade-insecure-requests"] = "1" 26 | headers["referer"] = archivesURL 27 | 28 | var links []string 29 | 30 | statusCode, resBody, err := f.HTTPClientProvider.Request(archivesURL, "GET", headers, nil, nil) 31 | if err != nil || statusCode != http.StatusOK { 32 | return nil, err 33 | } 34 | if statusCode == http.StatusOK { 35 | doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resBody)) 36 | 37 | if err != nil { 38 | fmt.Println(err) 39 | } 40 | 41 | doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { 42 | link, _ := s.Attr("href") 43 | 44 | link = archivesURL + link 45 | 46 | // Make sure we only fetch correct URLs with a corresponding title 47 | if strings.Contains(link, ".") { 48 | text := s.Text() 49 | // filter out unnecessary links 50 | if text != "" && text != "more" { 51 | links = append(links, link) 52 | } 53 | } 54 | }) 55 | } 56 | 57 | var sortedLinks []string 58 | for _, link := range links { 59 | // Links from Apache's 'mod_mbox' plugin contain 60 | // trailing "/thread" substrings. Remove them to get 61 | // the links where mbox files are stored. 62 | if strings.HasSuffix(link, ModMboxThreadStr) { 63 | link = strings.TrimSuffix(link, ModMboxThreadStr) 64 | } 65 | 66 | // inspect the first extension for any accepted compressed types 67 | // ie ".gz", ".bz2", ".zip", ".tar", ".tar.gz", ".tar.bz2", ".tgz", ".tbz" 68 | _, ext1 := f.Find(CombinedTypes, filepath.Ext(link)) 69 | 70 | // get the second extension. piper mail extensions are in the format https://mails.dpdk.org/archives/users/2016-March.txt.gz 71 | // thus have two extensions. 72 | secondExtension := strings.TrimSuffix(link, ext1) 73 | 74 | // inspect the second extension for any accepted types 75 | // ie ".mbox", ".txt" 76 | _, ext2 := f.Find(CombinedTypes, filepath.Ext(secondExtension)) 77 | 78 | if ext1 != "" || ext2 != "" { 79 | mboxDT := ParseDateFromFilePath(link) 80 | if fromDate.Year() == mboxDT.Year() && fromDate.Month() == mboxDT.Month() || fromDate.After(mboxDT) { 81 | sortedLinks = append(sortedLinks, link) 82 | } 83 | 84 | // all time 85 | if fromDate.Year() == 1970 { 86 | sortedLinks = append(sortedLinks, link) 87 | } 88 | } 89 | } 90 | return sortedLinks, nil 91 | } 92 | 93 | // Find takes a slice and looks for an element in it. If found, it 94 | // returns true and the matching item; otherwise it returns false and an empty string. 95 | func (f *Fetcher) Find(slice []string, val string) (bool, string) { 96 | for _, item := range slice { 97 | if item == val { 98 | return true, item 99 | } 100 | } 101 | return false, "" 102 | } 103 | 104 | // TrimDots ...
105 | func TrimDots(s string) string { 106 | var st []string 107 | if strings.Contains(s, ".") { 108 | st = strings.Split(s, ".") 109 | TrimDots(st[0]) 110 | } 111 | return s 112 | } 113 | -------------------------------------------------------------------------------- /redacted.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | "sync" 7 | ) 8 | 9 | var ( 10 | // GRedactedStrings - need to be global, to redact them from error logs 11 | GRedactedStrings map[string]struct{} 12 | // GRedactedMtx - guard access to this map while in MT 13 | GRedactedMtx *sync.RWMutex 14 | redactedOnce sync.Once 15 | // AnonymizeURLPattern - used to remove sensitive data from the url - 3rd can be a GitHub password 16 | AnonymizeURLPattern = regexp.MustCompile(`(^.*)(://)(.*@)(.*$)`) 17 | ) 18 | 19 | // AddRedacted - adds redacted string 20 | func AddRedacted(newRedacted string, useMutex bool) { 21 | // Initialize map & mutex once 22 | redactedOnce.Do(func() { 23 | GRedactedStrings = make(map[string]struct{}) 24 | GRedactedMtx = &sync.RWMutex{} 25 | }) 26 | if useMutex { 27 | GRedactedMtx.Lock() 28 | defer func() { 29 | GRedactedMtx.Unlock() 30 | }() 31 | } 32 | if len(newRedacted) > 3 { 33 | GRedactedStrings[newRedacted] = struct{}{} 34 | } 35 | } 36 | 37 | // FilterRedacted - filter out all known redacted starings 38 | func FilterRedacted(str string) string { 39 | if GRedactedStrings == nil { 40 | return str 41 | } 42 | GRedactedMtx.RLock() 43 | defer func() { 44 | GRedactedMtx.RUnlock() 45 | }() 46 | for redacted := range GRedactedStrings { 47 | str = strings.Replace(str, redacted, Redacted, -1) 48 | } 49 | return str 50 | } 51 | 52 | // GetRedacted - get redacted 53 | func GetRedacted() (str string) { 54 | if GRedactedStrings == nil { 55 | return "[]" 56 | } 57 | GRedactedMtx.RLock() 58 | defer func() { 59 | GRedactedMtx.RUnlock() 60 | }() 61 | str = "[" 62 | for redacted := range GRedactedStrings { 63 | str += redacted + " " 64 | } 65 | str += "]" 66 | return 67 | } 68 | 69 | // AnonymizeURL - remove sensitive data from the URL 70 | func AnonymizeURL(url string) string { 71 | return AnonymizeURLPattern.ReplaceAllString(url, `$1$2$4`) 72 | } 73 | -------------------------------------------------------------------------------- /scripts/bugzilla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DA_BUGZILLA_ES_URL=http://elastic:changeme@127.0.0.1:9200 \ 3 | DA_BUGZILLA_USERNAME="" \ 4 | DA_BUGZILLA_PASSWORD="" \ 5 | DA_DS=bugzilla \ 6 | RAW_INDEX=sds-test-yocto \ 7 | ./dads --bugzilla-origin=https://bugzilla.yoctoproject.org \ 8 | --bugzilla-project=yocto \ 9 | --bugzilla-do-fetch=true --bugzilla-do-enrich=true --bugzilla-fetch-size=25 \ 10 | --bugzilla-enrich-size=25 11 | 12 | 13 | -------------------------------------------------------------------------------- /scripts/bugzillarest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DA_BUGZILLAREST_ES_URL=http://elastic:changeme@127.0.0.1:9200 \ 3 | DA_BUGZILLAREST_USERNAME="" \ 4 | DA_BUGZILLAREST_PASSWORD="" \ 5 | DA_DS=bugzillarest \ 6 | DA_BUGZILLAREST_GAP_URL=localhost:80000 \ 7 | DA_BUGZILLAREST_AFFILIATION_API_URL=$1 \ 8 | DA_BUGZILLAREST_ES_CACHE_URL=$2 \ 9 | DA_BUGZILLAREST_ES_CACHE_USERNAME=$3 \ 10 | DA_BUGZILLAREST_ES_CACHE_PASSWORD=$4 \ 11 | DA_BUGZILLAREST_AUTH0_GRANT_TYPE=$5 \ 12 | DA_BUGZILLAREST_AUTH0_CLIENT_ID=$6 \ 13 | 
DA_BUGZILLAREST_AUTH0_CLIENT_SECRET=$7 \ 14 | DA_BUGZILLAREST_AUTH0_AUDIENCE=$8 \ 15 | DA_BUGZILLAREST_AUTH0_URL=$9 \ 16 | DA_BUGZILLAREST_BRANCH=${10} \ 17 | RAW_INDEX=sds-test-dpdk \ 18 | ./dads --bugzillarest-origin=https://bugs.dpdk.org/ \ 19 | --bugzillarest-project=dpdk \ 20 | --bugzillarest-do-fetch=true --bugzillarest-do-enrich=true --bugzillarest-fetch-size=25 \ 21 | --bugzillarest-enrich-size=25 22 | -------------------------------------------------------------------------------- /scripts/check_confluence.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # start=1 3 | start=1 4 | if [ ! -z "$START" ] 5 | then 6 | start="$START" 7 | fi 8 | e=0 9 | end=$(curl -s 'https://wiki.anuket.io/rest/api/content/search?cql=lastModified%3E%3D%272000-01-01+00%3A00%27+order+by+lastModified&limit=1&start=1' | jq '.totalSize') 10 | while true 11 | do 12 | code=$(curl -s "https://wiki.anuket.io/rest/api/content/search?cql=lastModified%3E%3D%272000-01-01+00%3A00%27+order+by+lastModified&limit=1&start=${start}&expand=ancestors%2Cversion" | jq -rS '.statusCode') 13 | if [ "$code" = "500" ] 14 | then 15 | json=$(curl -s "https://wiki.anuket.io/rest/api/content/search?cql=lastModified%3E%3D%272000-01-01+00%3A00%27+order+by+lastModified&limit=1&start=${start}" | jq -rS '.results[0]') 16 | id=$(echo "$json" | jq -rS '.id') 17 | title=$(echo "$json" | jq -rS '.title') 18 | link=$(echo "$json" | jq -rS '._links.self') 19 | e=$((e+1)) 20 | echo "$e) index=$start/$end, id=$id: \"$title\": $link" 21 | elif [ ! -z "$DBG" ] 22 | then 23 | echo "index $start ok" 24 | fi 25 | start=$((start+1)) 26 | if [ "$start" = "$end" ] 27 | then 28 | break 29 | fi 30 | done 31 | echo "$e error pages" 32 | -------------------------------------------------------------------------------- /scripts/compare_confluence.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES_URL=... 3 | # _ID=ba6f08b6d2527009f4f2f5b359336073cd680edf 4 | # curl -s "${ES_URL}/dads-confluence/_search" | jq '.hits.hits[]._source.uuid' 5 | if [ -z "${ES_URL}" ] 6 | then 7 | echo "$0: you must set ES_URL" 8 | exit 1 9 | fi 10 | if [ -z "${_ID}" ] 11 | then 12 | echo "$0: you must set _ID" 13 | exit 2 14 | fi 15 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-confluence/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 16 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-lfn-shared-confluence/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > p2o.json 17 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 18 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 19 | echo "da-ds:" > report.txt 20 | echo '-------------------------------------------' >> report.txt 21 | cat dads.txt >> report.txt 22 | echo '-------------------------------------------' >> report.txt 23 | echo "p2o:" >> report.txt 24 | echo '-------------------------------------------' >> report.txt 25 | cat p2o.txt >> report.txt 26 | echo '-------------------------------------------' >> report.txt 27 | -------------------------------------------------------------------------------- /scripts/compare_gerrit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES_URL=... 
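# Example invocation (placeholder values): ES_URL=http://127.0.0.1:9200 _ID=914eae314d14f071f873dd669b60569a9702471e ./scripts/compare_gerrit.sh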
3 | # _ID=914eae314d14f071f873dd669b60569a9702471e 4 | # changeset: _ID=4a0e886ac66fede1e5d362292f93182ac7510126_changeset_5754 5 | # comment: _ID=3dce4c3739f027c096ff921d172b9f9174bef90d_changeset_6141_comment_1576597948.0 6 | # patchset: _ID=72f99dc7ce7e51fd997a2021b0837a0e60eaa081_changeset_6419_patchset_2 7 | # approval: _ID=3dce4c3739f027c096ff921d172b9f9174bef90d_changeset_6141_patchset_3_approval_1576599373.0 8 | # curl -s "${ES_URL}/dads-gerrit/_search" | jq '.hits.hits[]._source.id' 9 | if [ -z "${ES_URL}" ] 10 | then 11 | echo "$0: you must set ES_URL" 12 | exit 1 13 | fi 14 | if [ -z "${_ID}" ] 15 | then 16 | echo "$0: you must set _ID" 17 | exit 2 18 | fi 19 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-gerrit/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 20 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-lfai-acumos-gerrit/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > p2o.json 21 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 22 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 23 | echo "da-ds:" > report.txt 24 | echo '-------------------------------------------' >> report.txt 25 | cat dads.txt >> report.txt 26 | echo '-------------------------------------------' >> report.txt 27 | echo "p2o:" >> report.txt 28 | echo '-------------------------------------------' >> report.txt 29 | cat p2o.txt >> report.txt 30 | echo '-------------------------------------------' >> report.txt 31 | -------------------------------------------------------------------------------- /scripts/compare_git.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES_URL=... 3 | # _ID=9d6fc206523095cd6967e856cd15146b276d3632 4 | # curl -s "${ES_URL}/dads-k8s-git/_search" | jq '.hits.hits[]._source.uuid' 5 | if [ -z "${ES_URL}" ] 6 | then 7 | echo "$0: you must set ES_URL" 8 | exit 1 9 | fi 10 | if [ -z "${_ID}" ] 11 | then 12 | echo "$0: you must set _ID" 13 | exit 2 14 | fi 15 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-k8s-git/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 16 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-cncf-k8s-git/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' 
> p2o.json 17 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 18 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 19 | echo "da-ds:" > report.txt 20 | echo '-------------------------------------------' >> report.txt 21 | cat dads.txt >> report.txt 22 | echo '-------------------------------------------' >> report.txt 23 | echo "p2o:" >> report.txt 24 | echo '-------------------------------------------' >> report.txt 25 | cat p2o.txt >> report.txt 26 | echo '-------------------------------------------' >> report.txt 27 | -------------------------------------------------------------------------------- /scripts/compare_github.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES1_URL='http://127.0.0.1:19200' 3 | # ES2_URL="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/ES_URL.prod.secret`" 4 | # _ID=kubernetes-client/gen/issues/1 5 | # _ID=kubernetes-client/gen/pull/1 6 | # curl -s "${ES1_URL}/sds-da-ds-gh-api-github-issue/_search" | jq '.hits.hits[]._source.url_id' 7 | # curl -s "${ES2_URL}/sds-cncf-k8s-github-issue/_search" | jq '.hits.hits[]._source.url_id' 8 | # curl -s "${ES2_URL}/sds-cncf-k8s-github-pull_request/_search" | jq '.hits.hits[]._source.url_id' 9 | # ES1_URL='http://127.0.0.1:19200' ES2_URL="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/ES_URL.prod.secret`" _ID=kubernetes-client/gen/issues/1 ./scripts/compare_github.sh 10 | # ES1_URL='http://127.0.0.1:19200' ES2_URL="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/ES_URL.prod.secret`" _ID=kubernetes-client/gen/pull/1 ./scripts/compare_github.sh 11 | if [ -z "${ES1_URL}" ] 12 | then 13 | echo "$0: you must set ES1_URL" 14 | exit 1 15 | fi 16 | if [ -z "${ES2_URL}" ] 17 | then 18 | echo "$0: you must set ES2_URL" 19 | exit 2 20 | fi 21 | if [ -z "${_ID}" ] 22 | then 23 | echo "$0: you must set _ID" 24 | exit 3 25 | fi 26 | curl -s -H 'Content-Type: application/json' "${ES1_URL}/sds-da-ds-gh-api-github-issue/_search" -d "{\"query\":{\"term\":{\"url_id\":\"${_ID}\"}}}" | jq -rS '.' > dads.json 27 | #curl -s -H 'Content-Type: application/json' "${ES2_URL}/sds-cncf-k8s-github-issue/_search" -d "{\"query\":{\"term\":{\"url_id\":\"${_ID}\"}}}" | jq -rS '.' > p2o.json 28 | curl -s -H 'Content-Type: application/json' "${ES2_URL}/sds-cncf-k8s-github-pull_request/_search" -d "{\"query\":{\"term\":{\"url_id\":\"${_ID}\"}}}" | jq -rS '.' > p2o.json 29 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 30 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 31 | echo "da-ds:" > report.txt 32 | echo '-------------------------------------------' >> report.txt 33 | cat dads.txt >> report.txt 34 | echo '-------------------------------------------' >> report.txt 35 | echo "p2o:" >> report.txt 36 | echo '-------------------------------------------' >> report.txt 37 | cat p2o.txt >> report.txt 38 | echo '-------------------------------------------' >> report.txt 39 | -------------------------------------------------------------------------------- /scripts/compare_groupsio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ES_URL=... 
_ID=914eae314d14f071f873dd669b60569a9702471e 3 | if [ -z "${ES_URL}" ] 4 | then 5 | echo "$0: you must set ES_URL" 6 | exit 1 7 | fi 8 | if [ -z "${_ID}" ] 9 | then 10 | echo "$0: you must set _ID" 11 | exit 2 12 | fi 13 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-groupsio/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 14 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-act-fossology-groupsio/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > p2o.json 15 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 16 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 17 | -------------------------------------------------------------------------------- /scripts/compare_jira.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "${ES_URL}" ] 3 | then 4 | echo "$0: you must set ES_URL" 5 | exit 1 6 | fi 7 | if [ -z "${_ID}" ] 8 | then 9 | echo "$0: you must set _ID" 10 | exit 2 11 | fi 12 | curl -s -H 'Content-Type: application/json' "${ES_URL}/dads-test/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > dads.json 13 | curl -s -H 'Content-Type: application/json' "${ES_URL}/sds-lfn-odl-jira/_search" -d "{\"query\":{\"term\":{\"_id\":\"${_ID}\"}}}" | jq '.' > p2o.json 14 | cat p2o.json | sort -r | uniq > tmp && mv tmp p2o.txt 15 | cat dads.json | sort -r | uniq > tmp && mv tmp dads.txt 16 | -------------------------------------------------------------------------------- /scripts/confluences.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for confluence in `cat confluences.secret` 3 | do 4 | DA_DS=confluence DA_CONFLUENCE_LEGACY_UUID=1 DA_CONFLUENCE_MULTI_ORIGIN=1 DA_CONFLUENCE_ENRICH=1 DA_CONFLUENCE_ES_URL="${ES_URL}" DA_CONFLUENCE_RAW_INDEX=dads-confluence-raw DA_CONFLUENCE_RICH_INDEX=dads-confluence DA_CONFLUENCE_DEBUG=1 DA_CONFLUENCE_DB_PORT=13306 DA_CONFLUENCE_DB_NAME=sortinghat DA_CONFLUENCE_DB_USER=sortinghat DA_CONFLUENCE_DB_PASS=pwd DA_CONFLUENCE_NO_SSL_VERIFY='' DA_CONFLUENCE_MAX_CONTENTS=500 DA_CONFLUENCE_ES_BULK_SIZE=500 DA_CONFLUENCE_URL="$confluence" ./dads 5 | done 6 | 7 | -------------------------------------------------------------------------------- /scripts/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Code coverage generation 4 | # 5 | COVERAGE_DIR="${COVERAGE_DIR:-coverage}" 6 | PKG_LIST=$(go list ./... 
| grep -v /vendor/) 7 | 8 | # Create the coverage files directory 9 | mkdir -p "$COVERAGE_DIR" 10 | 11 | # Create a coverage file for each package 12 | for package in ${PKG_LIST}; do 13 | mkdir -p "${COVERAGE_DIR}/$(dirname ${package})" 14 | go test -covermode=count -coverprofile "${COVERAGE_DIR}/${package}.cov" "$package" 15 | done 16 | 17 | # Merge the coverage profile files 18 | echo 'mode: count' > coverage.cov 19 | find "${COVERAGE_DIR}" -type f -name "*.cov" -exec tail -q -n +2 {} \; >> coverage.cov 20 | 21 | # Display the global code coverage 22 | go tool cover -func=coverage.cov 23 | 24 | # If needed, generate HTML report 25 | if [ "$1" = "html" ]; then 26 | go tool cover -html=coverage.cov -o coverage.html 27 | fi 28 | 29 | # Remove the coverage files directory 30 | rm -rf "$COVERAGE_DIR" -------------------------------------------------------------------------------- /scripts/debug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "${ES_URL}" ] 3 | then 4 | echo "$0: you must set ES_URL" 5 | exit 1 6 | fi 7 | echo "Example breakpoint: break github.com/LF-Engineering/da-ds.DSJira.AffsItems" 8 | DA_DS=jira DA_JIRA_ENRICH=1 DA_JIRA_ES_URL="${ES_URL}" DA_JIRA_RAW_INDEX=dads-test-raw2 DA_JIRA_RICH_INDEX=dads-test DA_JIRA_DEBUG=1 DA_JIRA_DB_PORT=13306 DA_JIRA_DB_NAME=sortinghat DA_JIRA_DB_USER=sortinghat DA_JIRA_DB_PASS=pwd DA_JIRA_URL=https://jira.opendaylight.org dlv debug github.com/LF-Engineering/da-ds/cmd/dads 9 | -------------------------------------------------------------------------------- /scripts/dockerhub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DA_DOCKERHUB_ENRICH=0 \ 3 | DA_DOCKERHUB_ES_URL=http://elastic:changeme@127.0.0.1:9200 \ 4 | DA_DOCKERHUB_NO_INCREMENTAL=1 \ 5 | DA_DOCKERHUB_USERNAME="" \ 6 | DA_DOCKERHUB_PASSWORD="" \ 7 | DA_DOCKERHUB_REPOSITORIES_JSON='[{"Owner":"crops","Repository":"yocto-eol","ESIndex":"sds-yocto-dockerhub"}]' \ 8 | DA_DS=dockerhub \ 9 | DA_DOCKERHUB_HTTP_TIMEOUT=60s \ 10 | DA_DOCKERHUB_RAW_INDEX="sds-yocto-dockerhub-raw" \ 11 | DA_DOCKERHUB_RICH_INDEX="sds-yocto-dockerhub" \ 12 | ./dads 13 | 14 | -------------------------------------------------------------------------------- /scripts/find.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$1" ] 3 | then 4 | echo "You need to provide a path as a first argument" 5 | exit 1 6 | fi 7 | if [ -z "$2" ] 8 | then 9 | echo "You need to provide file name pattern as a second argument" 10 | exit 1 11 | fi 12 | if [ -z "$3" ] 13 | then 14 | echo "You need to provide regexp pattern to search for as a third argument" 15 | exit 1 16 | fi 17 | find "$1" -type f -iname "$2" -not -name "out" -not -path '*.git/*' -exec grep -EHIin "$3" "{}" \; | tee -a out 18 | -------------------------------------------------------------------------------- /scripts/fix_loc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ES="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/ES_URL.$1.secret`" 3 | indices=`curl -s "${ES}/_cat/indices?format=json" | jq -rS '.[].index' | grep 'sds-' | grep -v bitergia | grep -v github | grep -v raw | grep git | uniq | sort` 4 | for i in $indices 5 | do 6 | # TODO: need to fetch distinct origins there, and then for each non-empty origin 7 | data=$(curl -s -XPOST -H 'Content-Type: application/json' "${ES}/_sql?format=json" -d"{\"query\":\"select
min(total_lines_of_code), max(total_lines_of_code) from \\\"${i}\\\"\"}" | jq --compact-output -r ".rows[0]") 8 | mi=$(echo "$data" | jq --compact-output -r '.[0]') 9 | ma=$(echo "$data" | jq --compact-output -r '.[1]') 10 | # FIXME: temporary debug 11 | echo "$i: ($mi,$ma)" 12 | continue 13 | if ( [ "$mi" = "0" ] && [ ! "$ma" = "0" ] && [ ! -z "$ma" ] ) 14 | then 15 | result=$(curl -s -XPOST -H 'Content-Type: application/json' "${ES}/${i}/_update_by_query?pretty" -d"{\"script\":{\"inline\":\"ctx._source.total_lines_of_code=\\\"${ma}\\\";\"},\"query\":{\"term\":{\"total_lines_of_code\":\"0\"}}}" | jq -rS --compact-output '.updated') 16 | echo "$i set $ma LOC result: $result" 17 | else 18 | echo "$i $mi - $ma skipped" 19 | fi 20 | done 21 | -------------------------------------------------------------------------------- /scripts/for_each_go_file.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in `find ./ -type f -iname "*.go"` 3 | do 4 | $1 "$f" || exit 1 5 | done 6 | exit 0 7 | -------------------------------------------------------------------------------- /scripts/gerrits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for d in `cat gerrits.secret` 3 | do 4 | ary=(${d//;/ }) 5 | DA_DS=gerrit DA_GERRIT_LEGACY_UUID=1 DA_GERRIT_ENRICH=1 DA_GERRIT_ES_URL="${ES_URL}" DA_GERRIT_RAW_INDEX=dads-gerrit-raw DA_GERRIT_RICH_INDEX=dads-gerrit DA_GERRIT_DEBUG=1 DA_GERRIT_DB_PORT=13306 DA_GERRIT_DB_NAME=sortinghat DA_GERRIT_DB_USER=sortinghat DA_GERRIT_DB_PASS=pwd DA_GERRIT_NO_SSL_VERIFY='' DA_GERRIT_DISABLE_HOST_KEY_CHECK=1 DA_GERRIT_MAX_REVIEWS=2000 DA_GERRIT_URL="${ary[0]}" DA_GERRIT_USER="${ary[1]}" DA_GERRIT_SSH_KEY_PATH="${ary[2]}" ./dads 6 | done 7 | -------------------------------------------------------------------------------- /scripts/git-non-local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$1" ] 3 | then 4 | echo "$0: you need to specify env: test|prod" 5 | exit 1 6 | fi 7 | export PROJECT_SLUG='lg' 8 | export DA_DS=git 9 | export DA_GIT_NO_AFFILIATION=1 10 | export DA_GIT_DB_HOST="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/SH_HOST.$1.secret`" 11 | export DA_GIT_DB_NAME="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/SH_DB.$1.secret`" 12 | export DA_GIT_DB_PASS="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/SH_PASS.$1.secret`" 13 | export DA_GIT_DB_PORT="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/SH_PORT.$1.secret`" 14 | export DA_GIT_DB_USER="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/SH_USER.$1.secret`" 15 | export DA_GIT_ES_URL="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/ES_URL.$1.secret`" 16 | export DA_GIT_LEGACY_UUID='' 17 | export DA_GIT_PROJECT_SLUG='lg' 18 | export DA_GIT_RAW_INDEX=lg-test-raw 19 | export DA_GIT_RICH_INDEX=lg-test 20 | export DA_GIT_DROP_RAW=1 21 | export DA_GIT_DROP_RICH=1 22 | export DA_GIT_URL='https://github.com/lukaszgryglicki/test-api' 23 | export DA_GIT_PAIR_PROGRAMMING='' 24 | export DA_GIT_ENRICH=1 25 | export DA_GIT_DEBUG='' 26 | ./dads 2>&1 | tee run.log 27 | -------------------------------------------------------------------------------- /scripts/git.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # dev-analytics-import-sh-json/README.md: PASS=rootpwd ./mariadb_local_docker.sh 3 | # 
dev-analytics-import-sh-json/README.md: USR=root PASS=rootpwd SH_USR=shusername SH_PASS=shpwd SH_DB=shdb ./mariadb_init.sh 4 | # dev-analytics-import-bitergia-indexes/README.md: ./es_local_docker.sh 5 | echo "delete from uidentities" | mysql -h127.0.0.1 -P13306 -prootpwd -uroot shdb || exit 1 6 | curl -s -XDELETE 'http://127.0.0.1:19200/*' || exit 1 7 | echo 'da-ds git' 8 | #PROJECT_SLUG='lg' DA_DS=git DA_GIT_NO_AFFILIATION='' DA_GIT_DB_HOST=127.0.0.1 DA_GIT_DB_NAME=shdb DA_GIT_DB_PASS=shpwd DA_GIT_DB_PORT=13306 DA_GIT_DB_USER=shusername DA_GIT_ES_URL='http://127.0.0.1:19200' DA_GIT_LEGACY_UUID='' DA_GIT_PROJECT_SLUG='lg' DA_GIT_RAW_INDEX=da-ds-git-raw DA_GIT_RICH_INDEX=da-ds-git DA_GIT_URL='https://github.com/lukaszgryglicki/trailers-test' DA_GIT_PAIR_PROGRAMMING='' DA_GIT_ENRICH=1 DA_GIT_DEBUG=2 ./dads 2>&1 | tee run.log 9 | PROJECT_SLUG='korg' DA_DS=git DA_GIT_NO_AFFILIATION='' DA_GIT_DB_HOST=127.0.0.1 DA_GIT_DB_NAME=shdb DA_GIT_DB_PASS=shpwd DA_GIT_DB_PORT=13306 DA_GIT_DB_USER=shusername DA_GIT_ES_URL='http://127.0.0.1:19200' DA_GIT_LEGACY_UUID='' DA_GIT_PROJECT_SLUG='korg' DA_GIT_RAW_INDEX=sds-korg-git-raw DA_GIT_RICH_INDEX=sds-korg-git DA_GIT_URL='https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git' DA_GIT_PAIR_PROGRAMMING='' DA_GIT_ENRICH=1 DA_GIT_DEBUG='' ./dads 2>&1 | tee run.log 10 | curl -s 'http://127.0.0.1:19200/sds-korg-git-raw/_search' | jq '.hits.hits[]._source' | tee git-raw.json 11 | curl -s 'http://127.0.0.1:19200/sds-korg-git/_search' | jq '.hits.hits[]._source' | tee git-rich.json 12 | -------------------------------------------------------------------------------- /scripts/git_log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | git log --reverse --topo-order --branches --tags --remotes=origin --raw --numstat --pretty=fuller --decorate=full --parents -M -C -c 3 | -------------------------------------------------------------------------------- /scripts/git_trailers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! 
-f "git.log" ] 3 | then 4 | ./scripts/git_log.sh > git.log || exit 1 5 | fi 6 | grep -E "^[[:space:]]+[a-zA-Z0-9-]+:.+[[:space:]]+<.+>[[:space:]]*$" git.log | sort | uniq 7 | -------------------------------------------------------------------------------- /scripts/github.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # dev-analytics-import-sh-json/README.md: PASS=rootpwd ./mariadb_local_docker.sh 3 | # dev-analytics-import-sh-json/README.md: USR=root PASS=rootpwd SH_USR=shusername SH_PASS=shpwd SH_DB=shdb ./mariadb_init.sh 4 | # dev-analytics-import-bitergia-indexes/README.md: ./es_local_docker.sh 5 | # dev-analytics-affiliation: ./sh/psql.sh docker, then ./sh/psql.sh 6 | # dev-analytics-affiliation: ./sh/local_api.sh 7 | # Example: DA_GITHUB_RETRY=1 PROJECT='my project' ORGREPO='LF-Engineering/da-ds' CLEAN=1 REPOSITORY='' ISSUE=1 PULLREQUEST='' CURL=1 REFRESH='' ./scripts/github.sh 8 | # DA_GITHUB_NO_AFFILIATION=1 9 | if [ -z "$ORGREPO" ] 10 | then 11 | ORGREPO='cncf/devstats' 12 | fi 13 | ary=(${ORGREPO//\// }) 14 | ORG="${ary[0]}" 15 | REPO="${ary[1]}" 16 | # export AUTH0_DATA="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/AUTH0_DATA.test.secret`" 17 | export AUTH0_DATA="`cat ../sync-data-sources/helm-charts/sds-helm/sds-helm/secrets/AUTH0_DATA.prod.secret`" 18 | export DA_DS=github 19 | export DA_GITHUB_AFFILIATION_API_URL='http://127.0.0.1:8080' 20 | export DA_GITHUB_DB_HOST=127.0.0.1 21 | export DA_GITHUB_DB_NAME=shdb 22 | export DA_GITHUB_DB_USER=shusername 23 | export DA_GITHUB_DB_PASS=shpwd 24 | export DA_GITHUB_DB_PORT=13306 25 | export DA_GITHUB_ES_URL='http://127.0.0.1:19200' 26 | export DA_GITHUB_TOKENS="`cat /etc/github/oauths`" 27 | export DA_GITHUB_ORG="$ORG" 28 | export DA_GITHUB_REPO="$REPO" 29 | export DA_GITHUB_PROJECT="$PROJECT" 30 | export DA_GITHUB_ENRICH=1 31 | export DA_GITHUB_DEBUG=1 32 | export PROJECT_SLUG="$ORGREPO" 33 | if [ ! -z "$REFRESH" ] 34 | then 35 | export DA_GITHUB_NO_RAW=1 36 | export DA_GITHUB_REFRESH_AFFS=1 37 | export DA_GITHUB_FORCE_FULL=1 38 | fi 39 | if [ ! -z "$CLEAN" ] 40 | then 41 | echo "delete from uidentities" | mysql -h127.0.0.1 -P13306 -prootpwd -uroot shdb || exit 1 42 | curl -s -XDELETE 'http://127.0.0.1:19200/*' || exit 1 43 | echo '' 44 | fi 45 | echo 'da-ds github' 46 | if [ ! -z "$REPOSITORY" ] 47 | then 48 | DA_GITHUB_RAW_INDEX=sds-da-ds-gh-api-github-repository-raw DA_GITHUB_RICH_INDEX=sds-da-ds-gh-api-github-repository DA_GITHUB_CATEGORY=repository ./dads 2>&1 | tee run-repository.log 49 | if [ ! -z "$CURL" ] 50 | then 51 | curl -s 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-repository-raw/_search?size=10000' | jq -S '.hits.hits[]._source' > github-repository-raw.json 52 | curl -s 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-repository/_search?size=10000' | jq -S '.hits.hits[]._source' > github-repository-rich.json 53 | fi 54 | fi 55 | if [ ! -z "$ISSUE" ] 56 | then 57 | #DA_GITHUB_DATE_FROM=2021-01-01 DA_GITHUB_RAW_INDEX=sds-da-ds-gh-api-github-issue-raw DA_GITHUB_RICH_INDEX=sds-da-ds-gh-api-github-issue DA_GITHUB_CATEGORY=issue ./dads 2>&1 | tee run-issue.log 58 | DA_GITHUB_RAW_INDEX=sds-da-ds-gh-api-github-issue-raw DA_GITHUB_RICH_INDEX=sds-da-ds-gh-api-github-issue DA_GITHUB_CATEGORY=issue ./dads 2>&1 | tee run-issue.log 59 | if [ !
-z "$CURL" ] 60 | then 61 | curl -s -XPOST -H 'Content-Type: application/json' 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-issue-raw/_search?size=10000' -d '{"query":{"term":{"is_github_issue":1}}}' | jq -S '.hits.hits[]._source' > github-issue-raw.json 62 | curl -s -XPOST -H 'Content-Type: application/json' 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-issue/_search?size=10000' -d '{"query":{"term":{"is_github_issue":1}}}' | jq -S '.hits.hits[]._source' > github-issue-rich.json 63 | fi 64 | fi 65 | if [ ! -z "$PULLREQUEST" ] 66 | then 67 | DA_GITHUB_RAW_INDEX=sds-da-ds-gh-api-github-issue-raw DA_GITHUB_RICH_INDEX=sds-da-ds-gh-api-github-issue DA_GITHUB_CATEGORY=pull_request ./dads 2>&1 | tee run-pull-request.log 68 | if [ ! -z "$CURL" ] 69 | then 70 | curl -s -XPOST -H 'Content-Type: application/json' 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-issue-raw/_search?size=10000' -d '{"query":{"term":{"is_github_pull_request":1}}}' | jq -S '.hits.hits[]._source' > github-pull-request-raw.json 71 | curl -s -XPOST -H 'Content-Type: application/json' 'http://127.0.0.1:19200/sds-da-ds-gh-api-github-issue/_search?size=10000' -d '{"query":{"term":{"is_github_pull_request":1}}}' | jq -S '.hits.hits[]._source' > github-pull-request-rich.json 72 | fi 73 | fi 74 | -------------------------------------------------------------------------------- /scripts/github_api_calls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #echo 'PR:' 3 | #curl -s -u "lukaszgryglicki:`cat /etc/github/oauth`" https://api.github.com/repos/lukaszgryglicki/csqconv/pulls/3 4 | echo 'comments:' 5 | curl -s -u "lukaszgryglicki:`cat /etc/github/oauth`" https://api.github.com/repos/lukaszgryglicki/csqconv/pulls/3/comments | jq '.[].body' 6 | echo 'reviews:' 7 | curl -s -u "lukaszgryglicki:`cat /etc/github/oauth`" https://api.github.com/repos/lukaszgryglicki/csqconv/pulls/3/reviews | jq '.[].body' 8 | -------------------------------------------------------------------------------- /scripts/googlegroups.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DA_GOOGLEGROUPS_ENRICH=0 \ 3 | DA_GOOGLEGROUPS_ES_URL="" \ 4 | DA_GOOGLEGROUPS_NO_INCREMENTAL=1 \ 5 | DA_DS=GoogleGroups \ 6 | DA_GOOGLEGROUPS_HTTP_TIMEOUT=60s \ 7 | DA_GOOGLEGROUPS_DB_CONN="" \ 8 | DA_GOOGLEGROUPS_AFFILIATIONS_API_BASE_URL="" \ 9 | DA_GOOGLEGROUPS_ES_CACHE_URL="" \ 10 | DA_GOOGLEGROUPS_ES_CACHE_USERNAME="" \ 11 | DA_GOOGLEGROUPS_ES_CACHE_PASSWORD="" \ 12 | DA_GOOGLEGROUPS_AUTH0_GRANT_TYPE="" \ 13 | DA_GOOGLEGROUPS_AUTH0_CLIENT_ID="" \ 14 | DA_GOOGLEGROUPS_AUTH0_CLIENT_SECRET="" \ 15 | DA_GOOGLEGROUPS_AUTH0_AUDIENCE="" \ 16 | DA_GOOGLEGROUPS_AUTH0_BASE_URL="" \ 17 | DA_GOOGLEGROUPS_ENVIRONMENT="" \ 18 | ./dads --googlegroups-project=project1 --googlegroups-slug=project1 --googlegroups-groupname=finos.org/legend \ 19 | --googlegroups-do-fetch=true --googlegroups-do-enrich=true --googlegroups-fetch-size=1000 \ 20 | --googlegroups-enrich-size=1000 --googlegroups-es-index=sds-project1-dads-googlegroups 21 | -------------------------------------------------------------------------------- /scripts/jenkins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DA_DS=jenkins \ 4 | DA_JENKINS_DADS=true \ 5 | DA_JENKINS_DB_HOST=db_endpoint \ 6 | DA_JENKINS_DB_NAME=db_name \ 7 | DA_JENKINS_DB_PASS=password \ 8 | DA_JENKINS_DB_PORT=3306 \ 9 | DA_JENKINS_DB_USER=user \ 10 | DA_JENKINS_ENRICH=1 \ 11 |
DA_JENKINS_ES_BULK_SIZE=500 \ 12 | DA_JENKINS_ES_SCROLL_SIZE=500 \ 13 | DA_JENKINS_ES_SCROLL_WAIT=2700s \ 14 | DA_JENKINS_ES_URL=https://user:password@url \ 15 | DA_JENKINS_HTTP_TIMEOUT=60s \ 16 | DA_JENKINS_JENKINS_JSON='[{"url":"https://www.jenkins_url.com","project":"ProjectName","index":"sds-ProjectName-"}]' \ 17 | DA_JENKINS_NO_INCREMENTAL=1 \ 18 | ./dads 19 | -------------------------------------------------------------------------------- /scripts/manual_gerrit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$USR" ] 3 | then 4 | echo "$0: you need to specify USR=user-name" 5 | exit 1 6 | fi 7 | if [ -z "$KEY" ] 8 | then 9 | echo "$0: you need to specify KEY=/path/to/gerrit/ssh-key" 10 | exit 2 11 | fi 12 | if [ -z "$GERRIT" ] 13 | then 14 | echo "$0: you need to specify GERRIT=git.opendaylight.org" 15 | exit 3 16 | fi 17 | from=0 18 | page=500 19 | to='' 20 | if [ ! -z "$PAGE" ] 21 | then 22 | page=$PAGE 23 | if [ "$page" -ge "500" ] 24 | then 25 | echo "setting page size to 500, it cannot be any bigger" 26 | page=500 27 | fi 28 | fi 29 | if [ ! -z "$FROM" ] 30 | then 31 | from=$FROM 32 | fi 33 | if [ ! -z "$TO" ] 34 | then 35 | to=$TO 36 | fi 37 | fn=gerrit.secret 38 | if [ ! -z "${FN}" ] 39 | then 40 | fn="${FN}" 41 | fi 42 | > "${fn}" 43 | while true 44 | do 45 | echo -n "from:$from page:$page " 46 | ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i "${KEY}" -p 29418 "${USR}@${GERRIT}" gerrit query after:"1970-01-01 00:00:00" "limit:$page" '(status:open OR status:closed)' --all-approvals --all-reviewers --comments --format=JSON --start="$from" 1>./out 2>/dev/null 47 | rows=$(cat ./out | grep '"rowCount"' | jq -rS '.rowCount') 48 | echo "rows:$rows" 49 | cat ./out >> "${fn}" 50 | if ( [ "$rows" = "0" ] || [ -z "$rows" ] ) 51 | then 52 | echo "finished, rows: $rows" 53 | break 54 | fi 55 | from=$(($from + $page)) 56 | if ( [ !
-z "$to" ] && [ "$from" -ge "$to" ] ) 57 | then 58 | echo "$from >= $to, finished" 59 | break 60 | fi 61 | done 62 | -------------------------------------------------------------------------------- /scripts/mk.go.bak: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | jsoniter "github.com/json-iterator/go" 6 | "io/ioutil" 7 | "os" 8 | "sort" 9 | ) 10 | 11 | func main() { 12 | trailers := map[string]map[string]struct{}{} 13 | for _, arg := range os.Args { 14 | data, _ := ioutil.ReadFile(arg) 15 | m := map[string]interface{}{} 16 | jsoniter.Unmarshal(data, &m) 17 | for k, v := range m { 18 | _, ok := trailers[k] 19 | if !ok { 20 | trailers[k] = map[string]struct{}{} 21 | } 22 | trailers[k][v.(string)] = struct{}{} 23 | } 24 | } 25 | ks := []string{} 26 | for k := range trailers { 27 | ks = append(ks, k) 28 | } 29 | sort.Strings(ks) 30 | s := "GitAllowedTrailers = map[string][]string{\n" 31 | for _, ky := range ks { 32 | m, _ := trailers[ky] 33 | ks := []string{} 34 | for k := range m { 35 | ks = append(ks, k) 36 | } 37 | sort.Strings(ks) 38 | s += ` "` + ky + `": []string{` 39 | for _, k := range ks { 40 | s += `"` + k + `", ` 41 | } 42 | s = s[:len(s)-2] + "},\n" 43 | } 44 | s += "}\n" 45 | fmt.Printf("%s", s) 46 | } 47 | -------------------------------------------------------------------------------- /scripts/pipermail.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DA_PIPERMAIL_ENRICH=0 \ 3 | DA_PIPERMAIL_ES_URL=http://elastic:changeme@127.0.0.1:9200 \ 4 | DA_PIPERMAIL_NO_INCREMENTAL=1 \ 5 | DA_DS=pipermail \ 6 | DA_PIPERMAIL_HTTP_TIMEOUT=60s \ 7 | DA_PIPERMAIL_RAW_INDEX="sds-yocto-dads-pipermail-raw" \ 8 | DA_PIPERMAIL_RICH_INDEX="sds-yocto-dads-pipermail" \ 9 | DA_PIPERMAIL_DB_CONN="" \ 10 | DA_PIPERMAIL_AFFILIATIONS_API_BASE_URL="" \ 11 | DA_PIPERMAIL_ES_CACHE_URL="" \ 12 | DA_PIPERMAIL_ES_CACHE_USERNAME="" \ 13 | DA_PIPERMAIL_ES_CACHE_PASSWORD="" \ 14 | DA_PIPERMAIL_AUTH0_GRANT_TYPE="" \ 15 | DA_PIPERMAIL_AUTH0_CLIENT_ID="" \ 16 | DA_PIPERMAIL_AUTH0_CLIENT_SECRET="" \ 17 | DA_PIPERMAIL_AUTH0_AUDIENCE="" \ 18 | DA_PIPERMAIL_AUTH0_BASE_URL="" \ 19 | DA_PIPERMAIL_ENVIRONMENT="" \ 20 | ./dads --pipermail-origin=https://www.openembedded.org/pipermail/openembedded-architecture/ \ 21 | --pipermail-project=yocto --pipermail-slug=yocto --pipermail-groupname=openembedded-architecture \ 22 | --pipermail-do-fetch=true --pipermail-do-enrich=true --pipermail-fetch-size=1000 \ 23 | --pipermail-enrich-size=1000 --pipermail-es-index=sds-yocto-dads-pipermail 24 | -------------------------------------------------------------------------------- /scripts/vet_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | $1 *.go || exit 1 3 | for dir in `find ./cmd/ -mindepth 1 -type d` 4 | do 5 | $1 $dir/*.go || exit 1 6 | done 7 | exit 0 8 | -------------------------------------------------------------------------------- /sds-rocketchat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SDS_ST='' DA_ROCKETCHAT_ST='' SDS_DATASOURCES_RE='^rocketchat$' SDS_SKIP_ES_DATA=1 SDS_SKIP_ES_LOG=1 SDS_DEBUG=2 SDS_CMDDEBUG=2 SDS_ONLY_P2O=1 SDS_SKIP_AFFS='' SDS_DRY_RUN='' syncdatasources 2>&1 | tee -a /sds.log 3 | -------------------------------------------------------------------------------- /sql.go: -------------------------------------------------------------------------------- 1 | 
package dads 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "reflect" 7 | "time" 8 | 9 | _ "github.com/go-sql-driver/mysql" // Use MySQL driver 10 | "github.com/jmoiron/sqlx" 11 | ) 12 | 13 | // ConnectAffiliationsDB - connect to affiliations DB 14 | func ConnectAffiliationsDB(ctx *Ctx) { 15 | if !ctx.AffsDBConfigured() { 16 | Fatalf("requested connection to affiliations DB while connection parameters are not set") 17 | } 18 | connStr := ctx.DBConn 19 | if connStr == "" { 20 | if ctx.DBName == "" { 21 | Fatalf("requested connection to affiliations DB while DB name was not specified") 22 | } 23 | if ctx.DBUser == "" { 24 | Fatalf("requested connection to affiliations DB while DB user was not specified") 25 | } 26 | hostPort := ctx.DBHost 27 | if hostPort == "" { 28 | hostPort = "127.0.0.1" 29 | } 30 | if ctx.DBPort != "" { 31 | hostPort += ":" + ctx.DBPort 32 | } 33 | userPass := ctx.DBUser 34 | if ctx.DBPass != "" { 35 | userPass += ":" + ctx.DBPass 36 | } 37 | opts := ctx.DBOpts 38 | if opts == "" { 39 | opts = "charset=utf8&parseTime=true" 40 | } 41 | // user:pwd@tcp(127.0.0.1:3306)/db?charset=utf8&parseTime=true 42 | connStr = fmt.Sprintf("%s@tcp(%s)/%s?%s", userPass, hostPort, ctx.DBName, opts) 43 | } 44 | if ctx.Debug > 0 { 45 | Printf("affiliations DB connect string: %s\n", connStr) 46 | } 47 | d, err := sqlx.Connect("mysql", connStr) 48 | FatalOnError(err) 49 | d.SetMaxOpenConns(3) 50 | ctx.DB = d 51 | FatalOnError(SetDBSessionOrigin(ctx)) 52 | } 53 | 54 | // SetDBSessionOrigin - Set Session DB variable @origin to 'dads' so we will know which tool performed the DB operation 55 | func SetDBSessionOrigin(ctx *Ctx) (err error) { 56 | _, err = ExecSQL(ctx, nil, "set @origin = ?", DADSOrigin) 57 | return err 58 | } 59 | 60 | // QueryOut - display DB query 61 | func QueryOut(ctx *Ctx, in bool, err error, query string, args ...interface{}) { 62 | pref := "<<< " 63 | if in { 64 | pref = ">>> " 65 | } 66 | q := pref + query + "\n" 67 | if (err != nil || ctx.DebugSQL > 1) && len(args) > 0 { 68 | s := "" 69 | for vi, vv := range args { 70 | switch v := vv.(type) { 71 | case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, complex64, complex128, string, bool, time.Time: 72 | s += fmt.Sprintf("%d:%+v ", vi+1, v) 73 | case *int, *int8, *int16, *int32, *int64, *uint, *uint8, *uint16, *uint32, *uint64, *float32, *float64, *complex64, *complex128, *string, *bool, *time.Time: 74 | s += fmt.Sprintf("%d:%+v ", vi+1, v) 75 | case nil: 76 | s += fmt.Sprintf("%d:(null) ", vi+1) 77 | default: 78 | s += fmt.Sprintf("%d:%+v ", vi+1, reflect.ValueOf(vv)) 79 | } 80 | } 81 | q += "[" + s + "]\n" 82 | } 83 | if err != nil || ctx.DebugSQL > 0 { 84 | Printf("%s", q) 85 | if err != nil { 86 | Printf("Error: %+v\n", err) 87 | } 88 | } 89 | } 90 | 91 | // ExecDB - execute DB query without transaction 92 | func ExecDB(ctx *Ctx, query string, args ...interface{}) (res sql.Result, err error) { 93 | if err != nil || ctx.DebugSQL > 0 { 94 | QueryOut(ctx, true, err, query, args...) 95 | } 96 | res, err = ctx.DB.Exec(query, args...) 97 | if err != nil || ctx.DebugSQL > 0 { 98 | QueryOut(ctx, false, err, query, args...) 99 | } 100 | return 101 | } 102 | 103 | // ExecTX - execute DB query with transaction 104 | func ExecTX(ctx *Ctx, tx *sql.Tx, query string, args ...interface{}) (res sql.Result, err error) { 105 | if err != nil || ctx.DebugSQL > 0 { 106 | QueryOut(ctx, true, err, query, args...) 107 | } 108 | res, err = tx.Exec(query, args...)
109 | if err != nil || ctx.DebugSQL > 0 { 110 | QueryOut(ctx, false, err, query, args...) 111 | } 112 | return 113 | } 114 | 115 | // ExecSQL - execute db query with transaction if provided 116 | func ExecSQL(ctx *Ctx, tx *sql.Tx, query string, args ...interface{}) (sql.Result, error) { 117 | if tx == nil { 118 | return ExecDB(ctx, query, args...) 119 | } 120 | return ExecTX(ctx, tx, query, args...) 121 | } 122 | 123 | // QueryDB - query database without transaction 124 | func QueryDB(ctx *Ctx, query string, args ...interface{}) (rows *sql.Rows, err error) { 125 | if err != nil || ctx.DebugSQL > 0 { 126 | QueryOut(ctx, true, err, query, args...) 127 | } 128 | rows, err = ctx.DB.Query(query, args...) 129 | if err != nil || ctx.DebugSQL > 0 { 130 | QueryOut(ctx, false, err, query, args...) 131 | } 132 | return 133 | } 134 | 135 | // QueryTX - query database with transaction 136 | func QueryTX(ctx *Ctx, tx *sql.Tx, query string, args ...interface{}) (rows *sql.Rows, err error) { 137 | if err != nil || ctx.DebugSQL > 0 { 138 | QueryOut(ctx, true, err, query, args...) 139 | } 140 | rows, err = tx.Query(query, args...) 141 | if err != nil || ctx.DebugSQL > 0 { 142 | QueryOut(ctx, false, err, query, args...) 143 | } 144 | return 145 | } 146 | 147 | // QuerySQL - query DB using transaction if provided 148 | func QuerySQL(ctx *Ctx, tx *sql.Tx, query string, args ...interface{}) (*sql.Rows, error) { 149 | if tx == nil { 150 | return QueryDB(ctx, query, args...) 151 | } 152 | return QueryTX(ctx, tx, query, args...) 153 | } 154 | -------------------------------------------------------------------------------- /sql/update_empty_names.sql: -------------------------------------------------------------------------------- 1 | begin; 2 | update ignore identities set name = concat(substring_index(trim(both '@' from trim(email)), '@', 1), '-MISSING-NAME') where (name = '' or name is null) and not (email = '' or email is null); 3 | update ignore identities set name = concat(substring_index(trim(both '@' from trim(username)), '@', 1), '-MISSING-NAME') where (name = '' or name is null) and not (username = '' or username is null); 4 | update ignore identities set name = concat(substring_index(trim(both '@' from trim(name)), '@', 1), '-REDACTED-EMAIL') where instr(trim(both '@' from trim(name)), '@') > 1; 5 | update ignore identities set username = concat(substring_index(trim(both '@' from trim(username)), '@', 1), '-REDACTED-EMAIL') where instr(trim(both '@' from trim(username)), '@') > 1; 6 | update ignore profiles p, identities i set p.name = i.name where p.uuid = i.uuid and (p.name is null or p.name = '') and not (i.name = '' or i.name is null); 7 | commit; 8 | -------------------------------------------------------------------------------- /test/time.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | ) 8 | 9 | // YMDHMS - return time defined by args 10 | func YMDHMS(in ...int) time.Time { 11 | m := 1 12 | d := 1 13 | h := 0 14 | mi := 0 15 | s := 0 16 | l := len(in) 17 | if l >= 2 { 18 | m = in[1] 19 | } 20 | if l >= 3 { 21 | d = in[2] 22 | } 23 | if l >= 4 { 24 | h = in[3] 25 | } 26 | if l >= 5 { 27 | mi = in[4] 28 | } 29 | if l >= 6 { 30 | s = in[5] 31 | } 32 | t := time.Date( 33 | in[0], 34 | time.Month(m), 35 | d, 36 | h, 37 | mi, 38 | s, 39 | 0, 40 | time.UTC, 41 | ) 42 | if t.Year() != in[0] || t.Month() != time.Month(m) || t.Day() != d || t.Hour() != h || t.Minute() != mi || t.Second() != s { 43 | 
fmt.Printf("Expected to set date from %v, got %v\n", in, t) 44 | os.Exit(1) 45 | } 46 | return t 47 | } 48 | -------------------------------------------------------------------------------- /threads.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "runtime" 5 | "sync" 6 | ) 7 | 8 | var ( 9 | // MT - are we running in multithreaded mode? 10 | MT = false 11 | thrN = 0 12 | thrNMtx = &sync.Mutex{} 13 | ) 14 | 15 | // SetMT - we're in multithreaded mode, set up global cache mutexes 16 | func SetMT() { 17 | if uuidsNonEmptyCacheMtx == nil { 18 | uuidsNonEmptyCacheMtx = &sync.RWMutex{} 19 | } 20 | if uuidsAffsCacheMtx == nil { 21 | uuidsAffsCacheMtx = &sync.RWMutex{} 22 | } 23 | if identityCacheMtx == nil { 24 | identityCacheMtx = &sync.RWMutex{} 25 | } 26 | if rollsCacheMtx == nil { 27 | rollsCacheMtx = &sync.RWMutex{} 28 | } 29 | if i2uCacheMtx == nil { 30 | i2uCacheMtx = &sync.RWMutex{} 31 | } 32 | if emailsCacheMtx == nil { 33 | emailsCacheMtx = &sync.RWMutex{} 34 | } 35 | if esCacheMtx == nil { 36 | esCacheMtx = &sync.RWMutex{} 37 | } 38 | if memCacheMtx == nil { 39 | memCacheMtx = &sync.RWMutex{} 40 | } 41 | if parseDateCacheMtx == nil { 42 | parseDateCacheMtx = &sync.RWMutex{} 43 | } 44 | if gTokenEnvMtx == nil { 45 | gTokenEnvMtx = &sync.Mutex{} 46 | } 47 | if gTokenMtx == nil { 48 | gTokenMtx = &sync.Mutex{} 49 | } 50 | MT = true 51 | } 52 | 53 | // ResetThreadsNum - allows clearing current setting so the new one can be applied 54 | func ResetThreadsNum(ctx *Ctx) { 55 | thrNMtx.Lock() 56 | defer thrNMtx.Unlock() 57 | thrN = 0 58 | MT = false 59 | uuidsNonEmptyCacheMtx = nil 60 | uuidsAffsCacheMtx = nil 61 | identityCacheMtx = nil 62 | rollsCacheMtx = nil 63 | i2uCacheMtx = nil 64 | emailsCacheMtx = nil 65 | esCacheMtx = nil 66 | memCacheMtx = nil 67 | parseDateCacheMtx = nil 68 | gTokenEnvMtx = nil 69 | gTokenMtx = nil 70 | } 71 | 72 | // GetThreadsNum returns the number of available CPUs 73 | // If environment variable DA_DS_ST is set it returns 1 74 | // It can be used to debug the single-threaded version 75 | func GetThreadsNum(ctx *Ctx) int { 76 | thrNMtx.Lock() 77 | defer thrNMtx.Unlock() 78 | if thrN > 0 { 79 | return thrN 80 | } 81 | defer func() { 82 | if ctx.Debug > 0 { 83 | Printf("using %d threads\n", thrN) 84 | } 85 | }() 86 | if ctx.ST { 87 | thrN = 1 88 | return thrN 89 | } 90 | // Use environment variable to have singlethreaded version 91 | if ctx.NCPUs > 0 { 92 | n := int(float64(runtime.NumCPU()) * ctx.NCPUsScale) 93 | if ctx.NCPUs > n { 94 | ctx.NCPUs = n 95 | } 96 | runtime.GOMAXPROCS(ctx.NCPUs) 97 | thrN = ctx.NCPUs 98 | if thrN > 1 { 99 | SetMT() 100 | } 101 | return thrN 102 | } 103 | thrN = int(float64(runtime.NumCPU()) * ctx.NCPUsScale) 104 | runtime.GOMAXPROCS(thrN) 105 | if thrN > 1 { 106 | SetMT() 107 | } 108 | return thrN 109 | } 110 | -------------------------------------------------------------------------------- /threads_test.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestGetThreadsNum(t *testing.T) { 9 | // Environment context parse 10 | var ctx Ctx 11 | FatalOnError(os.Setenv("DA_DS", "ds")) 12 | ctx.Init() 13 | 14 | // Get actual number of threads available 15 | nThreads := GetThreadsNum(&ctx) 16 | ResetThreadsNum(&ctx) 17 | 18 | // Set context's ST/NCPUs manually (don't need to repeat tests from context_test.go) 19 | var testCases = []struct { 20 | ST bool 21 |
NCPUs int 22 | NCPUsScale float64 23 | expected int 24 | }{ 25 | {ST: false, NCPUs: 0, NCPUsScale: 1.0, expected: nThreads}, 26 | {ST: false, NCPUs: 1, NCPUsScale: 1.0, expected: 1}, 27 | {ST: false, NCPUs: -1, NCPUsScale: 1.0, expected: nThreads}, 28 | {ST: false, NCPUs: 2, NCPUsScale: 1.0, expected: 2}, 29 | {ST: true, NCPUs: 0, NCPUsScale: 1.0, expected: 1}, 30 | {ST: true, NCPUs: 1, NCPUsScale: 1.0, expected: 1}, 31 | {ST: true, NCPUs: -1, NCPUsScale: 1.0, expected: 1}, 32 | {ST: true, NCPUs: 2, NCPUsScale: 1.0, expected: 1}, 33 | {ST: false, NCPUs: 2, NCPUsScale: 1.0, expected: 2}, 34 | {ST: false, NCPUs: nThreads + 1, NCPUsScale: 1.0, expected: nThreads}, 35 | {ST: true, NCPUs: nThreads + 1, NCPUsScale: 1.0, expected: 1}, 36 | {ST: false, NCPUs: 0, NCPUsScale: 2.0, expected: nThreads * 2}, 37 | {ST: false, NCPUs: 1, NCPUsScale: 2.0, expected: 1}, 38 | {ST: false, NCPUs: -1, NCPUsScale: 2.0, expected: nThreads * 2}, 39 | {ST: false, NCPUs: 2, NCPUsScale: 2.0, expected: 2}, 40 | {ST: true, NCPUs: 0, NCPUsScale: 2.0, expected: 1}, 41 | {ST: true, NCPUs: 1, NCPUsScale: 2.0, expected: 1}, 42 | {ST: true, NCPUs: -1, NCPUsScale: 2.0, expected: 1}, 43 | {ST: false, NCPUs: 2, NCPUsScale: 2.0, expected: 2}, 44 | {ST: false, NCPUs: nThreads + 1, NCPUsScale: 2.0, expected: nThreads + 1}, 45 | {ST: true, NCPUs: 2, NCPUsScale: 2.0, expected: 1}, 46 | {ST: true, NCPUs: nThreads + 1, NCPUsScale: 2.0, expected: 1}, 47 | } 48 | // Execute test cases 49 | for index, test := range testCases { 50 | ctx.ST = test.ST 51 | ctx.NCPUs = test.NCPUs 52 | ctx.NCPUsScale = test.NCPUsScale 53 | expected := test.expected 54 | got := GetThreadsNum(&ctx) 55 | if got != expected { 56 | t.Errorf( 57 | "test number %d, expected to return %d threads, got %d (default is %d on this machine)", 58 | index+1, expected, got, nThreads, 59 | ) 60 | } 61 | ResetThreadsNum(&ctx) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /time_test.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestParseDateWithTz(t *testing.T) { 8 | var testCases = []struct { 9 | input string 10 | expectedStr string 11 | expectedTz float64 12 | expectedValid bool 13 | }{ 14 | {input: "Mon, 30 Lut 2019 15:15:39 +0000", expectedStr: "", expectedValid: false, expectedTz: 0.0}, 15 | {input: "Mon, 30 Sep 2019 15:15:39 +0300", expectedStr: "2019-09-30T12:15:39Z", expectedValid: true, expectedTz: 3.0}, 16 | {input: "TUE, 1 oCt 2019 15:15:39 -1200", expectedStr: "2019-10-02T03:15:39Z", expectedValid: true, expectedTz: -12.0}, 17 | {input: "TUE, 1 oCt 2019 15:15:39 -1200", expectedStr: "2019-10-02T03:15:39Z", expectedValid: true, expectedTz: -12.0}, 18 | {input: "23 Dec 2013 14:51:30 gmt", expectedStr: "2013-12-23T14:51:30Z", expectedValid: true, expectedTz: 0.0}, 19 | {input: "> Tue, 02 Jul 2013 02:28:30 GMT", expectedStr: "2013-07-02T02:28:30Z", expectedValid: true, expectedTz: 0.0}, 20 | {input: "2017-04-03 09:52:03 -0700", expectedStr: "2017-04-03T16:52:03Z", expectedValid: true, expectedTz: -7.0}, 21 | {input: "2017-11-19 09:52:03 -1000", expectedStr: "2017-11-19T19:52:03Z", expectedValid: true, expectedTz: -10.0}, 22 | {input: ">>\t Wed, 29 Jan \t 2003 16:55\t +0200 (Pacific Standard Time)", expectedStr: "2003-01-29T14:55:00Z", expectedValid: true, expectedTz: 2.0}, 23 | {input: "Wed Nov 6 09:24:41 2019", expectedStr: "2019-11-06T09:24:41Z", expectedValid: true, expectedTz: 0.0}, 24 | {input: "> Wed Nov 06 
09:24:41 19", expectedStr: "2019-11-06T09:24:41Z", expectedValid: true, expectedTz: 0.0}, 25 | {input: "Wed Nov 06 09:24 19", expectedStr: "2019-11-06T09:24:00Z", expectedValid: true, expectedTz: 0.0}, 26 | {input: "30 Sep 19\t15:15", expectedStr: "2019-09-30T15:15:00Z", expectedValid: true, expectedTz: 0.0}, 27 | {input: "2017-11-19T09:52:03", expectedStr: "2017-11-19T09:52:03Z", expectedValid: true, expectedTz: 0.0}, 28 | {input: "2017-11-19T09:52:03Z", expectedStr: "2017-11-19T09:52:03Z", expectedValid: true, expectedTz: 0.0}, 29 | {input: "2017-11-19\t09:52:03Z", expectedStr: "2017-11-19T09:52:03Z", expectedValid: true, expectedTz: 0.0}, 30 | {input: "Fri, 12 February 2016 14:53:49 +0900", expectedStr: "2016-02-12T05:53:49Z", expectedValid: true, expectedTz: 9.0}, 31 | {input: "Fri, 12 February 2016 14:53:49 +0430", expectedStr: "2016-02-12T10:23:49Z", expectedValid: true, expectedTz: 4.5}, 32 | {input: "Fri, 12 February 2016 14:53:49 +0430", expectedStr: "2016-02-12T10:23:49Z", expectedValid: true, expectedTz: 4.5}, 33 | {input: "Wed Dec 5 06:04:38 2018 -1000", expectedStr: "2018-12-05T16:04:38Z", expectedValid: true, expectedTz: -10.0}, 34 | {input: "Fri, 12 February 2016 14:53:49 +1130", expectedStr: "2016-02-12T03:23:49Z", expectedValid: true, expectedTz: 11.5}, 35 | {input: "Fri, 12 February 2016 14:53:49 +1200", expectedStr: "2016-02-12T02:53:49Z", expectedValid: true, expectedTz: 12.0}, 36 | {input: "Fri, 12 February 2016 14:53:49 -0600", expectedStr: "2016-02-12T20:53:49Z", expectedValid: true, expectedTz: -6.0}, 37 | {input: "Fri, 12 February 2016 14:53:49 -0030", expectedStr: "2016-02-12T15:23:49Z", expectedValid: true, expectedTz: -0.5}, 38 | {input: "Fri, 12 February 2016 14:53:49 -1030", expectedStr: "2016-02-13T01:23:49Z", expectedValid: true, expectedTz: -10.5}, 39 | {input: "Fri, 12 February 2016 14:53:49 -1200", expectedStr: "2016-02-13T02:53:49Z", expectedValid: true, expectedTz: -12.0}, 40 | {input: "2013-07-02 02:28:30 +0000 UTC", expectedStr: "2013-07-02T02:28:30Z", expectedValid: true, expectedTz: 0.0}, 41 | } 42 | // Execute test cases 43 | for index, test := range testCases { 44 | gotDt, _, gotTz, gotValid := ParseDateWithTz(test.input) 45 | if gotValid != test.expectedValid { 46 | t.Errorf("test number %d, expected '%s' validation result %v, got %v", index+1, test.input, test.expectedValid, gotValid) 47 | } else { 48 | gotStr := "" 49 | if gotValid { 50 | gotStr = ToYMDTHMSZDate(gotDt) 51 | } 52 | if gotStr != test.expectedStr { 53 | t.Errorf("test number %d, expected '%s' to parse to '%s'/%.1f, got '%s'/%.1f", index+1, test.input, test.expectedStr, test.expectedTz, gotStr, gotTz) 54 | } 55 | if gotTz != test.expectedTz { 56 | t.Errorf("test number %d, expected '%s' to parse to '%s'/%.1f, got '%s'/%.1f", index+1, test.input, test.expectedStr, test.expectedTz, gotStr, gotTz) 57 | } 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /token.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "encoding/base64" 5 | "fmt" 6 | "os" 7 | "strconv" 8 | "sync" 9 | "time" 10 | 11 | "github.com/LF-Engineering/da-ds/build" 12 | 13 | "github.com/LF-Engineering/dev-analytics-libraries/auth0" 14 | "github.com/LF-Engineering/dev-analytics-libraries/elastic" 15 | "github.com/LF-Engineering/dev-analytics-libraries/http" 16 | "github.com/LF-Engineering/dev-analytics-libraries/slack" 17 | jsoniter "github.com/json-iterator/go" 18 | ) 19 | 20 | var ( 21 | 
gAuth0Client *auth0.ClientProvider 22 | gTokenEnv string 23 | gTokenEnvMtx *sync.Mutex 24 | ) 25 | 26 | // InitializeAuth0 - initializes Auth0 client using data stored in AUTH0_DATA 27 | func InitializeAuth0() error { 28 | var err error 29 | auth0DataB64 := os.Getenv("AUTH0_DATA") 30 | if auth0DataB64 == "" { 31 | return fmt.Errorf("you must specify AUTH0_DATA (so the program can generate an API token) or specify token with JWT_TOKEN") 32 | } 33 | var auth0Data []byte 34 | auth0Data, err = base64.StdEncoding.DecodeString(auth0DataB64) 35 | if err != nil { 36 | Printf("decode base64 error: %+v\n", err) 37 | return err 38 | } 39 | //fmt.Printf("auth0Data: %v\n", auth0Data) 40 | var data map[string]string 41 | err = jsoniter.Unmarshal([]byte(auth0Data), &data) 42 | if err != nil { 43 | Printf("unmarshal error: %+v\n", err) 44 | return err 45 | } 46 | AddRedacted(data["es_url"], true) 47 | AddRedacted(data["es_user"], true) 48 | AddRedacted(data["es_pass"], true) 49 | AddRedacted(data["client_id"], true) 50 | AddRedacted(data["client_secret"], true) 51 | AddRedacted(data["audience"], true) 52 | AddRedacted(data["url"], true) 53 | AddRedacted(data["slack_webhook_url"], true) 54 | // Providers 55 | httpClientProvider := http.NewClientProvider(60 * time.Second) 56 | esCacheClientProvider, err := elastic.NewClientProvider( 57 | &elastic.Params{ 58 | URL: data["es_url"], 59 | Username: data["es_user"], 60 | Password: data["es_pass"], 61 | }) 62 | if err != nil { 63 | Printf("ES client provider error: %+v\n", err) 64 | return err 65 | } 66 | appName := build.AppName 67 | ds := os.Getenv("DA_DS") 68 | if ds != "" { 69 | appName += "-" + ds 70 | } 71 | appNameVersion := fmt.Sprintf("%s-%v", appName, strconv.FormatInt(time.Now().Unix(), 10)) 72 | slackProvider := slack.New(data["slack_webhook_url"]) 73 | gAuth0Client, err = auth0.NewAuth0Client( 74 | data["env"], 75 | data["grant_type"], 76 | data["client_id"], 77 | data["client_secret"], 78 | data["audience"], 79 | data["url"], 80 | httpClientProvider, 81 | esCacheClientProvider, 82 | &slackProvider, 83 | appNameVersion, 84 | ) 85 | if err == nil { 86 | gTokenEnv = data["env"] 87 | } 88 | Printf("initialized %s auth0\n", gTokenEnv) 89 | return err 90 | } 91 | 92 | // GetAPIToken - return an API token to use dev-analytics-api API calls 93 | // If JWT_TOKEN env is specified - just use the provided token without any checks 94 | // Else get auth0 data from AUTH0_DATA and generate/reuse a token stored in ES cache 95 | func GetAPIToken() (string, error) { 96 | envToken := os.Getenv("JWT_TOKEN") 97 | if envToken != "" { 98 | return envToken, nil 99 | } 100 | if gTokenEnvMtx != nil { 101 | gTokenEnvMtx.Lock() 102 | defer gTokenEnvMtx.Unlock() 103 | } 104 | if gTokenEnv == "" { 105 | err := InitializeAuth0() 106 | if err != nil { 107 | return "", err 108 | } 109 | } 110 | token, err := gAuth0Client.GetToken() 111 | return token, err 112 | } 113 | -------------------------------------------------------------------------------- /util/dto.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | // ElasticResponse ... 4 | type ElasticResponse struct { 5 | Took int 6 | Errors bool 7 | Items []ElasticResponseItem 8 | } 9 | 10 | // ElasticResponseItem ... 11 | type ElasticResponseItem struct { 12 | Index ESResponseIndex 13 | } 14 | 15 | // ESResponseIndex ... 16 | type ESResponseIndex struct { 17 | ID string `json:"_id"` 18 | Status int 19 | } 20 | 21 | // EnrollmentOrgs ... 
22 | type EnrollmentOrgs struct { 23 | Org string 24 | Orgs []string 25 | } 26 | -------------------------------------------------------------------------------- /util/failureHandler.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | b64 "encoding/base64" 5 | "fmt" 6 | 7 | "github.com/LF-Engineering/dev-analytics-libraries/elastic" 8 | jsoniter "github.com/json-iterator/go" 9 | ) 10 | 11 | // HTTPClientProvider used in connecting to remote http server 12 | type HTTPClientProvider interface { 13 | Request(url string, method string, header map[string]string, body []byte, params map[string]string) (statusCode int, resBody []byte, err error) 14 | } 15 | 16 | // Auth0Client ... 17 | type Auth0Client interface { 18 | GetToken() (string, error) 19 | } 20 | 21 | // HandleGapData ... 22 | func HandleGapData(gapURL string, HTTPRequest HTTPClientProvider, data []elastic.BulkData, auth0Client Auth0Client, env string) error { 23 | 24 | token, err := auth0Client.GetToken() 25 | if err != nil { 26 | return err 27 | } 28 | byteData, err := jsoniter.Marshal(data) 29 | if err != nil { 30 | return err 31 | } 32 | dataEnc := b64.StdEncoding.EncodeToString(byteData) 33 | gapBody := map[string]map[string]string{"index": {"content": dataEnc}} 34 | bData, err := jsoniter.Marshal(gapBody) 35 | if err != nil { 36 | return err 37 | } 38 | header := make(map[string]string) 39 | header["Authorization"] = fmt.Sprintf("Bearer %s", token) 40 | 41 | if gapURL != "" { 42 | _, _, err = HTTPRequest.Request(gapURL, "POST", header, bData, nil) 43 | if err != nil { 44 | return err 45 | } 46 | } 47 | return nil 48 | } 49 | 50 | // HandleFailedData ... 51 | func HandleFailedData(data []elastic.BulkData, byteResponse []byte) (failedIndexes []elastic.BulkData, err error) { 52 | var esRes ElasticResponse 53 | err = jsoniter.Unmarshal(byteResponse, &esRes) 54 | if err != nil { 55 | return failedIndexes, err 56 | } 57 | 58 | // loop through the elastic response to get failed indexes 59 | for _, item := range esRes.Items { 60 | if item.Index.Status != 200 { 61 | var singleBulk elastic.BulkData 62 | // loop through the real data to get the failed ones 63 | for _, el := range data { 64 | if el.ID == item.Index.ID { 65 | singleBulk = el 66 | break 67 | } 68 | } 69 | failedIndexes = append(failedIndexes, singleBulk) 70 | } 71 | } 72 | return failedIndexes, nil 73 | } 74 | -------------------------------------------------------------------------------- /util/helper.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net" 7 | "regexp" 8 | "strings" 9 | "time" 10 | ) 11 | 12 | var emailRegex = regexp.MustCompile(`^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,4}$`) 13 | 14 | // IsEmailValid validates an email address 15 | func IsEmailValid(e string) bool { 16 | if len(e) < 3 || len(e) > 254 { 17 | return false 18 | } 19 | 20 | if !emailRegex.MatchString(e) { 21 | return false 22 | } 23 | 24 | parts := strings.Split(e, "@") 25 | mx, err := net.LookupMX(parts[1]) 26 | if err != nil || len(mx) == 0 { 27 | return false 28 | } 29 | 30 | return true 31 | } 32 | 33 | // GetEnrollments gets an identity's single and multiple organizations 34 | func GetEnrollments(auth0ClientProvider Auth0Client, httpClientProvider HTTPClientProvider, AffBaseURL string, projectSlug string, uuid string, date time.Time) (string, []string, error) { 35 | // a space is sent for an empty projectSlug to avoid an invalid URL; the slug is
optional and whatever you send it will return the same enrollment result 36 | if projectSlug == "" { 37 | projectSlug = " " 38 | } 39 | URL := fmt.Sprintf("%s/affiliation/%s/both/%s/%s", AffBaseURL, projectSlug, uuid, date.Format("2006-02-01 15:04:05")) 40 | token, err := auth0ClientProvider.GetToken() 41 | if err != nil { 42 | return "", []string{}, err 43 | } 44 | 45 | headers := make(map[string]string) 46 | headers["Authorization"] = "Bearer " + token 47 | 48 | _, body, err := httpClientProvider.Request(URL, "GET", headers, nil, nil) 49 | if err != nil { 50 | return "", []string{}, err 51 | } 52 | 53 | var res EnrollmentOrgs 54 | err = json.Unmarshal(body, &res) 55 | if err != nil { 56 | return "", []string{}, err 57 | } 58 | return res.Org, res.Orgs, nil 59 | 60 | } 61 | -------------------------------------------------------------------------------- /uuid.go: -------------------------------------------------------------------------------- 1 | package dads 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | 8 | "github.com/LF-Engineering/dev-analytics-libraries/uuid" 9 | ) 10 | 11 | var ( 12 | // uuidsNonEmptyCache caches UUIDNonEmpty calls 13 | uuidsNonEmptyCache = map[string]string{} 14 | uuidsNonEmptyCacheMtx *sync.RWMutex 15 | // uuidsAffsCache caches UUIDAffs calls 16 | uuidsAffsCache = map[string]string{} 17 | uuidsAffsCacheMtx *sync.RWMutex 18 | ) 19 | 20 | // ResetUUIDCache - resets cache 21 | func ResetUUIDCache() { 22 | uuidsNonEmptyCache = map[string]string{} 23 | uuidsAffsCache = map[string]string{} 24 | } 25 | 26 | // UUIDNonEmpty - generate UUID of string args (all must be non-empty) 27 | // uses internal cache 28 | // used to generate document UUID's 29 | func UUIDNonEmpty(ctx *Ctx, args ...string) (h string) { 30 | k := strings.Join(args, ":") 31 | if MT { 32 | uuidsNonEmptyCacheMtx.RLock() 33 | } 34 | h, ok := uuidsNonEmptyCache[k] 35 | if MT { 36 | uuidsNonEmptyCacheMtx.RUnlock() 37 | } 38 | if ok { 39 | return 40 | } 41 | if ctx.Debug > 1 { 42 | defer func() { 43 | Printf("UUIDNonEmpty(%v) --> %s\n", args, h) 44 | }() 45 | } 46 | defer func() { 47 | if MT { 48 | uuidsNonEmptyCacheMtx.Lock() 49 | } 50 | uuidsNonEmptyCache[k] = h 51 | if MT { 52 | uuidsNonEmptyCacheMtx.Unlock() 53 | } 54 | }() 55 | if ctx.LegacyUUID { 56 | var err error 57 | cmdLine := []string{"uuid.py", "a"} 58 | cmdLine = append(cmdLine, args...) 59 | h, _, err = ExecCommand(ctx, cmdLine, "", nil) 60 | FatalOnError(err) 61 | h = h[:len(h)-1] 62 | return 63 | } 64 | var err error 65 | h, err = uuid.Generate(args...) 66 | if err != nil { 67 | Printf("UUIDNonEmpty error for: %+v\n", args) 68 | h = "" 69 | } 70 | return 71 | } 72 | 73 | // UUIDAffs - generate UUID of string args 74 | // uses internal cache 75 | // downcases arguments, all but first can be empty 76 | func UUIDAffs(ctx *Ctx, args ...string) (h string) { 77 | k := strings.Join(args, ":") 78 | if MT { 79 | uuidsAffsCacheMtx.RLock() 80 | } 81 | h, ok := uuidsAffsCache[k] 82 | if MT { 83 | uuidsAffsCacheMtx.RUnlock() 84 | } 85 | if ok { 86 | return 87 | } 88 | if ctx.Debug > 1 { 89 | defer func() { 90 | Printf("UUIDAffs(%v) --> %s\n", args, h) 91 | }() 92 | } 93 | defer func() { 94 | if MT { 95 | uuidsAffsCacheMtx.Lock() 96 | } 97 | uuidsAffsCache[k] = h 98 | if MT { 99 | uuidsAffsCacheMtx.Unlock() 100 | } 101 | }() 102 | if ctx.LegacyUUID { 103 | var err error 104 | cmdLine := []string{"uuid.py", "u"} 105 | cmdLine = append(cmdLine, args...) 
106 | h, _, err = ExecCommand(ctx, cmdLine, "", nil) 107 | FatalOnError(err) 108 | h = h[:len(h)-1] 109 | return 110 | } 111 | var err error 112 | if len(args) != 4 { 113 | err = fmt.Errorf("GenerateIdentity requires exactly 4 arguments, got %+v", args) 114 | } else { 115 | h, err = uuid.GenerateIdentity(&args[0], &args[1], &args[2], &args[3]) 116 | } 117 | if err != nil { 118 | Printf("UUIDAffs error for: %+v\n", args) 119 | h = "" 120 | } 121 | return 122 | } 123 | -------------------------------------------------------------------------------- /uuid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | #cython: language_level=3 4 | 5 | from hashlib import sha1 6 | from unicodedata import normalize, category 7 | from sys import argv 8 | 9 | def to_unicode(x, unaccent=False): 10 | """Convert a string to unicode""" 11 | s = str(x) 12 | if x == '': 13 | s = 'None' 14 | if unaccent: 15 | cs = [c for c in normalize('NFD', s) 16 | if category(c) != 'Mn'] 17 | s = ''.join(cs) 18 | return s 19 | 20 | def uuida(args): 21 | def check_value(v): 22 | if not isinstance(v, str): 23 | raise ValueError("%s value is not a string instance" % str(v)) 24 | elif not v: 25 | raise ValueError("value cannot be None or empty") 26 | else: 27 | return v 28 | s = ':'.join(map(check_value, args)) 29 | # print(s.encode('utf-8', errors="surrogateescape")) 30 | sha = sha1(s.encode('utf-8', errors='surrogateescape')) 31 | uuid_sha = sha.hexdigest() 32 | return uuid_sha 33 | 34 | def uuid(source, email=None, name=None, username=None): 35 | if source is None: 36 | raise ValueError("source cannot be None") 37 | if source == '': 38 | raise ValueError("source cannot be an empty string") 39 | if not (email or name or username): 40 | raise ValueError("identity data cannot be None or empty") 41 | s = ':'.join((to_unicode(source), 42 | to_unicode(email), 43 | to_unicode(name, unaccent=True), 44 | to_unicode(username))).lower() 45 | # print(s.encode('UTF-8', errors="surrogateescape")) 46 | sha = sha1(s.encode('UTF-8', errors="surrogateescape")) 47 | uuid_ = sha.hexdigest() 48 | return uuid_ 49 | 50 | if argv[1] == 'a': 51 | print(uuida(argv[2:])) 52 | else: 53 | print(uuid(argv[2], argv[3], argv[4], argv[5])) 54 | -------------------------------------------------------------------------------- /uuid/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | WORKDIR / 3 | RUN curl https://www.python.org/ftp/python/3.6.1/Python-3.6.1.tar.xz | tar -xJ 4 | WORKDIR Python-3.6.1 5 | RUN ./configure --prefix=/var/lang 6 | RUN make -j$(getconf _NPROCESSORS_ONLN) libinstall inclinstall 7 | WORKDIR /src 8 | RUN rm -rf /Python-3.6.1 9 | RUN pip install patchelf-wrapper 10 | RUN pip3 install staticx 11 | ADD . /src 12 | RUN pip install -r requirements.txt 13 | -------------------------------------------------------------------------------- /uuid/README.md: -------------------------------------------------------------------------------- 1 | # Cython image 2 | 3 | Create a Cython image needed to compile the Python code from uuid.py to a static binary: `[DOCKER_USER=...] ./build.sh` 4 | 5 | # Compilation 6 | 7 | Do the actual compilation: `[DOCKER_USER=...] ./compile.sh`. 8 | 9 | # TODO 10 | 11 | The final binary *is* indeed static, but seems to be broken when run in an `alpine` image, so this needs more investigation.
12 | 13 | This is probably due to missing Python data files, that are needed by this static binary. 14 | -------------------------------------------------------------------------------- /uuid/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$DOCKER_USER" ] 3 | then 4 | DOCKER_USER=`docker info 2>/dev/null | grep User | awk '{print $2}'` 5 | fi 6 | if [ -z "$DOCKER_USER" ] 7 | then 8 | echo "$0: cannot detect your docker user, specify one with DOCKER_USER=..." 9 | exit 1 10 | fi 11 | echo "Building as $DOCKER_USER" 12 | docker build -f ./Dockerfile -t "${DOCKER_USER}/cython" . 13 | -------------------------------------------------------------------------------- /uuid/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$DOCKER_USER" ] 3 | then 4 | DOCKER_USER=`docker info 2>/dev/null | grep User | awk '{print $2}'` 5 | fi 6 | if [ -z "$DOCKER_USER" ] 7 | then 8 | echo "$0: cannot detect your docker user, specify one with DOCKER_USER=..." 9 | exit 1 10 | fi 11 | echo "Compiling as $DOCKER_USER" 12 | docker run -it -v "$(pwd):/src/" "${DOCKER_USER}/cython" ./compile_cython.sh 13 | -------------------------------------------------------------------------------- /uuid/compile_cython.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm uuid.c uuid duuid 2>/dev/null 3 | cython uuid.pyx --embed && gcc -Os -I/var/lang/include/python3.6m -o duuid uuid.c -lpython3.6m -lpthread -lm -lutil -ldl && staticx --strip duuid uuid && strip -s uuid && echo 'OK' 4 | rm uuid.c 2>/dev/null 5 | -------------------------------------------------------------------------------- /uuid/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.29 2 | -------------------------------------------------------------------------------- /uuid/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z "$DOCKER_USER" ] 3 | then 4 | DOCKER_USER=`docker info 2>/dev/null | grep User | awk '{print $2}'` 5 | fi 6 | if [ -z "$DOCKER_USER" ] 7 | then 8 | echo "$0: cannot detect your docker user, specify one with DOCKER_USER=..." 
9 | exit 1 10 | fi 11 | echo "Running as $DOCKER_USER" 12 | docker run -it -v "$(pwd):/src/" "${DOCKER_USER}/cython" /bin/bash 13 | -------------------------------------------------------------------------------- /uuid/uuid.pyx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | #cython: language_level=3 4 | 5 | from hashlib import sha1 6 | from unicodedata import normalize, category 7 | from sys import argv 8 | 9 | def to_unicode(x, unaccent=False): 10 | """Convert a string to unicode""" 11 | s = str(x) 12 | if x == '': 13 | s = 'None' 14 | if unaccent: 15 | cs = [c for c in normalize('NFD', s) 16 | if category(c) != 'Mn'] 17 | s = ''.join(cs) 18 | return s 19 | 20 | def uuida(args): 21 | def check_value(v): 22 | if not isinstance(v, str): 23 | raise ValueError("%s value is not a string instance" % str(v)) 24 | elif not v: 25 | raise ValueError("value cannot be None or empty") 26 | else: 27 | return v 28 | s = ':'.join(map(check_value, args)) 29 | sha = sha1(s.encode('utf-8', errors='surrogateescape')) 30 | uuid_sha = sha.hexdigest() 31 | return uuid_sha 32 | 33 | def uuid(source, email=None, name=None, username=None): 34 | if source is None: 35 | raise ValueError("source cannot be None") 36 | if source == '': 37 | raise ValueError("source cannot be an empty string") 38 | if not (email or name or username): 39 | raise ValueError("identity data cannot be None or empty") 40 | s = ':'.join((to_unicode(source), 41 | to_unicode(email), 42 | to_unicode(name, unaccent=True), 43 | to_unicode(username))).lower() 44 | sha = sha1(s.encode('UTF-8', errors="surrogateescape")) 45 | uuid_ = sha.hexdigest() 46 | return uuid_ 47 | 48 | if argv[1] == 'a': 49 | print(uuida(argv[2:])) 50 | else: 51 | print(uuid(argv[2], argv[3], argv[4], argv[5])) 52 | --------------------------------------------------------------------------------