├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── cmd ├── escp │ └── main.go ├── esdiff │ └── main.go └── estail │ ├── .gitignore │ ├── README.md │ └── estail.go ├── esbulk ├── bulktypes.go └── esbulk.go ├── esdiff └── esdiff.go ├── esindex ├── doc.go ├── esindex.go └── optimize.go ├── esscroll └── esscroll.go ├── estypes ├── doc.go └── estypes.go ├── jobs ├── escopyjob.go └── validationjob.go └── logging ├── logging.go └── shared.go /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/escp/escp 2 | cmd/esdiff/esdiff 3 | 4 | .*.swp 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Lytics Inc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Copier 2 | 3 | Toolkit for copying and validating Elasticsearch indexes. 4 | 5 | * `escp` copies an index 6 | * `esdiff` compares documents in two indexes; intended for validating copies 7 | 8 | ## Usage 9 | ```sh 10 | # Install all utilities with go get: 11 | go get -v github.com/lytics/escp/... 
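# Note: on Go 1.17+ `go get` no longer installs binaries; the module-aware
# equivalent (untested against this repo) is:
go install github.com/lytics/escp/cmd/...@latest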
12 | ```
13 |
14 | ```sh
15 | # Copy srcindex on host1 to dstindex on host2,host3
16 | escp http://host1:9200/ srcindex host2:9200,host3:9200 dstindex
17 | ```
18 |
19 | ```sh
20 | # Check document counts are equal and spot check documents
21 | esdiff http://host1:9200/ srcindex http://host2:9200 dstindex
22 |
23 | # Check 25% of documents
24 | esdiff -d 4 http://host1:9200/ srcindex http://host2:9200 dstindex
25 |
26 | # Check all documents
27 | esdiff -d 1 http://host1:9200/ srcindex http://host2:9200 dstindex
28 | ```
29 |
30 | Other Tools
31 | -------------------------------
32 | * https://github.com/taskrabbit/elasticsearch-dump
33 | * https://github.com/mallocator/Elasticsearch-Exporter
34 | * http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-snapshots.html
35 |
-------------------------------------------------------------------------------- /cmd/escp/main.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "context"
5 |     "flag"
6 |     "fmt"
7 |     "os"
8 |     "strings"
9 |     "time"
10 |
11 |     "net/url"
12 |
13 |     "github.com/lytics/escp/jobs"
14 |     log "github.com/lytics/escp/logging"
15 | )
16 |
17 | func main() {
18 |     logger := log.NewStdLogger(true, log.DEBUG, "")
19 |
20 |     flag.Usage = func() {
21 |         fmt.Fprintf(os.Stderr, "Usage of %s http://SRCHOST1:9200 INDEX1 DESHOST2:9200,DESHOST3:9200,DESHOST4:9200 INDEX2\n", os.Args[0])
22 |         flag.PrintDefaults()
23 |     }
24 |
25 |     // Index creation settings
26 |     shards := 0
27 |     flag.IntVar(&shards, "shards", shards, "number of shards target index will have (default = same as old index)")
28 |     skipcreate := false
29 |     flag.BoolVar(&skipcreate, "skipcreate", skipcreate, "skip destination index creation")
30 |
31 |     // Tunables
32 |     scrolltimeout := 15 * time.Minute
33 |     flag.DurationVar(&scrolltimeout, "scrolltime", scrolltimeout, "time to keep scroll alive between requests")
34 |     scrollpage := 1000
35 |     flag.IntVar(&scrollpage, "scrollpage", scrollpage, "size of scroll pages (will actually be per source shard)")
36 |     scrolldocs := 5000
37 |     flag.IntVar(&scrolldocs, "scrolldocs", scrolldocs, "number of `docs` to buffer in memory from scroll")
38 |     bulksz := 128
39 |     flag.IntVar(&bulksz, "bulksz", bulksz, "size of bulk upload buffer in `KB`")
40 |
41 |     bulkpar := 0
42 |     flag.IntVar(&bulkpar, "bulkpar", bulkpar, "number of parallel bulk upload buffers to use; 0 = len(hosts)*2")
43 |
44 |     delayrefresh := true
45 |     flag.BoolVar(&delayrefresh, "delayrefresh", delayrefresh, "delay refresh until bulk indexing is complete")
46 |     delayreplicaton := false
47 |     flag.BoolVar(&delayreplicaton, "delayreplication", delayreplicaton, "delay replication until bulk indexing is complete. requires --replicationfactor=n")
requires --replicationfactor=n") 48 | replicationfactor := 1 49 | flag.IntVar(&replicationfactor, "replicationfactor", replicationfactor, "if delayreplication is set the replicaiton setting will be set to this after coping.") 50 | 51 | refreshint := time.Duration(0) 52 | flag.DurationVar(&refreshint, "refreshint", refreshint, "if indexing is delayed, what to set the refresh interval to after copy; defaults to old index's setting or 1s") 53 | maxsegs := 5 54 | flag.IntVar(&maxsegs, "maxsegs", maxsegs, "if indexing is delayed, the max number of segments for the optimized index") 55 | createdelay := time.Second 56 | flag.DurationVar(&createdelay, "createdelay", createdelay, "time to sleep after index creation to let cluster go green") 57 | 58 | logevery := 10 * time.Minute 59 | flag.DurationVar(&logevery, "logevery", logevery, "rate at which to log progress metrics.") 60 | 61 | flag.Parse() 62 | 63 | bulksz = bulksz * 1024 //convert to KBs 64 | 65 | if flag.NArg() != 4 { 66 | logger.Errorf("expected 4 arguments, found %d\n", flag.NArg()) 67 | flag.Usage() 68 | os.Exit(1) 69 | } 70 | if shards > 0 && skipcreate { 71 | logger.Errorf("cannot set shards and skip index creation") 72 | flag.Usage() 73 | os.Exit(1) 74 | } 75 | 76 | src, err := jobs.ParseUrl(flag.Arg(0)) 77 | if err != nil { 78 | logger.Errorf("error parsing url:%v err:%v", flag.Arg(0), err) 79 | os.Exit(1) 80 | } 81 | srcIdx := flag.Arg(1) 82 | if strings.HasSuffix(srcIdx, "/") { 83 | srcIdx = srcIdx[:len(srcIdx)-1] 84 | } 85 | 86 | dstsR := strings.Split(flag.Arg(2), ",") 87 | if len(dstsR) < 1 { 88 | logger.Errorf("need at least one destination host") 89 | flag.Usage() 90 | os.Exit(1) 91 | } 92 | dsts := []*url.URL{} 93 | for _, u := range dstsR { 94 | d, err := jobs.ParseUrl(u) 95 | if err != nil { 96 | logger.Errorf("error parsing url:%v err:%v", u, err) 97 | os.Exit(1) 98 | } 99 | dsts = append(dsts, d) 100 | } 101 | desidx := flag.Arg(3) 102 | 103 | if bulkpar == 0 { 104 | bulkpar = len(dsts) * 2 105 | } 106 | 107 | srcC := &jobs.SourceConfig{ 108 | IndexName: srcIdx, 109 | Host: src, 110 | ScrollTimeout: scrolltimeout, 111 | ScrollPage: scrollpage, 112 | ScrollDocs: scrolldocs, 113 | Filter: nil, 114 | } 115 | desC := &jobs.DesConfig{ 116 | IndexName: desidx, 117 | Hosts: dsts, 118 | CreateDelay: createdelay, 119 | RefreshInt: refreshint, 120 | Shards: shards, 121 | DelayRefresh: delayrefresh, 122 | SkipCreate: skipcreate, 123 | DelayReplicaton: delayreplicaton, 124 | ReplicationFactor: replicationfactor, 125 | MaxSeg: maxsegs, 126 | BulkSize: bulksz, 127 | NumWorkers: bulkpar, 128 | } 129 | 130 | if err := jobs.Copy(context.Background(), srcC, desC, logger, logevery); err != nil { 131 | logger.Errorf("%v", err) 132 | os.Exit(1) 133 | } 134 | 135 | } 136 | -------------------------------------------------------------------------------- /cmd/esdiff/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "net/url" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/lytics/escp/jobs" 13 | log "github.com/lytics/escp/logging" 14 | ) 15 | 16 | func main() { 17 | logger := log.NewStdLogger(true, log.DEBUG, "") 18 | flag.Usage = func() { 19 | fmt.Fprintf(os.Stderr, "Usage of %s http://host1:9200 index1 http://host2:9200 index2\n", os.Args[0]) 20 | flag.PrintDefaults() 21 | } 22 | timeout := "10m" 23 | flag.StringVar(&timeout, "timeout", timeout, "time to keep scroll cursor alive") 24 | pagesz := 1000 25 | 
flag.IntVar(&pagesz, "page", pagesz, "documents to retrieve at once from each shard") 26 | denom := 1000 27 | flag.IntVar(&denom, "d", denom, "1/`N` chance of each document being checked") 28 | force := false 29 | flag.BoolVar(&force, "force", force, "continue check even if document count varies") 30 | logevery := 10 * time.Minute 31 | flag.DurationVar(&logevery, "logevery", logevery, "rate at which to log progress metrics.") 32 | 33 | flag.Parse() 34 | if flag.NArg() != 4 { 35 | fatalf("requires 2 arguments") 36 | } 37 | 38 | surl, err := jobs.ParseUrl(flag.Arg(0)) 39 | if err != nil { 40 | logger.Errorf("error parsing url:%v err:%v", flag.Arg(0), err) 41 | os.Exit(1) 42 | } 43 | srcIdx := flag.Arg(1) 44 | if strings.HasSuffix(srcIdx, "/") { 45 | srcIdx = srcIdx[:len(srcIdx)-1] 46 | } 47 | 48 | durl, err := jobs.ParseUrl(flag.Arg(2)) 49 | if err != nil { 50 | logger.Errorf("error parsing url:%v err:%v", flag.Arg(2), err) 51 | os.Exit(1) 52 | } 53 | dstIdx := flag.Arg(3) 54 | if strings.HasSuffix(dstIdx, "/") { 55 | dstIdx = dstIdx[:len(dstIdx)-1] 56 | } 57 | 58 | if denom < 2 { 59 | denom = 1 60 | } 61 | 62 | srcC := &jobs.SourceConfig{ 63 | IndexName: srcIdx, 64 | Host: surl, 65 | ScrollTimeout: time.Minute, 66 | ScrollPage: 1000, 67 | ScrollDocs: 1, 68 | } 69 | 70 | desC := &jobs.DesConfig{ 71 | IndexName: dstIdx, 72 | Hosts: []*url.URL{durl}, 73 | } 74 | 75 | vr, err := jobs.Validate(context.Background(), srcC, desC, denom, logger, logevery) 76 | //logger.Errorf("?: %v %v", problems, err) 77 | if err == jobs.ErrMissMatch { 78 | logger.Errorf("MissMatch: %v", vr) 79 | } else if err != nil { 80 | logger.Errorf("validation failed with error:%v", err) 81 | } else { 82 | logger.Infof("results:%v", vr) 83 | } 84 | } 85 | 86 | func fatalf(msg string, args ...interface{}) { 87 | fmt.Fprintf(os.Stderr, "fatal error: "+msg+"\n", args...) 88 | flag.Usage() 89 | os.Exit(2) 90 | } 91 | -------------------------------------------------------------------------------- /cmd/estail/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | # Build files 23 | *.exe 24 | *.test 25 | *.prof 26 | 27 | *.orig 28 | *.swp 29 | 30 | # Executables 31 | estail 32 | -------------------------------------------------------------------------------- /cmd/estail/README.md: -------------------------------------------------------------------------------- 1 | # estail 2 | 3 | **estail** is tool for outputing the latest data in a Elasticsearch timeseries index. Where data is stored in mutiple indexes (one per bucket of time), for example kibana logstash indexes. 4 | 5 | Moved from https://github.com/lytics/estail (this old version only supported older versions of ES). We worked to utilize the packages in the `escp` repo. 6 | 7 | Work in Progress! It currently works well for tailing data but doesn't suppore a `tail -f` style streaming of realtime changes. 
8 |
9 | ## Install
10 |
11 | To install `estail`, run the command below:
12 |
13 | ```
14 | go get -u github.com/lytics/escp/cmd/estail
15 | ```
16 |
17 | ## Usage
18 |
19 | ```
20 | $ estail -h
21 |
22 | Usage of estail:
23 |   -dur duration
24 |     	now() - dur are how many logs are pulled (default 30s)
25 |   -exclude string
26 |     	DOESNT WORK: comma separated list of field:value pairs to exclude
27 |   -host string
28 |     	host and port of elasticsearch (default "localhost:9200")
29 |   -include string
30 |     	DOESNT WORK: comma separated list of field:value pairs to include
31 |   -prefix string
32 |     	prefix of log indexes (default "logstash-2017.02.23")
33 |   -size int
34 |     	number of docs to return per polling interval (default 1000)
35 |   -ssl
36 |     	use https for URI scheme
37 |   -timestamp string
38 |     	timestamp field to sort by (default "@timestamp")
39 |
40 | ```
41 |
-------------------------------------------------------------------------------- /cmd/estail/estail.go: --------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "context"
5 |     "flag"
6 |     "fmt"
7 |     "os"
8 |     "strings"
9 |     "time"
10 |
11 |     "encoding/json"
12 |
13 |     "github.com/lytics/escp/esscroll"
14 |     log "github.com/lytics/escp/logging"
15 | )
16 |
17 | func fatalf(msg string, args ...interface{}) {
18 |     fmt.Printf(msg+"\n", args...)
19 |     os.Exit(2)
20 | }
21 |
22 | func main() {
23 |     host := "localhost:9200"
24 |     indexPrefix := "logstash-2017.02.23"
25 |     timeField := "@timestamp"
26 |     include := ""
27 |     exclude := ""
28 |     size := 1000
29 |     //poll := 1
30 |     useSSL := false
31 |     timeSpan := 30 * time.Second
32 |
33 |     flag.StringVar(&host, "host", host, "host and port of elasticsearch")
34 |     flag.StringVar(&indexPrefix, "prefix", indexPrefix, "prefix of log indexes")
35 |     flag.StringVar(&timeField, "timestamp", timeField, "timestamp field to sort by")
36 |     flag.StringVar(&include, "include", include, "DOESNT WORK: comma separated list of field:value pairs to include")
37 |     flag.StringVar(&exclude, "exclude", exclude, "DOESNT WORK: comma separated list of field:value pairs to exclude")
38 |     flag.IntVar(&size, "size", size, "number of docs to return per polling interval")
39 |     //flag.IntVar(&poll, "poll", poll, "time in seconds to poll for new data from ES")
40 |     flag.BoolVar(&useSSL, "ssl", useSSL, "use https for URI scheme")
41 |     flag.DurationVar(&timeSpan, "dur", timeSpan, "now() - dur are how many logs are pulled")
42 |
43 |     // Usage must be assigned before Parse so -h and flag errors print it.
44 |     flag.Usage = func() {
45 |         fmt.Fprintf(os.Stderr, "Usage of %s --host=localhost:9401 --dur=10s # pull the latest 10 seconds of logs\n", os.Args[0])
46 |         flag.PrintDefaults()
47 |     }
48 |     flag.Parse()
49 |
50 |     logger := log.NewStdLogger(true, log.DEBUG, "")
51 |
52 |     var scheme string
53 |     if useSSL {
54 |         scheme = "https"
55 |     } else {
56 |         scheme = "http"
57 |     }
58 |     rootURL := fmt.Sprintf("%s://%s", scheme, host)
59 |
60 |     docValueFilter := map[string]interface{}{}
61 |     if len(include) == 0 && len(exclude) == 0 {
62 |         docValueFilter["match_all"] = map[string]interface{}{}
63 |     } else {
64 |         filter := map[string]interface{}{}
65 |         if len(include) > 0 {
66 |             filter["must"] = getTerms(include)
67 |         }
68 |         if len(exclude) > 0 {
69 |             filter["must_not"] = getTerms(exclude)
70 |         }
71 |         docValueFilter["bool"] = filter
72 |     }
73 |     timeRangeFilter := map[string]interface{}{
74 |         "range": map[string]interface{}{
75 |             timeField: map[string]interface{}{
76 |                 "gt": time.Now().Add(-1 * timeSpan).UTC().Format(time.RFC3339Nano),
77 |             },
78 |         },
79 |     }
80 |
81 |     // Combine the time-range filter with any include/exclude terms (legacy ES "and" filter syntax).
82 |     filter := map[string]interface{}{
83 |         "and": []interface{}{
84 |             timeRangeFilter,
85 |             docValueFilter,
86 |         },
87 |     }
88 |
89 |     scanURL := fmt.Sprintf("%s/%s", rootURL, indexPrefix)
90 |     // Start the scroll first to make sure the source parameter is valid
91 |     ess := esscroll.New(context.Background(), scanURL, time.Minute, size, 3, filter, 10*time.Minute, logger)
92 |     resp, err := ess.Start()
93 |     if err != nil {
94 |         fatalf("%v", err)
95 |     }
96 |
97 |     b, _ := json.Marshal(filter)
98 |     logger.Infof("Scrolling over %d documents from %v : filter:%v\n", resp.Total, rootURL, string(b))
99 |
100 |     for doc := range resp.Hits {
101 |         b, err := doc.Source.MarshalJSON()
102 |         if err != nil {
103 |             fatalf("%v", err)
104 |         }
105 |         fmt.Printf("%s\n", string(b))
106 |     }
107 | }
108 |
109 | // getTerms splits a comma separated string into terms for a query filter.
110 | func getTerms(args string) []map[string]interface{} {
111 |     terms := []map[string]interface{}{}
112 |     for k, v := range parsePairs(args) {
113 |         terms = append(terms, map[string]interface{}{"terms": map[string]interface{}{k: v}})
114 |     }
115 |     return terms
116 | }
117 |
118 | // parsePairs splits a comma separated string into key-value pairs.
119 | func parsePairs(args string) map[string][]string {
120 |     exkv := map[string][]string{}
121 |     for _, pair := range strings.Split(args, ",") {
122 |         kv := strings.Split(pair, ":")
123 |         if len(kv) != 2 {
124 |             continue // skip malformed pairs rather than panicking on a missing ':'
125 |         }
126 |         if _, ok := exkv[kv[0]]; ok {
127 |             exkv[kv[0]] = append(exkv[kv[0]], kv[1])
128 |         } else {
129 |             exkv[kv[0]] = []string{kv[1]}
130 |         }
131 |     }
132 |     return exkv
133 | }
134 |
-------------------------------------------------------------------------------- /esbulk/bulktypes.go: --------------------------------------------------------------------------------
1 | package esbulk
2 |
3 | import (
4 |     "bytes"
5 |     "encoding/json"
6 |
7 |     "github.com/lytics/escp/estypes"
8 | )
9 |
10 | type BulkAction struct {
11 |     Index *estypes.Meta `json:"index,omitempty"`
12 | }
13 |
14 | func NewBatch() *Batch {
15 |     return &Batch{
16 |         docs: make(map[string]*estypes.Doc),
17 |     }
18 | }
19 |
20 | type Batch struct {
21 |     docs map[string]*estypes.Doc
22 | }
23 |
24 | // Reset must use a pointer receiver so the fresh map is assigned to the
25 | // caller's Batch rather than to a copy.
26 | func (b *Batch) Reset() {
27 |     b.docs = make(map[string]*estypes.Doc)
28 | }
29 |
30 | func (b *Batch) Add(id string, doc *estypes.Doc) {
31 |     b.docs[id] = doc
32 | }
33 |
34 | func (b *Batch) Delete(id string) {
35 |     delete(b.docs, id)
36 | }
37 |
38 | func (b *Batch) Len() int {
39 |     return len(b.docs)
40 | }
41 |
42 | func (b *Batch) ByteLen() int {
43 |     totallen := 0
44 |     for _, bm := range b.docs {
45 |         totallen += len(bm.Source)
46 |     }
47 |     return totallen
48 | }
49 |
50 | func (b *Batch) Encode(index string) ([]byte, error) {
51 |     buf := bytes.NewBuffer([]byte{})
52 |     enc := json.NewEncoder(buf)
53 |     for _, doc := range b.docs {
54 |         // Write action
55 |         action := BulkAction{}
56 |         action.Index = &doc.Meta
57 |         action.Index.Index = index
58 |         if err := enc.Encode(&action); err != nil {
59 |             return nil, err
60 |         }
61 |         // Write document
62 |         if err := enc.Encode(&doc.Source); err != nil {
63 |             return nil, err
64 |         }
65 |     }
66 |     bs := buf.Bytes()
67 |     return bs, nil
68 | }
69 |
70 | /*
71 | {
72 |    "took": 3,
73 |    "errors": true,
74 |    "items": [
75 |       { "create": {
76 |          "_index": "website",
77 |          "_type": "blog",
78 |          "_id": "123",
79 |          "status": 409,
80 |          "error": "DocumentAlreadyExistsException
81 |                   [[website][4] [blog][123]:
82 |                   document already exists]"
83 |       }},
84 |       { "index": {
85 |          "_index": "website",
86 |          "_type": "blog",
87 |          "_id": "123",
88 |          "_version": 5,
89 |          "status": 200
90 |       }}
91 |    ]
92 | }
93 | */
94 | // BulkResponses is the response from ES's bulk endpoint.
95 | type BulkResponses struct {
96 |     Took      int                        `json:"took,omitempty"`
97 |     HasErrors bool                       `json:"errors,omitempty"`
98 |     Items     []map[string]*BulkResponse `json:"items,omitempty"`
99 |     originalBody []byte
100 | }
101 |
102 | // Failed returns those items of a bulk response that have errors,
103 | // i.e. those that don't have a status code between 200 and 299.
104 | func (r *BulkResponses) Failed(exclude404 bool) []*BulkResponse {
105 |     if r.Items == nil {
106 |         return nil
107 |     }
108 |     errors := make([]*BulkResponse, 0)
109 |     for _, item := range r.Items {
110 |         for _, result := range item {
111 |             if exclude404 && result.Status == 404 {
112 |                 continue
113 |             }
114 |             if !(result.Status >= 200 && result.Status <= 299) {
115 |                 errors = append(errors, result)
116 |             }
117 |         }
118 |     }
119 |     return errors
120 | }
121 |
122 | // Succeeded returns those items of a bulk response that have no errors,
123 | // i.e. those that have a status code between 200 and 299.
124 | func (r *BulkResponses) Succeeded(include404 bool) []*BulkResponse {
125 |     if r.Items == nil {
126 |         return nil
127 |     }
128 |     succeeded := make([]*BulkResponse, 0)
129 |     for _, item := range r.Items {
130 |         for _, result := range item {
131 |             if result.Status >= 200 && result.Status <= 299 {
132 |                 succeeded = append(succeeded, result)
133 |             }
134 |             if include404 && result.Status == 404 {
135 |                 succeeded = append(succeeded, result)
136 |             }
137 |         }
138 |     }
139 |     return succeeded
140 | }
141 |
142 | type BulkResponse struct {
143 |     Index   string           `json:"_index,omitempty"`
144 |     Type    string           `json:"_type,omitempty"`
145 |     Id      string           `json:"_id,omitempty"`
146 |     Version int              `json:"_version,omitempty"`
147 |     Shards  *ESShardsResults `json:"_shards,omitempty"`
148 |     Status  int              `json:"status,omitempty"`
149 |     Found   bool             `json:"found,omitempty"`
150 |     Error   *ESError         `json:"error,omitempty"`
151 | }
152 |
153 | type ESShardsResults struct {
154 |     Total      int `json:"total,omitempty"`
155 |     Successful int `json:"successful,omitempty"`
156 |     Failed     int `json:"failed,omitempty"`
157 | }
158 |
159 | type ESError struct {
160 |     Type         string                   `json:"type"`
161 |     Reason       string                   `json:"reason"`
162 |     ResourceType string                   `json:"resource.type,omitempty"`
163 |     ResourceId   string                   `json:"resource.id,omitempty"`
164 |     Index        string                   `json:"index,omitempty"`
165 |     Phase        string                   `json:"phase,omitempty"`
166 |     Grouped      bool                     `json:"grouped,omitempty"`
167 |     CausedBy     map[string]interface{}   `json:"caused_by,omitempty"`
168 |     RootCause    []*ESError               `json:"root_cause,omitempty"`
169 |     FailedShards []map[string]interface{} `json:"failed_shards,omitempty"`
170 | }
171 |
-------------------------------------------------------------------------------- /esbulk/esbulk.go: --------------------------------------------------------------------------------
1 | package esbulk
2 |
3 | import (
4 |     "bytes"
5 |     "context"
6 |     "encoding/json"
7 |     "errors"
8 |     "fmt"
9 |     "io/ioutil"
10 |     "math"
11 |     "math/rand"
12 |     "net/http"
13 |     "strings"
14 |     "sync"
15 |     "time"
16 |
17 |     "github.com/lytics/escp/esscroll"
18 |     "github.com/lytics/escp/estypes"
19 |     log "github.com/lytics/escp/logging"
20 | )
21 |
22 | var Client = http.DefaultClient
23 |
24 | // ErrClosed is returned when a method is called on a closed indexer. Callers
25 | // receiving this error should check the Indexer.Err() method to see if the
26 | // bulk indexer terminated due to error.
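//
// A producer feeding the docs channel can guard its sends like so (a sketch;
// doc, docs, and indexer are hypothetical caller-side names):
//
//	select {
//	case docs <- doc:
//	case err := <-indexer.Err():
//		// bulk indexing failed; stop producing
//	}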
27 | var ErrClosed = errors.New("already closed") 28 | 29 | type Indexer struct { 30 | count uint64 31 | docs <-chan *estypes.Doc 32 | err chan error 33 | } 34 | 35 | // Err allows monitoring for errors while indexing is occurring. It will be 36 | // closed when indexing is finished. 37 | func (i *Indexer) Err() chan error { return i.err } 38 | 39 | // New creates a new Elasticsearch bulk indexer. URL should be of the 40 | // form http://eshost:9200/_bulk. 41 | // 42 | // bufsz is the size of the upload buffer in kilobytes. bufsz < 1 will default 43 | // to 20mb. 44 | // 45 | // par is the number of parallel buffers to use. par < 1 will default to 3. 46 | // 47 | // Sends to docs should select on Indexer.Err to prevent deadlocking in case of 48 | // indexer error. 49 | func New(ctx context.Context, hosts []string, index string, bufsz, par int, docs <-chan *estypes.Doc, logger log.Logger) *Indexer { 50 | indexer := &Indexer{ 51 | docs: docs, 52 | // buffer an error per parallel upload buffer 53 | err: make(chan error, par), 54 | } 55 | 56 | if bufsz < 1 { 57 | bufsz = 20 * 1024 58 | } 59 | if par < 1 { 60 | par = 3 61 | } 62 | targets := make([]string, len(hosts)) 63 | for i, h := range hosts { 64 | targets[i] = fmt.Sprintf("%s/_bulk", h) 65 | } 66 | ti := 0 67 | 68 | go func() { 69 | defer close(indexer.err) 70 | 71 | uploadat := bufsz 72 | if bufsz > 1000 { 73 | // upload at 500kb less than buffer size to avoid buffer resizing 74 | uploadat = bufsz - 500 75 | } 76 | 77 | wg := new(sync.WaitGroup) 78 | batchs := make(chan *Batch, par) 79 | for i := 0; i < par; i++ { 80 | batchs <- NewBatch() 81 | } 82 | 83 | var batch *Batch = nil 84 | var sz = 0 85 | for doc := range docs { 86 | if batch == nil { 87 | b := <-batchs 88 | b.Reset() 89 | batch = b 90 | } 91 | 92 | batch.Add(doc.ID, doc) 93 | sz += len(doc.Source) 94 | 95 | // Actually do the bulk insert once the buffer is full 96 | if sz >= uploadat { 97 | wg.Add(1) 98 | go func(b *Batch, target string) { 99 | defer wg.Done() 100 | if b.Len() == 0 { 101 | return 102 | } 103 | if err := upload(ctx, target, index, b, logger); err != nil { 104 | indexer.err <- err 105 | return 106 | } 107 | batchs <- b 108 | }(batch, targets[ti]) 109 | 110 | sz = 0 111 | batch = nil // go to next buffer in buffer pool 112 | ti = (ti + 1) % len(targets) // go to the next host 113 | } 114 | select { 115 | case <-ctx.Done(): 116 | //TODO Save prgress 117 | return 118 | default: 119 | } 120 | } 121 | 122 | // No more docs, if the buffer is non-empty upload it 123 | if batch != nil && batch.Len() > 0 { 124 | ti = (ti + 1) % len(targets) 125 | if err := upload(ctx, targets[ti], index, batch, logger); err != nil { 126 | indexer.err <- err 127 | } 128 | } 129 | wg.Wait() // wait for async uploads to complete too 130 | }() 131 | 132 | return indexer 133 | } 134 | 135 | // upload buffer to bulk API. 
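// upload retries for up to 64 attempts with jittered backoff, deleting the
// documents that succeeded from the batch between attempts so only the failed
// documents are resent; anything still failing after the final attempt is
// logged and dropped.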
136 | func upload(ctx context.Context, url, index string, batch *Batch, logger log.Logger) error {
137 |     st := time.Now()
138 |     var lastFailedBrespErrs []*BulkResponse
139 |     errsString := func(br []*BulkResponse) string {
140 |         strs := []string{}
141 |         for _, b := range br {
142 |             strs = append(strs, fmt.Sprintf("%v->%v:%v", b.Id, b.Error.Type, b.Error.Reason))
143 |         }
144 |         return strings.Join(strs, ",")
145 |     }
146 |
147 |     for try := 0; try < 64; try++ {
148 |         if err := ctx.Err(); err != nil {
149 |             return err
150 |         }
151 |
152 |         buf, err := batch.Encode(index)
153 |         if err != nil {
154 |             return fmt.Errorf("esbulk.upload: error encoding batch: %v", err)
155 |         }
156 |         if len(buf) == 0 {
157 |             logger.Infof("length of buffer to write is 0, skipping")
158 |             time.Sleep(1 * time.Second)
159 |             continue
160 |         }
161 |
162 |         if try > 10 {
163 |             logger.Warnf("slow upload warning: retry:%v of %v bytes:%v batchlen:%v runtime:%v errors:%v", try, 64, esscroll.IECFormat(uint64(len(buf))), batch.Len(), time.Since(st), errsString(lastFailedBrespErrs))
164 |         }
165 |
166 |         resp, err := Client.Post(url, "application/json", bytes.NewReader(buf))
167 |         if err != nil {
168 |             logger.Warnf("esbulk.upload: error posting to ES: %v, bytes len: %d", err, len(buf))
169 |             backoff(try)
170 |             continue
171 |         }
172 |         // Read then close the body immediately; a defer inside the retry loop
173 |         // would keep every response open until the function returned.
174 |         b, err := ioutil.ReadAll(resp.Body)
175 |         resp.Body.Close()
176 |         if err != nil {
177 |             return fmt.Errorf("esbulk.upload: error reading response: %v", err)
178 |         }
179 |         if resp.StatusCode != 200 {
180 |             return fmt.Errorf("esbulk.upload: non-200 response code: %d", resp.StatusCode)
181 |         }
182 |         bresp := &BulkResponses{}
183 |         if err := json.Unmarshal(b, &bresp); err != nil {
184 |             return fmt.Errorf("esbulk.upload: error decoding response: %v", err)
185 |         }
186 |
187 |         ct := 0
188 |         const include404 = false
189 |         for _, successful := range bresp.Succeeded(include404) {
190 |             // remove bulk successes from the next try, so we only resend the
191 |             // failed docs.
192 |             batch.Delete(successful.Id)
193 |             ct++
194 |         }
195 |         if batch.Len() == 0 {
196 |             break
197 |         }
198 |
199 |         lastFailedBrespErrs = bresp.Failed(!include404)
200 |
201 |         backoff(try)
202 |     }
203 |
204 |     if batch.Len() > 0 {
205 |         logger.Errorf("error: unable to write all docs to ES for this batch: %v remaining items", batch.Len())
206 |     }
207 |     batch.Reset()
208 |
209 |     return nil
210 | }
211 |
212 | func backoff(try int) {
213 |     nf := math.Pow(2, float64(try))
214 |     nf = math.Max(1, nf)
215 |     if try < 3 {
216 |         nf = math.Min(nf, 2000)
217 |     } else if try > 10 {
218 |         nf = math.Min(nf, 8000)
219 |     } else {
220 |         nf = math.Min(nf, 4000)
221 |     }
222 |     r := rand.Int31n(int32(nf))
223 |     d := time.Duration(int32(try*100)+r) * time.Millisecond
224 |     time.Sleep(d)
225 | }
226 |
-------------------------------------------------------------------------------- /esdiff/esdiff.go: --------------------------------------------------------------------------------
1 | package esdiff
2 |
3 | import (
4 |     "bytes"
5 |     "encoding/json"
6 |     "fmt"
7 |     "io/ioutil"
8 |     "net/http"
9 |     "reflect"
10 |
11 |     "github.com/lytics/escp/estypes"
12 |     log "github.com/lytics/escp/logging"
13 | )
14 |
15 | const (
16 |     // DiffMissing is returned if the source document is missing in the
17 |     // destination.
18 |     DiffMissing = "missing"
19 |
20 |     // DiffSource is returned if the source document and destination document
21 |     // sources differ.
22 | DiffSource = "source differs" 23 | ) 24 | 25 | // ErrHTTP is returned for non-200 responses from the destination Elasticsearch 26 | // server. 27 | type ErrHTTP struct { 28 | Code int 29 | Body []byte 30 | } 31 | 32 | func (e *ErrHTTP) Error() string { 33 | return fmt.Sprintf("non-200 status code: %d", e.Code) 34 | } 35 | 36 | // Check the source document against the destination URL. Returns a string 37 | // describing any differences or any empty string if the documents matched. 38 | // 39 | // Errors from Elasticsearch or JSON unmarshalling are returned untouched 40 | // with an empty diff string. 41 | func Check(src *estypes.Doc, dst string, logger log.Logger) (diff string, err error) { 42 | // Get the document from the target index 43 | target := fmt.Sprintf("%s/%s/%s", dst, src.Type, src.ID) 44 | resp, err := http.Get(target) 45 | if err != nil { 46 | return "", err 47 | } 48 | switch resp.StatusCode { 49 | case 200: 50 | // continue on 51 | case 404: 52 | // treat as diff 53 | return DiffMissing, nil 54 | default: 55 | // treat all other respones as errors 56 | buf, _ := ioutil.ReadAll(resp.Body) 57 | resp.Body.Close() 58 | return "", &ErrHTTP{resp.StatusCode, buf} 59 | } 60 | 61 | if resp.StatusCode != 200 { 62 | } 63 | 64 | newdoc := estypes.Doc{} 65 | if err := json.NewDecoder(resp.Body).Decode(&newdoc); err != nil { 66 | logger.Errorf("unable to unmarshal json body: Url:%s Err:%v", target, err) 67 | ioutil.ReadAll(resp.Body) 68 | resp.Body.Close() 69 | return "", fmt.Errorf("error decoding destination document: %v", err) 70 | } 71 | 72 | if src.ID != newdoc.ID { 73 | return "", fmt.Errorf("metadata mismatch; document _id %s != %s", src.ID, newdoc.ID) 74 | } 75 | if src.Type != newdoc.Type { 76 | return "", fmt.Errorf("metadata mismatch; document type %s != %s", src.Type, newdoc.Type) 77 | } 78 | 79 | // Fast path 80 | if bytes.Equal(src.Source, newdoc.Source) { 81 | return "", nil 82 | } 83 | 84 | // Slow path 85 | origsrc := map[string]interface{}{} 86 | if err := json.Unmarshal(src.Source, &origsrc); err != nil { 87 | return "", fmt.Errorf("error unmarshalling source doc: %v", err) 88 | } 89 | newsrc := make(map[string]interface{}, len(origsrc)) 90 | if err := json.Unmarshal(newdoc.Source, &newsrc); err != nil { 91 | return "", fmt.Errorf("error unmarshalling destination doc: %v", err) 92 | } 93 | 94 | if len(origsrc) != len(newsrc) { 95 | return fmt.Sprintf("%d fields in source; %d fields in target", src.ID, len(origsrc), len(newsrc)), nil 96 | } 97 | 98 | if !reflect.DeepEqual(origsrc, newsrc) { 99 | return DiffSource, nil 100 | } 101 | 102 | // We're good! 103 | return "", nil 104 | } 105 | -------------------------------------------------------------------------------- /esindex/doc.go: -------------------------------------------------------------------------------- 1 | // This package is for interacting with Elasticsearch indexes. 2 | package esindex 3 | -------------------------------------------------------------------------------- /esindex/esindex.go: -------------------------------------------------------------------------------- 1 | package esindex 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "strings" 11 | 12 | "github.com/lytics/escp/estypes" 13 | ) 14 | 15 | // Metadata describing an Elasticsearch index. 16 | type Meta struct { 17 | Settings *Settings `json:"settings"` 18 | } 19 | 20 | // Settings for an Elasticsearch index. 
21 | type Settings struct {
22 |     Index *IndexSettings `json:"index"`
23 | }
24 |
25 | type IndexSettings struct {
26 |     Replicas        *int              `json:"number_of_replicas,string,omitempty"`
27 |     Shards          *int              `json:"number_of_shards,string,omitempty"`
28 |     RefreshInterval string            `json:"refresh_interval,omitempty"`
29 |     CompoundOnFlush bool              `json:"compound_on_flush,omitempty"`
30 |     CompoundFormat  bool              `json:"compound_format,omitempty"`
31 |     Mapping         *IndexMapping     `json:"mapping,omitempty"`
32 |     Unassigned      *UnassignedWarper `json:"unassigned,omitempty"`
33 | }
34 |
35 | type IndexMapping struct {
36 |     NestedFields *FieldsSetting `json:"nested_fields,omitempty"`
37 | }
38 |
39 | type FieldsSetting struct {
40 |     Limit int `json:"limit,string,omitempty"`
41 | }
42 |
43 | type UnassignedWarper struct {
44 |     NodeOption *NodeOptions `json:"node_left,omitempty"`
45 | }
46 |
47 | type NodeOptions struct {
48 |     DelayTimeout string `json:"delayed_timeout,omitempty"`
49 | }
50 |
51 | var (
52 |     ErrMissing = errors.New("index missing")
53 |     ErrExists  = errors.New("index exists")
54 | )
55 |
56 | // Create an index with the specified metadata. Returns ErrExists if the index
57 | // already exists.
58 | func Create(dst string, m *Meta) error {
59 |     // Make sure the index doesn't already exist first
60 |     existing, err := Get(dst)
61 |     if err != nil && err != ErrMissing {
62 |         return fmt.Errorf("error checking for existing index: %v", err)
63 |     }
64 |     if existing != nil {
65 |         return ErrExists
66 |     }
67 |
68 |     return put(dst, m)
69 | }
70 | // Get metadata about an index. Returns ErrMissing if the index doesn't exist.
71 | func Get(dst string) (*Meta, error) {
72 |     resp, err := http.Get(dst)
73 |     if err != nil {
74 |         return nil, fmt.Errorf("Get::Uri:%v err:%v", dst, err)
75 |     }
76 |     defer resp.Body.Close()
77 |     if resp.StatusCode == 404 {
78 |         return nil, ErrMissing
79 |     }
80 |     if resp.StatusCode != 200 {
81 |         return nil, fmt.Errorf("Get::Uri:%v Non-200 status code from source Elasticsearch: %d", dst, resp.StatusCode)
82 |     }
83 |
84 |     b, err := ioutil.ReadAll(resp.Body)
85 |     if err != nil {
86 |         return nil, fmt.Errorf("Get::Error reading response: %v", err)
87 |     }
88 |     idxmetamap := make(map[string]*Meta, 1)
89 |     if err := json.Unmarshal(b, &idxmetamap); err != nil {
90 |         return nil, fmt.Errorf("Get::error decoding response: err:%v body:%v", err, string(b))
91 |     }
92 |
93 |     parts := strings.Split(dst, "/")
94 |     idxname := parts[len(parts)-1]
95 |     idxmeta, ok := idxmetamap[idxname]
96 |     if !ok {
97 |         return nil, fmt.Errorf("Get:: index %s not found", idxname)
98 |     }
99 |     // Shards should always be set, so use this as an indicator things didn't get
100 |     // unmarshalled properly.
101 |     if idxmeta.Settings.Index.Shards == nil {
102 |         return nil, fmt.Errorf("Get::unable to read existing shards for index %s", idxname)
103 |     }
104 |     return idxmeta, nil
105 | }
106 | func GetDocCount(idx string) (uint64, error) {
107 |     hresp, err := http.Get(idx + "/_search?size=0")
108 |     if err != nil {
109 |         return 0, fmt.Errorf("error contacting index:%v err:%v", idx, err)
110 |     }
111 |     defer hresp.Body.Close()
112 |     newres := estypes.Results{}
113 |     if err := json.NewDecoder(hresp.Body).Decode(&newres); err != nil {
114 |         return 0, fmt.Errorf("error reading target index:%v err:%v", idx, err)
115 |     }
116 |     return newres.Hits.Total, nil
117 | }
118 |
119 | // Update index metadata.
120 | func Update(dst string, m *Meta) error {
121 |     return put(dst+"/_settings", m)
122 | }
123 | func put(dst string, m *Meta) error {
124 |     buf, err := json.Marshal(m)
125 |     if err != nil {
126 |         return fmt.Errorf("error encoding index json: %v", err)
127 |     }
128 |     req, err := http.NewRequest("PUT", dst, bytes.NewReader(buf))
129 |     if err != nil {
130 |         return fmt.Errorf("error creating index request: %v", err)
131 |     }
132 |     resp, err := http.DefaultClient.Do(req)
133 |     if err != nil {
134 |         return fmt.Errorf("error creating index %s: %v", dst, err)
135 |     }
136 |     defer resp.Body.Close()
137 |     ackr := estypes.AckResponse{}
138 |     if err := json.NewDecoder(resp.Body).Decode(&ackr); err != nil {
139 |         return fmt.Errorf("error decoding index response: %v", err)
140 |     }
141 |     if !ackr.Ack {
142 |         return estypes.ErrUnack
143 |     }
144 |     return nil
145 | }
146 |
-------------------------------------------------------------------------------- /esindex/optimize.go: --------------------------------------------------------------------------------
1 | package esindex
2 |
3 | import (
4 |     "fmt"
5 |     "io/ioutil"
6 |     "net/http"
7 | )
8 |
9 | // Optimize the target index (or indices) to have the maximum number of
10 | // segments. Segments < 1 will default to 1.
11 | //
12 | // Optimize blocks until the operation completes.
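//
// A minimal call sketch (hypothetical host and index):
//
//	if err := esindex.Optimize("http://localhost:9200/myindex", 1); err != nil {
//		// handle the error
//	}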
13 | func Optimize(target string, segn int) error {
14 |     if segn < 1 {
15 |         segn = 1
16 |     }
17 |     uri := fmt.Sprintf("%s/_forcemerge?max_num_segments=%d", target, segn)
18 |     resp, err := http.Post(uri, "text/plain", nil)
19 |     if err != nil {
20 |         return fmt.Errorf("error optimizing: (POST %v) error:%v", uri, err)
21 |     }
22 |     defer resp.Body.Close()
23 |     if resp.StatusCode < 200 || resp.StatusCode > 299 {
24 |         return fmt.Errorf("non-2xx status code: %d uri:%v", resp.StatusCode, uri)
25 |     }
26 |     // This could block for a while
27 |     if _, err := ioutil.ReadAll(resp.Body); err != nil {
28 |         return fmt.Errorf("error reading optimize response: uri:%v error:%v", uri, err)
29 |     }
30 |     return nil
31 | }
32 |
-------------------------------------------------------------------------------- /esscroll/esscroll.go: --------------------------------------------------------------------------------
1 | package esscroll
2 |
3 | import (
4 |     "bytes"
5 |     "context"
6 |     "encoding/json"
7 |     "fmt"
8 |     "math"
9 |     "net/http"
10 |     "net/url"
11 |     "sync"
12 |     "time"
13 |
14 |     "github.com/lytics/escp/estypes"
15 |     log "github.com/lytics/escp/logging"
16 | )
17 |
18 | var Client = http.DefaultClient
19 |
20 | type Response struct {
21 |     Total uint64
22 |     Hits  <-chan *estypes.Doc
23 |
24 |     mu  *sync.Mutex
25 |     err error
26 | }
27 |
28 | func (r *Response) setErr(err error) {
29 |     r.mu.Lock()
30 |     defer r.mu.Unlock()
31 |     r.err = err
32 | }
33 |
34 | func (r *Response) Err() error {
35 |     r.mu.Lock()
36 |     defer r.mu.Unlock()
37 |     return r.err
38 | }
39 |
40 | type ESScroll struct {
41 |     surl    string
42 |     timeout string
43 |     pagesz  int
44 |     buflen  int
45 |     filter  map[string]interface{}
46 |
47 |     logevery time.Duration
48 |     logger   log.Logger
49 |     ctx      context.Context
50 | }
51 |
52 | func New(ctx context.Context, indexUrl string, timeout time.Duration, pagesz, buflen int, filter map[string]interface{}, logevery time.Duration, logger log.Logger) *ESScroll {
53 |     surl := indexUrl + "/_search"
54 |     tout := fmt.Sprintf("%ds", int(timeout.Seconds()))
55 |     return &ESScroll{
56 |         surl:     surl,
57 |         timeout:  tout,
58 |         pagesz:   pagesz,
59 |         buflen:   buflen,
60 |         filter:   filter,
61 |         logevery: logevery,
62 |         logger:   logger,
63 |         ctx:      ctx,
64 |     }
65 | }
66 |
67 | // Start a new scroll. URL should be of the form http://host:port/indexname.
68 | //
69 | // When Response.Hits is closed, Response.Err() should be checked to see if the
70 | // scroll completed successfully or not.
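//
// A minimal consumer sketch (hypothetical URL; error handling elided):
//
//	ess := esscroll.New(ctx, "http://localhost:9200/myindex", time.Minute, 1000, 3, nil, time.Minute, logger)
//	resp, _ := ess.Start()
//	for doc := range resp.Hits {
//		fmt.Println(doc.ID)
//	}
//	if err := resp.Err(); err != nil {
//		// the scroll failed part way through
//	}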
71 | func (s *ESScroll) Start() (*Response, error) {
72 |     origurl, err := url.Parse(s.surl)
73 |     if err != nil {
74 |         return nil, err
75 |     }
76 |     searchurl := fmt.Sprintf("%s?scroll=%s&size=%d", s.surl, s.timeout, s.pagesz)
77 |
78 |     var resp *http.Response
79 |     if s.filter == nil {
80 |         resp, err = Client.Get(searchurl)
81 |     } else {
82 |         req := struct {
83 |             Filter map[string]interface{} `json:"filter"`
84 |         }{s.filter}
85 |         // Marshal into a distinctly named error so the Post below assigns the
86 |         // function-level err that is checked after this block.
87 |         body, merr := json.Marshal(req)
88 |         if merr != nil {
89 |             return nil, merr
90 |         }
91 |         resp, err = Client.Post(searchurl, "application/json", bytes.NewReader(body))
92 |     }
93 |
94 |     if err != nil {
95 |         return nil, err
96 |     }
97 |     defer resp.Body.Close()
98 |     if resp.StatusCode != 200 {
99 |         return nil, fmt.Errorf("non-200 status code on initial request %d from %v", resp.StatusCode, searchurl)
100 |     }
101 |
102 |     result := estypes.Results{}
103 |     if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
104 |         return nil, err
105 |     }
106 |     if result.TimedOut {
107 |         return nil, fmt.Errorf("initial scroll timed out")
108 |     }
109 |     if result.Hits == nil {
110 |         return nil, fmt.Errorf("invalid response")
111 |     }
112 |
113 |     out := make(chan *estypes.Doc, s.buflen) // each result will actually get pagesz*shards documents
114 |     r := Response{Total: result.Hits.Total, Hits: out, mu: new(sync.Mutex)}
115 |
116 |     go func() {
117 |         defer close(out)
118 |         ctx, can := context.WithCancel(s.ctx)
119 |         defer can() // stop the progress logger on any exit path
120 |         prog := NewProgress(s.logevery, s.logger)
121 |         prog.Start(ctx)
122 |         prog.SetDocCount(r.Total)
123 |
124 |         docspages := make(chan []*estypes.Doc, 2)
125 |         wg := &sync.WaitGroup{}
126 |         wg.Add(1)
127 |         go func(wg *sync.WaitGroup, docspages chan []*estypes.Doc) {
128 |             defer wg.Done()
129 |             for hits := range docspages {
130 |                 select {
131 |                 case <-s.ctx.Done():
132 |                     //TODO Save progress
133 |                     return
134 |                 default:
135 |                 }
136 |                 for _, hit := range hits {
137 |                     st := time.Now()
138 |                     out <- hit
139 |                     prog.MarkBlocked(time.Now().Sub(st))
140 |                 }
141 |             }
142 |         }(wg, docspages)
143 |
144 |         baseurl := origurl.Scheme + "://" + origurl.Host + "/_search/scroll?scroll=" + s.timeout + "&scroll_id="
145 |
146 |         //TODO pass the array of docs all the way into esbulk
147 |         //TODO copy ScrollID with page
148 |         cnt := 0
149 |         for {
150 |             select {
151 |             case <-s.ctx.Done():
152 |                 //TODO Save progress
153 |                 return
154 |             default:
155 |             }
156 |             // Get the next page; a per-iteration response keeps the deferred close above scoped to the initial response only.
157 |             urli := baseurl + result.ScrollID
158 |             resp, err := Client.Get(urli)
159 |             if err != nil {
160 |                 r.setErr(err)
161 |                 return
162 |             }
163 |             if resp.StatusCode != 200 {
164 |                 resp.Body.Close()
165 |                 r.setErr(fmt.Errorf("non-200 status code on continuation %d", resp.StatusCode))
166 |                 return
167 |             }
168 |
169 |             // Reset and decode results, closing the body each page
170 |             result = estypes.Results{}
171 |             err = json.NewDecoder(resp.Body).Decode(&result)
172 |             resp.Body.Close()
173 |             if err != nil {
174 |                 r.setErr(err)
175 |                 return
176 |             }
177 |             if result.TimedOut {
178 |                 r.setErr(fmt.Errorf("timed-out on scroll"))
179 |                 return
180 |             }
181 |
182 |             if len(result.Hits.Hits) == 0 {
183 |                 close(docspages)
184 |                 wg.Wait()
185 |                 return
186 |             }
187 |             cnt++
188 |             hits := result.Hits.Hits
189 |             docspages <- hits
190 |             prog.MarkProssed(len(result.Hits.Hits))
191 |         }
192 |     }()
193 |
194 |     return &r, nil
195 | }
196 | //TODO move this progress tracker to its own package and share it with esbulk so we collect retry, error, and other metrics.
197 | func NewProgress(logevery time.Duration, logger log.Logger) *progress { 198 | return &progress{ 199 | logevery: logevery, 200 | logger: logger, 201 | } 202 | } 203 | 204 | type progress struct { 205 | logevery time.Duration 206 | logger log.Logger 207 | 208 | mu sync.Mutex 209 | last time.Time 210 | processed uint64 211 | totalprocessed uint64 212 | blockedtotal time.Duration 213 | blockedcnt int 214 | expectedDocs uint64 215 | starttime time.Time 216 | } 217 | 218 | func (p *progress) SetDocCount(n uint64) { 219 | p.mu.Lock() 220 | defer p.mu.Unlock() 221 | p.expectedDocs = uint64(n) 222 | } 223 | 224 | func (p *progress) MarkBlocked(blockedDur time.Duration) { 225 | p.mu.Lock() 226 | defer p.mu.Unlock() 227 | p.blockedcnt += 1 228 | p.blockedtotal += blockedDur 229 | } 230 | 231 | func (p *progress) MarkProssed(n int) { 232 | p.mu.Lock() 233 | defer p.mu.Unlock() 234 | 235 | n2 := uint64(n) 236 | p.totalprocessed += n2 237 | p.processed += n2 238 | } 239 | func (p *progress) Start(ctx context.Context) { 240 | p.mu.Lock() 241 | defer p.mu.Unlock() 242 | p.starttime = time.Now() 243 | p.last = time.Now() 244 | 245 | go func() { 246 | frsLog := time.After(20 * time.Second) 247 | for { 248 | select { 249 | case <-frsLog: 250 | p.log() 251 | case <-time.After(p.logevery): 252 | p.log() 253 | case <-ctx.Done(): 254 | p.log() 255 | return 256 | } 257 | } 258 | }() 259 | } 260 | func (p *progress) log() { 261 | p.mu.Lock() 262 | defer p.mu.Unlock() 263 | 264 | elapsed := time.Now().Sub(p.last) 265 | totalelapsed := time.Now().Sub(p.starttime) 266 | avetimeWaintToSend := time.Duration(int64(p.blockedtotal) / int64(max(1, p.blockedcnt))) 267 | processsedSec := p.processed / uint64(math.Max(1, elapsed.Seconds())) 268 | totalProcesssedSec := p.totalprocessed / uint64(math.Max(1, totalelapsed.Seconds())) 269 | 270 | p.logger.Infof("%d / %d documents scrolled (doc_rate:[total:%d docs/s curr:%d docs/s]) (average chan send time:%v)", 271 | p.totalprocessed, p.expectedDocs, totalProcesssedSec, processsedSec, avetimeWaintToSend) 272 | 273 | p.last = time.Now() 274 | p.processed = 0 275 | } 276 | 277 | //IECFormat prints bytes in the International Electro-technical Commission format 278 | //http://play.golang.org/p/68w_QCsE4F 279 | // multiples of 1024 280 | func IECFormat(num_in uint64) string { 281 | suffix := "B" //just assume bytes 282 | num := float64(num_in) 283 | units := []string{"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"} 284 | for _, unit := range units { 285 | if num < 1024.0 { 286 | return fmt.Sprintf("%3.1f%s%s", num, unit, suffix) 287 | } 288 | num = (num / 1024) 289 | } 290 | return fmt.Sprintf("%.1f%s%s", num, "Yi", suffix) 291 | } 292 | 293 | //TODO Implement continuing an already started scroll 294 | //func Continue(url, scrollID string) {} 295 | 296 | func max(a, b int) int { 297 | if a > b { 298 | return a 299 | } 300 | return b 301 | } 302 | -------------------------------------------------------------------------------- /estypes/doc.go: -------------------------------------------------------------------------------- 1 | // This package contains common Elasticsearch data types used by multiple other 2 | // packages. 
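// For example, a Doc corresponds to one entry of a search response's
// hits.hits array (illustrative values):
//
//	{"_index":"src","_type":"doc","_id":"1","_source":{"field":"value"}}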
3 | package estypes 4 | -------------------------------------------------------------------------------- /estypes/estypes.go: -------------------------------------------------------------------------------- 1 | package estypes 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | ) 7 | 8 | type Meta struct { 9 | ID string `json:"_id"` 10 | Type string `json:"_type"` 11 | Index string `json:"_index"` 12 | //Version string `json:"_version"` //FIXME _version not in _search results?! 13 | } 14 | 15 | type Doc struct { 16 | Meta 17 | Source json.RawMessage `json:"_source,omitempty"` 18 | } 19 | 20 | type Hits struct { 21 | Hits []*Doc `json:"hits"` 22 | Total uint64 `json:"total"` 23 | } 24 | 25 | type Results struct { 26 | Hits *Hits `json:"hits"` 27 | TimedOut bool `json:"timed_out"` 28 | ScrollID string `json:"_scroll_id"` 29 | } 30 | 31 | type AckResponse struct { 32 | Ack bool `json:"acknowledged"` 33 | } 34 | 35 | // ErrFailed should be returned any time Elasticsearch returns 36 | // acknowledged=false. 37 | var ErrUnack = errors.New("request unacknowledged") 38 | 39 | // _search_shards endpoint data 40 | type SearchShardsEndpoint struct { 41 | Nodes NodeInfo `json:"nodes"` 42 | Shards ShardList `json:"shards"` 43 | } 44 | 45 | func NewSearchShards() *SearchShardsEndpoint { 46 | Nodes := make(map[string]NodeAttributes) 47 | Shards := make(ShardList, 0) 48 | return &SearchShardsEndpoint{Nodes, Shards} 49 | } 50 | 51 | type NodeInfo map[string]NodeAttributes 52 | 53 | type NodeAttributes struct { 54 | Name string `json:"name"` 55 | TransportAddress string `json:"transport_address"` 56 | } 57 | 58 | type ShardList []ShardInfo 59 | 60 | type ShardInfo []ShardAttributes 61 | 62 | type ShardAttributes struct { 63 | State string `json:"state"` 64 | Primary bool `json:"primary"` 65 | Node string `json:"node"` 66 | //Relocating bool `json:"relocating_node"` 67 | Shard int `json:"shard"` 68 | Index string `json:"index"` 69 | } 70 | 71 | /* 72 | Structs for the /_stats endpoint 73 | */ 74 | type Stats struct { 75 | All StatsAll `json:"_all"` 76 | Shards StatsShards `json:"_shards"` 77 | Indices map[string]StatsIndices `json:"indices"` 78 | } 79 | 80 | type StatsAll struct{} 81 | type StatsShards struct{} 82 | 83 | type StatsIndices struct { 84 | Primaries IndexPrimary `json:"primaries"` 85 | //Totals IndexTotal `json:"total"` 86 | } 87 | 88 | // Index Primary Data 89 | type IndexPrimary struct { 90 | Store IndexStore `json:"store"` 91 | } 92 | 93 | type IndexStore struct { 94 | IndexByteSize int `json:"size_in_bytes"` 95 | } 96 | 97 | type IndexInfo struct { 98 | Name string 99 | ByteSize int 100 | ShardCount int 101 | BytesPerShard int 102 | } 103 | 104 | type IndexSort []IndexInfo 105 | 106 | func (is IndexSort) Len() int { return len(is) } 107 | func (is IndexSort) Swap(i, j int) { is[i], is[j] = is[j], is[i] } 108 | func (is IndexSort) Less(i, j int) bool { 109 | if is[i].BytesPerShard == 0 { 110 | is[i].BytesPerShard = is[i].ByteSize / is[i].ShardCount 111 | } 112 | if is[j].BytesPerShard == 0 { 113 | is[j].BytesPerShard = is[j].ByteSize / is[j].ShardCount 114 | } 115 | return is[i].BytesPerShard < is[j].BytesPerShard 116 | } 117 | -------------------------------------------------------------------------------- /jobs/escopyjob.go: -------------------------------------------------------------------------------- 1 | package jobs 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "net/url" 7 | "strings" 8 | "time" 9 | 10 | "fmt" 11 | 12 | "github.com/lytics/escp/esbulk" 13 | 
"github.com/lytics/escp/esindex" 14 | "github.com/lytics/escp/esscroll" 15 | log "github.com/lytics/escp/logging" 16 | ) 17 | 18 | func ParseUrl(u string) (*url.URL, error) { 19 | if len(u) == 0 { 20 | return nil, fmt.Errorf("no url provided") 21 | } 22 | if strings.HasSuffix(u, "/") { 23 | u = u[:len(u)-1] 24 | } 25 | if !strings.HasPrefix(u, "http") { 26 | u = "http://" + u 27 | } 28 | return url.Parse(u) 29 | } 30 | 31 | type SourceConfig struct { 32 | IndexName string // the sorce index name to read from. 33 | Host *url.URL // the source index url to read from example: http://es1:9200 34 | ScrollTimeout time.Duration // time to keep scroll alive between requests 35 | ScrollPage int // size of scroll pages (will actually be per source shard) 36 | ScrollDocs int // number of `docs` to buffer in memory from scroll 37 | Filter map[string]interface{} // an es filter to apply to the source scroll (experimental) 38 | } 39 | 40 | func (s *SourceConfig) URL() string { 41 | if s.Host.Scheme == "" { 42 | s.Host.Scheme = "http" 43 | } 44 | return fmt.Sprintf("%s/%s", s.Host.String(), s.IndexName) 45 | } 46 | 47 | type DesConfig struct { 48 | IndexName string //The target index name 49 | Hosts []*url.URL //set of hosts to use during the copy, to send Bulk requests too. 50 | 51 | CreateDelay time.Duration // after creating a new target index, sleep this long before writing data. 52 | RefreshInt time.Duration // the refresh interval to use on the new index 53 | Shards int // how many shards to use with the next index 54 | DelayRefresh bool // Disable refreshing until the copy has completed 55 | SkipCreate bool //Just start writting to the index, don't bother to create the index 56 | DelayReplicaton bool //turn off ES replication until after the copy has finished. 57 | ReplicationFactor int //if delayreplication is set the replicaiton setting will be set to this after coping. 58 | MaxSeg int //if indexing is delayed, the max number of segments for the optimized index 59 | 60 | BulkSize int // The pulk batch size to use when submitting writes to des index bulk queue. 
61 | NumWorkers int //number of parallel bulk upload buffers to use; 0 = len(hosts)*2 62 | } 63 | 64 | func (d *DesConfig) URLs() []string { 65 | res := []string{} 66 | for _, h := range d.Hosts { 67 | if h.Scheme == "" { 68 | h.Scheme = "http" 69 | } 70 | res = append(res, h.String()) 71 | } 72 | return res 73 | } 74 | 75 | func (d *DesConfig) PrimaryURL() string { 76 | // Use the first destination host as the "primary" node to talk too 77 | if urls := d.URLs(); len(urls) > 0 { 78 | return fmt.Sprintf("%s/%s", urls[0], d.IndexName) 79 | } 80 | return "" 81 | } 82 | 83 | func Copy(ctx context.Context, src *SourceConfig, des *DesConfig, logger log.Logger, logevery time.Duration) error { 84 | srcUrl := src.URL() 85 | priDesUrl := des.PrimaryURL() 86 | 87 | idxmeta, err := esindex.Get(srcUrl) 88 | if err != nil { 89 | return fmt.Errorf("failed getting source index metadata: %v", err) 90 | } 91 | 92 | // Copy over shards setting if it wasn't explicitly set 93 | if des.Shards == 0 { 94 | des.Shards = *idxmeta.Settings.Index.Shards 95 | } 96 | 97 | // Copy over refreshint if it wasn't set in options but was set on the source 98 | // index 99 | refreshint := "" 100 | if des.RefreshInt == 0 { 101 | if idxmeta.Settings.Index.RefreshInterval != "" { 102 | refreshint = idxmeta.Settings.Index.RefreshInterval 103 | } else { 104 | refreshint = "1s" // default 105 | } 106 | } else { 107 | refreshint = fmt.Sprintf("%v", des.RefreshInt) 108 | } 109 | 110 | // Start the scroll first to make sure the source parameter is valid 111 | ess := esscroll.New(ctx, srcUrl, src.ScrollTimeout, src.ScrollPage, src.ScrollDocs, src.Filter, logevery, logger) 112 | resp, err := ess.Start() 113 | if err != nil { 114 | return fmt.Errorf("error starting scroll: %v", err) 115 | } 116 | 117 | // Create the destination index unless explicitly told not to 118 | if !des.SkipCreate { 119 | logger.Infof("Creating index %s with shards=%d refresh_interval=%s delay-refresh=%t", des.IndexName, des.Shards, refreshint, des.DelayRefresh) 120 | if des.Shards == 0 { 121 | des.Shards = *idxmeta.Settings.Index.Shards 122 | } 123 | m := esindex.Meta{Settings: &esindex.Settings{ 124 | Index: &esindex.IndexSettings{ 125 | Shards: &des.Shards, 126 | RefreshInterval: refreshint, 127 | Mapping: &esindex.IndexMapping{ //TODO make this an argument 128 | NestedFields: &esindex.FieldsSetting{ 129 | Limit: 10000, 130 | }, 131 | }, 132 | Unassigned: &esindex.UnassignedWarper{ //TODO make this an argument 133 | NodeOption: &esindex.NodeOptions{ 134 | DelayTimeout: "5m", 135 | }, 136 | }, 137 | }, 138 | }} 139 | if des.DelayRefresh { 140 | m.Settings.Index.RefreshInterval = "-1" // Disable refreshing until the copy has completed 141 | } 142 | if des.DelayReplicaton { 143 | i := 0 144 | m.Settings.Index.Replicas = &i 145 | } 146 | if err := esindex.Create(priDesUrl, &m); err != nil { 147 | logger.Errorf("index create failed:%v", err) 148 | return err 149 | } 150 | 151 | time.Sleep(des.CreateDelay) 152 | } 153 | 154 | desmeta, err := esindex.Get(priDesUrl) 155 | if err != nil { 156 | logger.Errorf("error loading destination index settings. err:%v", err) 157 | return err 158 | } 159 | b, err := json.Marshal(desmeta) 160 | if err != nil { 161 | logger.Errorf("error marshalling index settings. 
err:%v", err) 162 | return err 163 | } 164 | 165 | logger.Infof("Copying %d documents from %s to %s/%s destination index settings: %v bulksize:%v", 166 | resp.Total, srcUrl, des.Hosts, des.IndexName, string(b), esscroll.IECFormat(uint64(des.BulkSize))) 167 | 168 | indexer := esbulk.New(ctx, des.URLs(), des.IndexName, des.BulkSize, des.NumWorkers, resp.Hits, logger) 169 | if err := <-indexer.Err(); err != nil { 170 | return fmt.Errorf("Error indexing: %v", err) 171 | } 172 | 173 | if err := resp.Err(); err != nil { 174 | logger.Errorf("Error searching: %v", err) 175 | } 176 | 177 | select { 178 | case <-ctx.Done(): 179 | return nil 180 | default: 181 | } 182 | 183 | if des.DelayRefresh { 184 | logger.Infof("Copy completed. Refreshing index. This may take some time.") 185 | if err := esindex.Optimize(priDesUrl, des.MaxSeg); err != nil { 186 | return fmt.Errorf("Error optimizing index: %v", err) 187 | } 188 | logger.Infof("Optimize completed. Setting refresh interval to %s", refreshint) 189 | 190 | // update refresh setting 191 | m := esindex.Meta{Settings: &esindex.Settings{Index: &esindex.IndexSettings{RefreshInterval: refreshint}}} 192 | if err := esindex.Update(priDesUrl, &m); err != nil { 193 | return fmt.Errorf("Error enabling refreshing: %v", err) 194 | } 195 | } 196 | 197 | if des.DelayReplicaton { 198 | // update refresh setting 199 | m := esindex.Meta{Settings: &esindex.Settings{Index: &esindex.IndexSettings{Replicas: &des.ReplicationFactor}}} 200 | if err := esindex.Update(priDesUrl, &m); err != nil { 201 | return fmt.Errorf("Error enabling replicas[%v]: %v", des.ReplicationFactor, err) 202 | } 203 | logger.Infof("index updated to enable replication factor:%v", des.ReplicationFactor) 204 | } 205 | 206 | desmeta, err = esindex.Get(priDesUrl) 207 | if err != nil { 208 | return fmt.Errorf("error loading destination index settings. err:%v", err) 209 | } 210 | b, err = json.Marshal(desmeta) 211 | if err != nil { 212 | return fmt.Errorf("error marshalling index settings. 
err:%v", err) 213 | } 214 | logger.Infof("copy job completed: destination index settngs: idx:%v settings:%v", priDesUrl, string(b)) 215 | return nil 216 | } 217 | -------------------------------------------------------------------------------- /jobs/validationjob.go: -------------------------------------------------------------------------------- 1 | package jobs 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "time" 8 | 9 | "github.com/lytics/escp/esdiff" 10 | "github.com/lytics/escp/esindex" 11 | "github.com/lytics/escp/esscroll" 12 | log "github.com/lytics/escp/logging" 13 | ) 14 | 15 | var ErrMissMatch = fmt.Errorf("missmatched results") 16 | 17 | type ValidationResults struct { 18 | Total int 19 | Checked int 20 | Missing int 21 | MissMatched int 22 | Matched int 23 | Details []string 24 | } 25 | 26 | func (v *ValidationResults) String() string { 27 | return fmt.Sprintf("Checked %d/%d (%.1f%%) documents; missing=%d mismatched=%d matched=%d", 28 | v.Checked, v.Total, (float64(v.Checked)/float64(v.Total))*100.0, 29 | v.Missing, v.MissMatched, v.Matched) 30 | } 31 | 32 | func Validate(ctx context.Context, src *SourceConfig, des *DesConfig, denom int, logger log.Logger, logevery time.Duration) (*ValidationResults, error) { 33 | dice := rand.New(rand.NewSource(time.Now().UnixNano())) 34 | vr := &ValidationResults{} 35 | desIdxUrl := fmt.Sprintf("%s/%s", des.Hosts[0], des.IndexName) 36 | srcUrl := fmt.Sprintf("%s/%s", src.Host, src.IndexName) 37 | 38 | // Make sure the totals are the same before we do a bunch of work 39 | srccnt, err := esindex.GetDocCount(srcUrl) 40 | if err != nil { 41 | return vr, fmt.Errorf("error getting src doc count: %v", err) 42 | } 43 | descnt, err := esindex.GetDocCount(desIdxUrl) 44 | if err != nil { 45 | return vr, fmt.Errorf("error getting des doc count: %v", err) 46 | } 47 | if srccnt != descnt { 48 | logger.Warnf("Source and target have different document totals: %d vs. %d", srccnt, descnt) 49 | vr.Details = []string{fmt.Sprintf("DocCountMissMatch: %d vs. 
%d", srccnt, descnt)} 50 | return vr, ErrMissMatch 51 | } 52 | 53 | // Start the scroll first to make sure the source parameter is valid 54 | ess := esscroll.New(ctx, srcUrl, src.ScrollTimeout, src.ScrollPage, src.ScrollDocs, src.Filter, logevery, logger) 55 | resp, err := ess.Start() 56 | if err != nil { 57 | return vr, fmt.Errorf("error starting scroll: %v", err) 58 | } 59 | 60 | vr.Total = int(resp.Total) 61 | 62 | logger.Infof("Scrolling over %d documents from %v \n", resp.Total, srcUrl) 63 | 64 | for doc := range resp.Hits { 65 | if denom == 1 || dice.Intn(denom) == 0 { 66 | vr.Checked++ 67 | diff, err := esdiff.Check(doc, desIdxUrl, logger) 68 | if err != nil { 69 | return vr, fmt.Errorf("fatal escheck error: %v", err) 70 | } 71 | switch diff { 72 | case "": 73 | vr.Matched++ 74 | case esdiff.DiffMissing: 75 | vr.Missing++ 76 | vr.Details = append(vr.Details, fmt.Sprintf("MissingDoc:%v", doc.ID)) 77 | default: 78 | vr.MissMatched++ 79 | vr.Details = append(vr.Details, fmt.Sprintf("DocMissMatch:%v", doc.ID)) 80 | } 81 | } 82 | } 83 | if resp.Err() != nil { 84 | return vr, fmt.Errorf("scoll error:%v", resp.Err()) 85 | } 86 | 87 | if vr.Missing+vr.MissMatched > 0 { 88 | return vr, ErrMissMatch 89 | } 90 | return vr, nil 91 | } 92 | -------------------------------------------------------------------------------- /logging/logging.go: -------------------------------------------------------------------------------- 1 | package logging 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | ) 8 | 9 | //A logging interface 10 | type Logger interface { 11 | Debug(v ...interface{}) 12 | Debugf(format string, v ...interface{}) 13 | 14 | Info(v ...interface{}) 15 | Infof(format string, v ...interface{}) 16 | 17 | Warn(v ...interface{}) 18 | Warnf(format string, v ...interface{}) 19 | 20 | Error(v ...interface{}) 21 | Errorf(format string, v ...interface{}) 22 | } 23 | 24 | func NewStdLogger(usercolor bool, loglvl int, prefix string) Logger { 25 | logPrefix := map[int]string{ 26 | ERROR: "[ERROR] ", 27 | WARN: "[WARN] ", 28 | INFO: "[INFO] ", 29 | DEBUG: "[DEBUG] ", 30 | } 31 | postfix := "" 32 | 33 | if usercolor { 34 | logColor := map[int]string{ 35 | ERROR: "\033[0m\033[31m", 36 | WARN: "\033[0m\033[33m", 37 | INFO: "\033[0m\033[35m", 38 | DEBUG: "\033[0m\033[34m", 39 | } 40 | 41 | for lvl, color := range logColor { 42 | logPrefix[lvl] = color + logPrefix[lvl] 43 | } 44 | 45 | postfix = "\033[0m" 46 | } 47 | 48 | l := &stdlogger{ 49 | logger: log.New(os.Stderr, "", log.LstdFlags|log.Lshortfile|log.Lmicroseconds), 50 | LogLevel: loglvl, 51 | LogLvlPrefix: logPrefix, 52 | LogPrefix: prefix, 53 | LogPostfix: postfix, 54 | } 55 | 56 | return l 57 | } 58 | 59 | type stdlogger struct { 60 | logger *log.Logger 61 | LogLevel int 62 | LogLvlPrefix map[int]string 63 | LogPrefix string 64 | LogPostfix string 65 | } 66 | 67 | func (l *stdlogger) Debug(v ...interface{}) { 68 | l.logP(DEBUG, v...) 69 | } 70 | 71 | func (l *stdlogger) Debugf(format string, v ...interface{}) { 72 | l.logPf(DEBUG, format, v...) 73 | } 74 | 75 | func (l *stdlogger) Info(v ...interface{}) { 76 | l.logP(INFO, v...) 77 | } 78 | 79 | func (l *stdlogger) Infof(format string, v ...interface{}) { 80 | l.logPf(INFO, format, v...) 81 | } 82 | 83 | func (l *stdlogger) Warn(v ...interface{}) { 84 | l.logP(WARN, v...) 85 | } 86 | 87 | func (l *stdlogger) Warnf(format string, v ...interface{}) { 88 | l.logPf(WARN, format, v...) 89 | } 90 | 91 | func (l *stdlogger) Error(v ...interface{}) { 92 | l.logP(ERROR, v...) 
/logging/logging.go:
--------------------------------------------------------------------------------
1 | package logging
2 | 
3 | import (
4 | 	"fmt"
5 | 	"log"
6 | 	"os"
7 | )
8 | 
9 | // Logger is a minimal leveled logging interface.
10 | type Logger interface {
11 | 	Debug(v ...interface{})
12 | 	Debugf(format string, v ...interface{})
13 | 
14 | 	Info(v ...interface{})
15 | 	Infof(format string, v ...interface{})
16 | 
17 | 	Warn(v ...interface{})
18 | 	Warnf(format string, v ...interface{})
19 | 
20 | 	Error(v ...interface{})
21 | 	Errorf(format string, v ...interface{})
22 | }
23 | // NewStdLogger returns a Logger that writes to stderr at loglvl; usecolor wraps each level prefix in ANSI colors.
24 | func NewStdLogger(usecolor bool, loglvl int, prefix string) Logger {
25 | 	logPrefix := map[int]string{
26 | 		ERROR: "[ERROR] ",
27 | 		WARN:  "[WARN] ",
28 | 		INFO:  "[INFO] ",
29 | 		DEBUG: "[DEBUG] ",
30 | 	}
31 | 	postfix := ""
32 | 
33 | 	if usecolor {
34 | 		logColor := map[int]string{
35 | 			ERROR: "\033[0m\033[31m",
36 | 			WARN:  "\033[0m\033[33m",
37 | 			INFO:  "\033[0m\033[35m",
38 | 			DEBUG: "\033[0m\033[34m",
39 | 		}
40 | 
41 | 		for lvl, color := range logColor {
42 | 			logPrefix[lvl] = color + logPrefix[lvl]
43 | 		}
44 | 
45 | 		postfix = "\033[0m"
46 | 	}
47 | 
48 | 	l := &stdlogger{
49 | 		logger:       log.New(os.Stderr, "", log.LstdFlags|log.Lshortfile|log.Lmicroseconds),
50 | 		LogLevel:     loglvl,
51 | 		LogLvlPrefix: logPrefix,
52 | 		LogPrefix:    prefix,
53 | 		LogPostfix:   postfix,
54 | 	}
55 | 
56 | 	return l
57 | }
58 | 
59 | type stdlogger struct {
60 | 	logger       *log.Logger
61 | 	LogLevel     int
62 | 	LogLvlPrefix map[int]string
63 | 	LogPrefix    string
64 | 	LogPostfix   string
65 | }
66 | 
67 | func (l *stdlogger) Debug(v ...interface{}) {
68 | 	l.logP(DEBUG, v...)
69 | }
70 | 
71 | func (l *stdlogger) Debugf(format string, v ...interface{}) {
72 | 	l.logPf(DEBUG, format, v...)
73 | }
74 | 
75 | func (l *stdlogger) Info(v ...interface{}) {
76 | 	l.logP(INFO, v...)
77 | }
78 | 
79 | func (l *stdlogger) Infof(format string, v ...interface{}) {
80 | 	l.logPf(INFO, format, v...)
81 | }
82 | 
83 | func (l *stdlogger) Warn(v ...interface{}) {
84 | 	l.logP(WARN, v...)
85 | }
86 | 
87 | func (l *stdlogger) Warnf(format string, v ...interface{}) {
88 | 	l.logPf(WARN, format, v...)
89 | }
90 | 
91 | func (l *stdlogger) Error(v ...interface{}) {
92 | 	l.logP(ERROR, v...)
93 | }
94 | 
95 | func (l *stdlogger) Errorf(format string, v ...interface{}) {
96 | 	l.logPf(ERROR, format, v...)
97 | }
98 | 
99 | func (l *stdlogger) logP(logLvl int, v ...interface{}) {
100 | 	if l.LogLevel >= logLvl && l.logger != nil {
101 | 		l.logger.Output(3,
102 | 			l.LogPrefix+l.LogLvlPrefix[logLvl]+fmt.Sprint(v...)+l.LogPostfix)
103 | 	}
104 | }
105 | 
106 | func (l *stdlogger) logPf(logLvl int, format string, v ...interface{}) {
107 | 	if l.LogLevel >= logLvl && l.logger != nil {
108 | 		l.logger.Output(3,
109 | 			l.LogPrefix+l.LogLvlPrefix[logLvl]+fmt.Sprintf(format, v...)+l.LogPostfix)
110 | 	}
111 | }
112 | 
--------------------------------------------------------------------------------
/logging/shared.go:
--------------------------------------------------------------------------------
1 | package logging
2 | 
3 | import (
4 | 	"fmt"
5 | 	"path"
6 | 	"runtime"
7 | )
8 | 
9 | const (
10 | 	NOLOGGING = -1
11 | 	FATAL     = 0
12 | 	ERROR     = 1
13 | 	WARN      = 2
14 | 	INFO      = 3
15 | 	DEBUG     = 4
16 | )
17 | // Whoami returns the caller's location as "func:line", skipping skip extra stack frames.
18 | func Whoami(skip int) string {
19 | 	pc, _, ln, ok := runtime.Caller(skip + 1)
20 | 	// the source file path from runtime.Caller is discarded; only func name and line are used
21 | 	if !ok {
22 | 		return "unknown"
23 | 	}
24 | 	funcPc := runtime.FuncForPC(pc)
25 | 	if funcPc == nil {
26 | 		return "unnamed"
27 | 	}
28 | 
29 | 	pathname := funcPc.Name()
30 | 	name := path.Base(pathname)
31 | 	return fmt.Sprintf("%v:%v", name, ln)
32 | }
33 | 
--------------------------------------------------------------------------------
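Taken together, `logging.go` and `shared.go` form a small standalone package. The sketch below is a minimal, hypothetical usage example; it relies only on the exported API shown above (`NewStdLogger`, the level constants, and `Whoami`).

```go
package main

import log "github.com/lytics/escp/logging"

func main() {
	// Colorized stderr logger that passes everything up to DEBUG.
	logger := log.NewStdLogger(true, log.DEBUG, "escp ")

	logger.Debugf("starting in %s", log.Whoami(0)) // Whoami(0) reports this call site, e.g. "main.main:8"
	logger.Warn("scroll page size not set; using default")

	// A WARN-level logger filters out Info and Debug but still emits errors.
	quiet := log.NewStdLogger(false, log.WARN, "")
	quiet.Info("suppressed")
	quiet.Error("still printed")
}
```

Note the call depth of 3 passed to `logger.Output` in `logP`/`logPf`: combined with `log.Lshortfile`, it makes the standard logger report the caller's file and line rather than the wrapper's.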