├── qci
│   ├── __init__.py
│   ├── manual_run.sh
│   ├── test_loadgen.ipy
│   ├── test_loadgen2.ipy
│   ├── test_loadgen3.ipy
│   ├── test_endpoint.ipy
│   ├── test_readwrite1.ipy
│   ├── test_readwrite2.ipy
│   ├── test_changedrange.ipy
│   ├── utils.ipy
│   ├── runtests.ipy
│   └── test_readstat1.ipy
├── .gitignore
├── internal
│   ├── cephprovider
│   │   ├── test
│   │   ├── cephprovider.h
│   │   ├── cephcache.go
│   │   ├── cephprovider.c
│   │   └── cephprovider.go
│   ├── bstore
│   │   ├── bstore.go
│   │   ├── linker.go
│   │   ├── blockcache.go
│   │   ├── blocktypes_test.go
│   │   ├── bstore_test.go
│   │   ├── blockstore.go
│   │   └── blocktypes.go
│   ├── bprovider
│   │   ├── bprovider.go
│   │   └── bprovider_test.go
│   └── fileprovider
│       └── fileprovider.go
├── cpinterface
│   ├── go.capnp
│   ├── interface.capnp
│   └── cpinterface.go
├── Makefile
├── .project
├── tools
│   ├── addtarget
│   └── scrub
├── btrdb.conf
├── quasar.conf
├── logconfig.xml
├── README.md
├── btrdbd
│   ├── main.go
│   └── config.go
├── qtree
│   ├── operators.go
│   ├── qtree_test.go
│   ├── qtree_utils.go
│   └── qtree2_test.go
├── quasar.go
└── quasar_test.go
/qci/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/*
2 | *.pyc
3 | pkg/*
4 | src/*
5 | *~
6 | .idea
7 | quasar.iml
8 | *.log
9 |
--------------------------------------------------------------------------------
/internal/cephprovider/test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UlricQin/btrdb/master/internal/cephprovider/test
--------------------------------------------------------------------------------
/cpinterface/go.capnp:
--------------------------------------------------------------------------------
1 | @0xd12a1c51fedd6c88;
2 | annotation package(file) :Text;
3 | annotation import(file) :Text;
4 | annotation doc(struct, field, enum) :Text;
5 | annotation tag(enumerant) : Text;
6 | annotation notag(enumerant) : Void;
7 | annotation customtype(field) : Text;
8 | $package("capn");
9 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | bqserver:
3 | go build -o bin/qserver github.com/SoftwareDefinedBuildings/quasar/qserver
4 |
5 | cleanbins:
6 | rm -f bin/qserver bin/qtool
7 |
8 | bins: cleanbins bqserver
9 |
10 | cleandb:
11 | rm -f /srv/quasar/*.db
12 | rm -f /srv/quasartestdb/*
13 | mongo quasar2 --eval 'db.superblocks.remove({})'
14 |
15 | newdbs: cleandb bins
16 | ./bin/qserver -makedb
17 |
--------------------------------------------------------------------------------
/qci/manual_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 | rm -f profile.*
3 | rm -f log.*
4 | export goversion=go_64_1.4.2
5 | export GOROOT=/srv/$goversion
6 | export GO=$GOROOT/bin/go
7 | mkdir -p gopath
8 | export GOPATH=`pwd`/gopath
9 | export PATH=$PATH:$GOROOT/bin/
10 | git pull
11 | $GO get -v -d ./...
12 | $GO build -a -v -o exe ./qserver
13 | export CEPHTYPE=filestore
14 | export TEST_TYPE=loadgen2
15 | ipython qci/runtests.ipy
16 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>quasar</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>com.googlecode.goclipse.goBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>com.googlecode.goclipse.core.goNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/tools/addtarget:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import pymongo
4 | 
5 | if len(sys.argv) != 4:
6 |     print "usage: addtarget <db> <collection> <path regex>"
7 |     sys.exit(1)
8 | 
9 | _client = pymongo.MongoClient()
10 | db = _client[sys.argv[1]][sys.argv[2]]
11 | 
12 | outf = open("targets","a")
13 | print >>outf, "#addtargets %s:" % sys.argv[3]
14 | for r in db.find({"Path":{"$regex":sys.argv[3]}}):
15 |     print >>outf, "# %s : %s" % (r["Metadata"]["SourceName"], r["Path"])
16 |     print >>outf, r["uuid"]
17 |     print >>outf, "\n"
18 | 
19 | outf.close()
--------------------------------------------------------------------------------
/internal/bstore/bstore.go:
--------------------------------------------------------------------------------
1 | package bstore
2 |
3 | import (
4 | "github.com/op/go-logging"
5 | )
6 |
7 | var lg *logging.Logger
8 |
9 | func init() {
10 | lg = logging.MustGetLogger("log")
11 | }
12 |
13 | //Note to self, if you bump VSIZE such that the max blob goes past 2^16, make sure to adapt
14 | //providers
15 | const (
16 | VSIZE = 1024
17 | KFACTOR = 64
18 | VBSIZE = 2 + 9*VSIZE + 9*VSIZE + 2*VSIZE //Worst case with huffman
19 | CBSIZE = 1 + KFACTOR*9*6
20 | DBSIZE = VBSIZE
21 | PWFACTOR = uint8(6) //1<<6 == 64
22 | RELOCATION_BASE = 0xFF00000000000000
23 | )
24 |
--------------------------------------------------------------------------------
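As a quick check of the arithmetic behind these constants (an illustrative sketch, not a file in the repository), the worst-case block sizes they imply can be computed directly:

```go
package main

import "fmt"

func main() {
	const VSIZE = 1024
	const KFACTOR = 64
	vbsize := 2 + 9*VSIZE + 9*VSIZE + 2*VSIZE // worst-case Huffman-coded vector block
	cbsize := 1 + KFACTOR*9*6                 // core block: 64 children, six fields of at most 9 bytes each
	fmt.Println(vbsize, cbsize)               // 20482 3457 -- both below the 2^16 limit mentioned in the note above
}
```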
/internal/cephprovider/cephprovider.h:
--------------------------------------------------------------------------------
1 | #include <rados/librados.h>
2 | #include <stdint.h>
3 |
4 | typedef struct
5 | {
6 | rados_ioctx_t ctx;
7 | rados_completion_t *comps;
8 | int comp_len;
9 | int comp_cap;
10 | } cephprovider_handle_t;
11 |
12 | typedef cephprovider_handle_t* phandle_t;
13 |
14 | void initialize_provider(const char* conffile, const char* pool);
15 | phandle_t handle_create();
16 | void handle_write(phandle_t seg, uint8_t *uuid, uint64_t address, const char* data, int len, int trunc);
17 | uint64_t handle_obtainrange(cephprovider_handle_t *h);
18 | void handle_init_allocator(cephprovider_handle_t *h);
19 | int handle_read(phandle_t seg, uint8_t *uuid, uint64_t address, char* dest, int len);
20 | void handle_close(phandle_t seg);
21 |
--------------------------------------------------------------------------------
/qci/test_loadgen.ipy:
--------------------------------------------------------------------------------
1 | def start_loadgen():
2 | stdout=open("log.lg.stdout","w")
3 | uuids = [str(uuid.uuid4()) for x in xrange(20)]
4 | rc = subprocess.call(["./loadgen", "-i"]+uuids, stdout=stdout, stderr=subprocess.STDOUT)
5 | print "INSERT RV:",rc
6 | if rc != 0:
7 | sys.exit(rc)
8 | sys.stdout.flush()
9 | rc = subprocess.call(["./loadgen", "-v"]+uuids, stdout=stdout, stderr=subprocess.STDOUT)
10 | print "VERIFY RV:",rc
11 | if rc != 0:
12 | sys.exit(rc)
13 |
14 |
15 | p2 = Process(target=start_loadgen)
16 | p2.start()
17 | p2.join()
18 | if p2.exitcode != 0:
19 | print "EXITCODE FROM LOADGEN:", p2.exitcode
20 | os.kill(p.pid, 9)
21 | sys.exit(p2.exitcode)
22 | else:
23 | !rm FAILURE
24 |
--------------------------------------------------------------------------------
/btrdb.conf:
--------------------------------------------------------------------------------
1 | # This is the configuration file for QUASAR version 2
2 | # without this file, it will not start. It should be
3 | # located either in the directory from which quasar is
4 | # started, or in /etc/quasar/quasar.conf
5 |
6 | [storage]
7 | # Either file-based or Ceph-based storage can be used
8 | provider=file
9 | filepath=/srv/quasar/
10 |
11 | #provider=ceph
12 | #cephconf=/etc/ceph/ceph.conf
13 | #cephpool=data
14 |
15 | [http]
16 | enabled=true
17 | port=9000
18 | address=0.0.0.0
19 |
20 | [capnp]
21 | enabled=true
22 | port=4410
23 | address=0.0.0.0
24 |
25 | [mongo]
26 | server=localhost
27 | collection=quasar
28 |
29 | [cache]
30 | # Configure the RADOS and block caches. If you have a choice, rather
31 | # spend memory on the block cache.
32 |
33 | # This is measured in blocks, which are at most ~16K
34 | # blockcache=4000000 #64 GB
35 | blockcache=2000000 #32 GB
36 | # blockcache=1000000 #16 GB
37 | # blockcache=500000 #8 GB
38 | # blockcache=250000 #4 GB
39 | # blockcache=62500 #1 GB
40 |
41 | radosreadcache=2048 #in MB
42 | radoswritecache=256 #in MB
43 |
44 | [coalescence]
45 | earlytrip=16384 #readings
46 | interval=5000 #ms
47 |
--------------------------------------------------------------------------------
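The commented presets follow from taking "at most ~16K" as 16 kB per cached block; a small illustrative calculation (not part of the configuration file):

```go
package main

import "fmt"

func main() {
	// blockcache is measured in blocks of at most ~16 kB, so RAM ≈ blocks * 16 kB.
	for _, blocks := range []int64{4000000, 2000000, 1000000, 500000, 250000, 62500} {
		fmt.Printf("blockcache=%d -> ~%d GB\n", blocks, blocks*16000/1000000000)
	}
	// Prints 64, 32, 16, 8, 4 and 1 GB, matching the commented presets.
}
```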
/quasar.conf:
--------------------------------------------------------------------------------
1 | # This is the configuration file for QUASAR version 2
2 | # without this file, it will not start. It should be
3 | # located either in the directory from which quasar is
4 | # started, or in /etc/quasar/quasar.conf
5 |
6 | [storage]
7 | # Either file-based or Ceph-based storage can be used
8 | provider=file
9 | filepath=/srv/quasar/
10 |
11 | #provider=ceph
12 | #cephconf=/etc/ceph/ceph.conf
13 | #cephpool=data
14 |
15 | [http]
16 | enabled=true
17 | port=9000
18 | address=0.0.0.0
19 |
20 | [capnp]
21 | enabled=true
22 | port=4410
23 | address=0.0.0.0
24 |
25 | [mongo]
26 | server=localhost
27 | collection=quasar
28 |
29 | [cache]
30 | # Configure the RADOS and block caches. If you have a choice, rather
31 | # spend memory on the block cache.
32 |
33 | # This is measured in blocks, which are at most ~16K
34 | # blockcache=4000000 #64 GB
35 | blockcache=2000000 #32 GB
36 | # blockcache=1000000 #16 GB
37 | # blockcache=500000 #8 GB
38 | # blockcache=250000 #4 GB
39 | # blockcache=62500 #1 GB
40 |
41 | radosreadcache=2048 #in MB
42 | radoswritecache=256 #in MB
43 |
44 | [coalescence]
45 | earlytrip=16384 #readings
46 | interval=5000 #ms
47 |
--------------------------------------------------------------------------------
/qci/test_loadgen2.ipy:
--------------------------------------------------------------------------------
1 | import random
2 | import uuid
3 | import subprocess
4 | import sys
5 | import time
6 | num_streams = 1
7 | def start_loadgen():
8 | global num_streams
9 | cf = open("loadConfig.ini", "w")
10 | random_seed = random.randint(0,10000)
11 | print "USING RANDOM SEED ", random_seed
12 | cf.write("""TOTAL_RECORDS=24000000
13 | TCP_CONNECTIONS={0}
14 | POINTS_PER_MESSAGE=5000
15 | NANOS_BETWEEN_POINTS=9000000
16 | MAX_TIME_RANDOM_OFFSET = 8999999
17 | FIRST_TIME=1420582220083869629
18 | DB_ADDR=localhost:4410
19 | NUM_STREAMS={0}
20 | RAND_SEED={1}
21 | """.format(num_streams, random_seed)
22 | )
23 |
24 | for i in xrange(num_streams):
25 | cf.write("UUID%d=%s\n" % (i+1, uuid.uuid4()))
26 | cf.close()
27 | stdout=open("log.lg.1.stdout","w")
28 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT)
29 | print "INSERT RV:",rc
30 | if rc != 0:
31 | sys.exit(rc)
32 | sys.stdout.flush()
33 | term_quasar()
34 | time.sleep(2)
35 | proc_profiles("ins")
36 | start_quasar()
37 | time.sleep(10)
38 | stdout2=open("log.lg.2.stdout","w")
39 | rc = subprocess.call(["./loadgen", "-v"], stdout=stdout2, stderr=subprocess.STDOUT)
40 | print "VERIFY RV:",rc
41 | if rc != 0:
42 | sys.exit(rc)
43 | !rm FAILURE
44 |
45 | start_loadgen()
46 |
--------------------------------------------------------------------------------
/qci/test_loadgen3.ipy:
--------------------------------------------------------------------------------
1 | import random
2 | import uuid
3 | import subprocess
4 | import sys
5 | import time
6 | num_streams = 1
7 | def start_loadgen():
8 | global num_streams
9 | cf = open("loadConfig.ini", "w")
10 | random_seed = random.randint(0,10000)
11 | print "USING RANDOM SEED ", random_seed
12 | cf.write("""TOTAL_RECORDS=24000000
13 | TCP_CONNECTIONS={0}
14 | POINTS_PER_MESSAGE=5000
15 | NANOS_BETWEEN_POINTS=9000000
16 | MAX_TIME_RANDOM_OFFSET = 8999999
17 | FIRST_TIME=1420582220083869629
18 | DB_ADDR=localhost:4410
19 | NUM_STREAMS={0}
20 | RAND_SEED={1}
21 | """.format(num_streams, random_seed)
22 | )
23 |
24 | for i in xrange(num_streams):
25 | cf.write("UUID%d=%s\n" % (i+1, uuid.uuid4()))
26 | cf.close()
27 | stdout=open("log.lg.stdout1","w")
28 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT)
29 | print "INSERT RV:",rc
30 | if rc != 0:
31 | sys.exit(rc)
32 | sys.stdout.flush()
33 | term_quasar()
34 | time.sleep(2)
35 | proc_profiles("inst")
36 | start_quasar()
37 | time.sleep(4)
38 | stdout2=open("log.lg.stdout2","w")
39 | rc = subprocess.call(["./loadgen", "-d"], stdout=stdout2, stderr=subprocess.STDOUT)
40 | print "DELETE RV:",rc
41 | if rc != 0:
42 | sys.exit(rc)
43 | !rm FAILURE
44 |
45 |
46 | start_loadgen()
47 |
--------------------------------------------------------------------------------
/qci/test_endpoint.ipy:
--------------------------------------------------------------------------------
1 | import random
2 | import uuid
3 | import subprocess
4 | import sys
5 | import time
6 | import json
7 | def start_loadgen():
8 | global num_streams
9 | cf = open("loadConfig.ini", "w")
10 | cf.write("""TOTAL_RECORDS=120000000
11 | TCP_CONNECTIONS=1
12 | POINTS_PER_MESSAGE=5000
13 | NANOS_BETWEEN_POINTS=9000000
14 | MAX_TIME_RANDOM_OFFSET = 8999999
15 | FIRST_TIME=1420582220083869629
16 | DB_ADDR=localhost:4410
17 | NUM_STREAMS=1
18 | UUID1=9f67541c-95ee-11e4-a7ac-0026b6df9cf2
19 | RAND_SEED=15
20 | """)
21 | cf.close()
22 | stdout=open("log.lg.stdout1","w")
23 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT)
24 | print "INSERT RV:",rc
25 | if rc != 0:
26 | sys.exit(rc)
27 | sys.stdout.flush()
28 | term_quasar()
29 | time.sleep(2)
30 | proc_profiles("inst")
31 | start_quasar()
32 | time.sleep(4)
33 |
34 | # Check whether we have extra points
35 | dstr = !curl -s "http://localhost:9000/data/uuid/9f67541c-95ee-11e4-a7ac-0026b6df9cf2?starttime=1421395993269633024&endtime=1421455504336486400&unitoftime=ns&pw=37"
36 | data = json.loads(dstr[0])
37 | lastpoint = data[0]["XReadings"][-1]
38 | lasttime = (lastpoint[0] * 1000000) + lastpoint[1]
39 | if lasttime != (1421455504336486400 - (2 ** 37)):
40 | print "Extra or missing points detected at end of statistical query"
41 | print "last time:", lasttime
42 | sys.exit(1)
43 | !rm FAILURE
44 |
45 |
46 | start_loadgen()
47 |
--------------------------------------------------------------------------------
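The final check in this test relies on the query range spanning a whole number of pw=37 statistical windows (2^37 ns each), so the last reported window must begin exactly one window-width before the end time. A small illustrative sketch using the values from the test:

```go
package main

import "fmt"

func main() {
	const start = int64(1421395993269633024) // starttime used in the curl query
	const end = int64(1421455504336486400)   // endtime used in the curl query
	width := int64(1) << 37                  // pw=37 -> 2^37 ns per statistical window
	fmt.Println((end - start) % width)       // 0: the range is a whole number of windows
	fmt.Println((end - start) / width)       // 433 windows in total
	fmt.Println(end - width)                 // expected start time of the last window,
	// which is what the test compares lasttime against
}
```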
/logconfig.xml:
--------------------------------------------------------------------------------
1 | <logging>
2 |   <filter enabled="true">
3 |     <tag>stdout</tag>
4 |     <type>console</type>
5 | 
6 |     <level>DEBUG</level>
7 |   </filter>
8 |   <filter enabled="true">
9 |     <tag>file</tag>
10 |     <type>file</type>
11 |     <level>FINEST</level>
12 |     <property name="filename">quasar.log</property>
13 |     <property name="format">[%D %T] [%L] (%S) %M</property>
14 |     <property name="rotate">false</property>
15 |     <property name="maxsize">20M</property>
16 |     <property name="maxlines">0K</property>
17 |     <property name="daily">true</property>
18 |   </filter>
19 |   <filter enabled="true">
20 |     <tag>file</tag>
21 |     <type>file</type>
22 |     <level>WARNING</level>
23 |     <property name="filename">quasar.serious.log</property>
24 |     <property name="format">[%D %T] [%L] (%S) %M</property>
25 |     <property name="rotate">false</property>
26 |     <property name="maxsize">20M</property>
27 |     <property name="maxlines">0K</property>
28 |     <property name="daily">true</property>
29 |   </filter>
30 | </logging>
31 | 
--------------------------------------------------------------------------------
/qci/test_readwrite1.ipy:
--------------------------------------------------------------------------------
1 |
2 | import qdf
3 | import qdf.quasar
4 | import sys
5 | import random
6 | import uuid
7 | import time
8 | from twisted.internet import defer, protocol, reactor
9 | print "entered test readwrite1"
10 | EXIT_CODE = None
11 | def setexit(code):
12 | global EXIT_CODE
13 | EXIT_CODE = code
14 | reactor.stop()
15 |
16 | @defer.inlineCallbacks
17 | def testbody(db):
18 | print "connected"
19 | TOTALPOINTS = 1000000
20 | PERINSERT = 1000
21 | INTERVAL = int(1E9/120.)
22 | UID = str(uuid.uuid4())
23 | randomdata = [(x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)]
24 | idx = 0
25 | print "random data generated"
26 | for i in xrange(TOTALPOINTS/PERINSERT):
27 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT])
28 | idx += PERINSERT
29 | time.sleep(20)
30 | readdata = []
31 | idx = 0
32 | print "reading data"
33 | for i in xrange(TOTALPOINTS/PERINSERT):
34 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT)
35 | (version, values) = rv
36 | readdata += [(v.time, v.value) for v in values]
37 | print "len readdata:",len(readdata)
38 | print "len insert:",len(randomdata)
39 | for i in xrange(len(randomdata)):
40 | if randomdata[i][0] != readdata[i][0]:
41 | print "time mismatch index",i
42 | break
43 | if randomdata[i][1] != readdata[i][1]:
44 | print "value mismatch index",i
45 | break
46 | else:
47 | print "lists match"
48 | setexit(0)
49 | return
50 | setexit(1)
51 | return
52 |
53 | def onFail(param):
54 | print "Encountered error: ", param
55 | setexit(2)
56 |
57 | def entrypoint():
58 | print "in entrypoint"
59 | try:
60 | q = qdf.quasar.connectToArchiver("localhost", 4410)
61 | q.addCallback(testbody)
62 | q.addErrback(onFail)
63 | except Exception as e:
64 | print "ex: ",e
65 | setexit(1)
66 |
67 | reactor.callWhenRunning(entrypoint)
68 | reactor.run()
69 | if EXIT_CODE == None:
70 | EXIT_CODE = 42
71 | if EXIT_CODE != 0:
72 | sys.exit(EXIT_CODE)
73 | else:
74 | !rm FAILURE
75 |
--------------------------------------------------------------------------------
/tools/scrub:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pymongo
3 | import uuid
4 | import rados
5 | import sys
6 | import time
7 |
8 | if len(sys.argv) != 4:
9 | print "usage: scrub "
10 | sys.exit(1)
11 |
12 | _client = pymongo.MongoClient()
13 | db = _client[sys.argv[2]].superblocks
14 |
15 |
16 | uuids = []
17 | #Get target uuids
18 | with open(sys.argv[3],"r") as uf:
19 | for l in uf.readlines():
20 | l = l.strip()
21 | if l.startswith("#") or len(l) == 0:
22 | continue
23 | uuids.append(uuid.UUID(l))
24 |
25 | #Get all metadata uuids
26 | known_uuids = [uuid.UUID(x) for x in db.distinct("uuid")]
27 | print "There are %d known uuids" % len(known_uuids)
28 |
29 | #Get all object names for these uuids
30 | cluster = rados.Rados(conffile="/etc/ceph/ceph.conf")
31 | print "Will attempt to connect to: " + str(cluster.conf_get('mon initial members'))
32 |
33 | cluster.connect()
34 | time.sleep(1)
35 | for i in xrange(10):
36 | try:
37 | cluster.require_state("connected")
38 | break
39 | except rados.RadosStateError as e:
40 | print e
41 | print "Not connected yet"
42 | time.sleep(1)
43 |
44 | ioctx = cluster.open_ioctx(sys.argv[1])
45 | obj_iter = ioctx.list_objects()
46 |
47 | rogue_uuids = set()
48 | toremove = []
49 | total = 0
50 | for obj in obj_iter:
51 | if obj.key == "allocator":
52 | continue
53 | total += 1
54 | uid = uuid.UUID(obj.key[:32])
55 | if uid not in known_uuids:
56 | rogue_uuids.add(uid)
57 | if uid in uuids:
58 | toremove.append(obj.key)
59 | if total != 0:
60 | print "A total of %d objects matched (%.2f%%)" % (len(toremove), (float(len(toremove))/total)*100)
61 | else:
62 | print "No objects"
63 | print "There are %d rogue uuids" % len(rogue_uuids)
64 |
65 | print "If you wish to continue and delete the quasar objects, type 'yes i really do' exactly"
66 | inp = raw_input(">")
67 | if inp != "yes i really do":
68 | print "Aborting"
69 | sys.exit(1)
70 |
71 | for key in toremove:
72 | print "Removing: ",key
73 | ioctx.remove_object(key)
74 |
75 | print "If you wish to continue and delete the metadata, type 'yes I really do' exactly"
76 | inp = raw_input(">")
77 | if inp != "yes I really do":
78 | print "Aborting"
79 | sys.exit(1)
80 |
81 | for u in uuids:
82 | print "Removing: ", str(u)
83 | rv = db.remove({"uuid":str(u)})
84 | print "OK, %d generations nuked" % rv["n"]
85 |
86 | print "Success"
87 |
--------------------------------------------------------------------------------
/internal/bprovider/bprovider.go:
--------------------------------------------------------------------------------
1 | package bprovider
2 |
3 | //A blob provider implements a simple interface for storing blobs
4 | //An address base gets locked in the form of a segment, and then an arbitrary number of
5 | //blobs are written sequentially from that base, with each write call returning the address
6 | //of the base of the next write. At the end, the segment is unlocked.
7 | //For reading, the blob provider needs to work out its own framing, as it gets given
8 | //a start address and must magically return the blob corresponding to that address
9 | //The addresses have no special form*, other than being uint64s. It is up to the provider
10 | //to encode whatever metadata it requires inside that uint64
11 |
12 | //*I lied, addresses must not have the top byte as FF, those are reserved for relocation addresses
13 |
14 | //In case it is not obvious, the challenge a bprovider faces is being able to hand out an address
15 | //and support an arbitrary sized blob being written to that address. At the moment the max size of
16 | //a blob can be determined by max(CBSIZE, VBSIZE) which is under 32k, but may be as little as 1k
17 | //for well compressed blocks.
18 |
19 | import (
20 | "errors"
21 | )
22 |
23 | var ErrNoSpace = errors.New("No more space")
24 | var ErrInvalidArgument = errors.New("Invalid argument")
25 | var ErrExists = errors.New("File exists")
26 |
27 | type Segment interface {
28 | //Returns the address of the first free word in the segment when it was locked
29 | BaseAddress() uint64
30 |
31 | //Unlocks the segment for the StorageProvider to give to other consumers
32 | //Implies a flush
33 | Unlock()
34 |
35 | //Writes a slice to the segment, returns immediately
36 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument
37 | //It is up to the implementer to work out how to report no space immediately
38 | //The uint64 is the address to be used for the next write
39 | Write(uuid []byte, address uint64, data []byte) (uint64, error)
40 |
41 | //Block until all writes are complete. Note this does not imply a flush of the underlying files.
42 | Flush()
43 | }
44 | type StorageProvider interface {
45 |
46 | //Called at startup of a normal run
47 | Initialize(opts map[string]string)
48 |
49 | //Called to create the database for the first time
50 | //Note that initialize is not called before this function call
51 | //and you can assume the program will exit shortly after this
52 | //function call
53 | CreateDatabase(opts map[string]string) error
54 |
55 | // Lock a segment, or block until a segment can be locked
56 | // Returns a Segment struct
57 | LockSegment(uuid []byte) Segment
58 |
59 | // Read the blob into the given buffer
60 | Read(uuid []byte, address uint64, buffer []byte) []byte
61 | }
62 |
--------------------------------------------------------------------------------
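A minimal sketch of a caller following this write protocol (not a file in the repository; it mirrors the loop in internal/bstore/linker.go, and writeBlobs, provider and blobs are illustrative names):

```go
func writeBlobs(provider StorageProvider, uuid []byte, blobs [][]byte) []uint64 {
	seg := provider.LockSegment(uuid) // lock an address base for this stream
	ptr := seg.BaseAddress()          // first free address in the segment
	addrs := make([]uint64, 0, len(blobs))
	for _, b := range blobs {
		addrs = append(addrs, ptr) // the address this blob is being written to
		nptr, err := seg.Write(uuid, ptr, b)
		if err != nil {
			panic(err) // a real caller would handle ErrNoSpace / ErrInvalidArgument
		}
		ptr = nptr // Write returns the address to use for the next blob
	}
	seg.Unlock() // release the segment for other consumers (implies a flush)
	return addrs
}
```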
/README.md:
--------------------------------------------------------------------------------
1 | BTrDB
2 | =====
3 |
4 | The Berkeley TRee DataBase is a high performance time series
5 | database designed to support high density data storage applications.
6 | This project used to be called QUASAR, but we have changed the name
7 | partly to match publications, and partly as a flag day. The capnp interface
8 | in BTrDB is designed to better support large queries and clusters and is not
9 | backwards compatible with the quasar interface.
10 |
11 | ### Dependencies
12 |
13 | BTrDB uses a MongoDB collection to store metadata. Also, if installed in High Availability
14 | mode, it requires a ceph pool. Note that even if not using ceph, librados needs to be
15 | installed.
16 |
17 | ### Installation
18 |
19 | To run an archiver, make sure that you have Go >= 1.4 installed and then
20 | run the following:
21 |
22 | ```
23 | apt-get install librados-dev
24 | go get github.com/SoftwareDefinedBuildings/btrdb/btrdbd
25 | ```
26 |
27 | This will install the tools into your
28 | $GOPATH/bin directory. If you have this directory on your $PATH then you do
29 | not need to do anything further. Otherwise you will need to add the binaries
30 | to your $PATH variable manually.
31 |
32 | Note that in order to run the btrdb server, you will need to copy btrdb.conf
33 | from the github repository to /etc/btrdb/btrdb.conf (or the directory that
34 | you are in).
35 |
36 | An alternative to 'go get'ing to your GOPATH is to clone the repository then do:
37 |
38 | ```
39 | apt-get install librados-dev
40 | go get -d ./... && go install ./btrdbd
41 | ```
42 |
43 | This will also put the btrdbd binary in your $GOPATH/bin.
44 |
45 | ### Configuration
46 |
47 | Sensible defaults (for a production deployment) are already found in btrdb.conf. Some things you may need
48 | to adjust:
49 | - The MongoDB server and collection name
50 | - The block cache size (defaults to 32GB). Note that quasar uses more than this; the block
51 | cache is just the primary contributor to the RAM footprint.
52 | - The file storage path or ceph details
53 |
54 | Once your configuration is set up, you can set up the files and database indices with
55 |
56 | ```
57 | btrdbd -makedb
58 | ```
59 |
60 | This should print out:
61 | ```
62 | Configuration OK!
63 | Creating a new database
64 | Done
65 | ```
66 |
67 | You can now run a server with:
68 | ```
69 | btrdbd
70 | ```
71 |
72 | ### Using the database
73 |
74 | Note that we are presently working on release engineering, and hope to release the first (public) version in August 2016. If you are using it now, bear in mind it is still in development.
75 |
76 | To communicate with the database, there are [go bindings](https://github.com/SoftwareDefinedBuildings/btrdb-go) and [python bindings](https://github.com/SoftwareDefinedBuildings/btrdb-python). The go bindings are faster and more maintained.
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/internal/bstore/linker.go:
--------------------------------------------------------------------------------
1 | package bstore
2 |
3 | import (
4 | "log"
5 | "sort"
6 | "sync"
7 |
8 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider"
9 | )
10 |
11 | var ser_buf_pool = sync.Pool{
12 | New: func() interface{} {
13 | return make([]byte, DBSIZE)
14 | },
15 | }
16 |
17 | type pCBArr []*Coreblock
18 |
19 | func (dca pCBArr) Len() int {
20 | return len(dca)
21 | }
22 |
23 | func (dca pCBArr) Swap(i, j int) {
24 | dca[i], dca[j] = dca[j], dca[i]
25 | }
26 |
27 | func (dca pCBArr) Less(i, j int) bool {
28 | return dca[i].PointWidth < dca[j].PointWidth
29 | }
30 |
31 | func LinkAndStore(uuid []byte, bs *BlockStore, bp bprovider.StorageProvider, vblocks []*Vectorblock, cblocks []*Coreblock) map[uint64]uint64 {
32 | loaned_sercbufs := make([][]byte, len(cblocks))
33 | loaned_servbufs := make([][]byte, len(vblocks))
34 |
35 | //First sort the core block array (doing this before taking the lock costs less)
36 | sort.Sort(pCBArr(cblocks))
37 |
38 | //Then lets lock a segment
39 | seg := bp.LockSegment(uuid)
40 |
41 | backpatch := make(map[uint64]uint64, len(cblocks)+len(vblocks)+1)
42 | backpatch[0] = 0 //Null address is still null
43 |
44 | ptr := seg.BaseAddress()
45 |
46 | //First step is to write all the vector blocks, order is not important
47 | for i := 0; i < len(vblocks); i++ {
48 | vb := vblocks[i]
49 |
50 | //Store relocation for cb backpatch
51 | backpatch[vb.Identifier] = ptr
52 |
53 | //Update the block. VB should now look as if it were read from disk
54 | vb.Identifier = ptr
55 | //So we can cache it
56 | bs.cachePut(ptr, vb)
57 |
58 | //Now write it
59 | serbuf := ser_buf_pool.Get().([]byte)
60 | cutdown := vb.Serialize(serbuf)
61 | loaned_servbufs[i] = serbuf
62 | nptr, err := seg.Write(uuid, ptr, cutdown)
63 | if err != nil {
64 | log.Panicf("Got error on segment write: %v", err)
65 | }
66 | ptr = nptr
67 | }
68 |
69 | //Now we need to write the coreblocks out
70 | for i := 0; i < len(cblocks); i++ {
71 | cb := cblocks[i]
72 |
73 | //Relocate and backpatch
74 | for k := 0; k < KFACTOR; k++ {
75 | if cb.Addr[k] < RELOCATION_BASE {
76 | continue
77 | }
78 | nval, ok := backpatch[cb.Addr[k]]
79 | if !ok {
80 | log.Panicf("Failed to backpatch! (trying to find addr 0x%016x)", cb.Addr[k])
81 | }
82 | cb.Addr[k] = nval
83 | }
84 | backpatch[cb.Identifier] = ptr
85 | cb.Identifier = ptr
86 | bs.cachePut(ptr, cb)
87 |
88 | serbuf := ser_buf_pool.Get().([]byte)
89 | cutdown := cb.Serialize(serbuf)
90 | loaned_sercbufs[i] = serbuf
91 | nptr, err := seg.Write(uuid, ptr, cutdown)
92 | if err != nil {
93 | log.Panicf("Got error on segment write: %v", err)
94 | }
95 | ptr = nptr
96 | }
97 | seg.Unlock()
98 | //Return buffers to pool
99 | for _, v := range loaned_sercbufs {
100 | ser_buf_pool.Put(v)
101 | }
102 | for _, v := range loaned_servbufs {
103 | ser_buf_pool.Put(v)
104 | }
105 | return backpatch
106 | }
107 |
--------------------------------------------------------------------------------
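To spell out the relocation scheme LinkAndStore implements (illustrative only, not repository code): blocks carry temporary identifiers with the top byte set, i.e. at or above RELOCATION_BASE, until they are written, and the returned backpatch map translates those identifiers into real on-disk addresses:

```go
func resolve(backpatch map[uint64]uint64, addr uint64) uint64 {
	if addr < RELOCATION_BASE {
		return addr // already a final on-disk address
	}
	final, ok := backpatch[addr] // temporary identifier assigned before the write
	if !ok {
		panic("no backpatch entry for relocation address")
	}
	return final
}
```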
/internal/bstore/blockcache.go:
--------------------------------------------------------------------------------
1 | package bstore
2 |
3 | import (
4 | "time"
5 | )
6 |
7 | type CacheItem struct {
8 | val Datablock
9 | vaddr uint64
10 | newer *CacheItem
11 | older *CacheItem
12 | }
13 |
14 | func (bs *BlockStore) initCache(size uint64) {
15 | bs.cachemax = size
16 | bs.cachemap = make(map[uint64]*CacheItem, size)
17 | go func() {
18 | for {
19 | lg.Info("Cachestats: %d misses, %d hits, %.2f %%",
20 | bs.cachemiss, bs.cachehit, (float64(bs.cachehit*100) / float64(bs.cachemiss+bs.cachehit)))
21 | time.Sleep(5 * time.Second)
22 | }
23 | }()
24 | }
25 |
26 | //This function must be called with the mutex held
27 | func (bs *BlockStore) cachePromote(i *CacheItem) {
28 | if bs.cachenew == i {
29 | //Already at front
30 | return
31 | }
32 | if i.newer != nil {
33 | i.newer.older = i.older
34 | }
35 | if i.older != nil {
36 | i.older.newer = i.newer
37 | }
38 | if bs.cacheold == i && i.newer != nil {
39 | //This was the tail of a list longer than 1
40 | bs.cacheold = i.newer
41 | } else if bs.cacheold == nil {
42 | //This was/is the only item in the list
43 | bs.cacheold = i
44 | }
45 |
46 | i.newer = nil
47 | i.older = bs.cachenew
48 | if bs.cachenew != nil {
49 | bs.cachenew.newer = i
50 | }
51 | bs.cachenew = i
52 | }
53 | func (bs *BlockStore) cachePut(vaddr uint64, item Datablock) {
54 | if bs.cachemax == 0 {
55 | return
56 | }
57 | bs.cachemtx.Lock()
58 | i, ok := bs.cachemap[vaddr]
59 | if ok {
60 | bs.cachePromote(i)
61 | } else {
62 | i = &CacheItem{
63 | val: item,
64 | vaddr: vaddr,
65 | }
66 | bs.cachemap[vaddr] = i
67 | bs.cachePromote(i)
68 | bs.cachelen++
69 | bs.cacheCheckCap()
70 | }
71 | bs.cachemtx.Unlock()
72 | }
73 |
74 | func (bs *BlockStore) cacheGet(vaddr uint64) Datablock {
75 | if bs.cachemax == 0 {
76 | bs.cachemiss++
77 | return nil
78 | }
79 | bs.cachemtx.Lock()
80 | rv, ok := bs.cachemap[vaddr]
81 | if ok {
82 | bs.cachePromote(rv)
83 | }
84 | bs.cachemtx.Unlock()
85 | if ok {
86 | bs.cachehit++
87 | return rv.val
88 | } else {
89 | bs.cachemiss++
90 | return nil
91 | }
92 | }
93 |
94 | //debug function
95 | func (bs *BlockStore) walkCache() {
96 | fw := 0
97 | bw := 0
98 | it := bs.cachenew
99 | for {
100 | if it == nil {
101 | break
102 | }
103 | fw++
104 | if it.older == nil {
105 | lg.Info("fw walked to end, compare %p/%p", it, bs.cacheold)
106 | }
107 | it = it.older
108 | }
109 | it = bs.cacheold
110 | for {
111 | if it == nil {
112 | break
113 | }
114 | bw++
115 | if it.newer == nil {
116 | lg.Info("bw walked to end, compare %p/%p", it, bs.cachenew)
117 | }
118 | it = it.newer
119 | }
120 | lg.Info("Walked cache fw=%v, bw=%v, map=%v", fw, bw, len(bs.cachemap))
121 | }
122 |
123 | //This must be called with the mutex held
124 | func (bs *BlockStore) cacheCheckCap() {
125 | for bs.cachelen > bs.cachemax {
126 | i := bs.cacheold
127 | delete(bs.cachemap, i.vaddr)
128 | if i.newer != nil {
129 | i.newer.older = nil
130 | }
131 | bs.cacheold = i.newer
132 | bs.cachelen--
133 | }
134 | }
135 |
--------------------------------------------------------------------------------
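A sketch of the read path this cache is built for (illustrative only; readThrough and fetch are not part of the repository): cacheGet returns nil on a miss, so the caller fetches the block and inserts it with cachePut:

```go
func (bs *BlockStore) readThrough(vaddr uint64, fetch func(uint64) Datablock) Datablock {
	if blk := bs.cacheGet(vaddr); blk != nil {
		return blk // hit: the entry has been promoted to the head of the LRU list
	}
	blk := fetch(vaddr)     // miss: e.g. read and deserialize via the storage provider
	bs.cachePut(vaddr, blk) // inserting may evict the least recently used entry
	return blk
}
```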
/qci/test_readwrite2.ipy:
--------------------------------------------------------------------------------
1 |
2 | import qdf
3 | import qdf.quasar
4 | import sys
5 | import random
6 | import uuid
7 | import time
8 | from twisted.internet import defer, protocol, reactor
9 | print "entered test readwrite1"
10 | EXIT_CODE = None
11 | def setexit(code):
12 | global EXIT_CODE
13 | EXIT_CODE = code
14 | reactor.stop()
15 |
16 | @defer.inlineCallbacks
17 | def testbody(db):
18 | print "connected"
19 | TOTALPOINTS = 1000000
20 | PERINSERT = 1000
21 | INTERVAL = int(1E9/120.)
22 | UID = str(uuid.uuid4())
23 | randomdata = [(x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)]
24 | idx = 0
25 | print "random data generated"
26 | for i in xrange(TOTALPOINTS/PERINSERT):
27 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT])
28 | idx += PERINSERT
29 | time.sleep(20)
30 | readdata = []
31 | idx = 0
32 | print "reading data"
33 | for i in xrange(TOTALPOINTS/PERINSERT):
34 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT)
35 | (version, values) = rv
36 | readdata += [(v.time, v.value) for v in values]
37 | print "len readdata:",len(readdata)
38 | print "len insert:",len(randomdata)
39 | for i in xrange(len(randomdata)):
40 | if randomdata[i][0] != readdata[i][0]:
41 | print "time mismatch index",i
42 | break
43 | if randomdata[i][1] != readdata[i][1]:
44 | print "value mismatch index",i
45 | break
46 | else:
47 | print "lists match"
48 | #delete middle 1/3 of data
49 | st = randomdata[len(randomdata)/3][0]
50 | et = randomdata[2*len(randomdata)/3][0]
51 | (status, rv) = yield db.deleteRange(UID, st, et)
52 | #also delete it from our data
53 | ndat = randomdata[0:len(randomdata)/3] #exclusive
54 | ndat += randomdata[2*len(randomdata)/3:] #inclusive
55 |
56 | readdata = []
57 | print "reading data AFTER DELETE"
58 | for i in xrange(TOTALPOINTS/PERINSERT):
59 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT)
60 | (version, values) = rv
61 | readdata += [(v.time, v.value) for v in values]
62 | print "len readdata:",len(readdata)
63 | print "len insert:",len(ndat)
64 | odataskip = randomdata[666664:666668]
65 | print "odataskip:",odataskip
66 | for i in xrange(len(ndat)):
67 | if ndat[i][0] != readdata[i][0]:
68 | print "time mismatch index",i
69 | break
70 | if ndat[i][1] != readdata[i][1]:
71 | print "value mismatch index",i
72 | print "received",readdata[i][1]
73 | print "expected",ndat[i][1]
74 | print "nearby expected", ndat[i-2:i+2]
75 | print "nearby received", readdata[i-2:i+2]
76 | print "nearby ODAT", randomdata[i-2:i+2]
77 | break
78 | else:
79 | print "lists match"
80 | setexit(0)
81 | return
82 |
83 | setexit(1)
84 | return
85 |
86 | def onFail(param):
87 | print "Encountered error: ", param
88 | setexit(2)
89 |
90 | def entrypoint():
91 | print "in entrypoint"
92 | try:
93 | q = qdf.quasar.connectToArchiver("localhost", 4410)
94 | q.addCallback(testbody)
95 | q.addErrback(onFail)
96 | except Exception as e:
97 | print "ex: ",e
98 | setexit(1)
99 |
100 | reactor.callWhenRunning(entrypoint)
101 | reactor.run()
102 | if EXIT_CODE == None:
103 | EXIT_CODE = 42
104 | if EXIT_CODE != 0:
105 | sys.exit(EXIT_CODE)
106 | else:
107 | !rm FAILURE
108 |
--------------------------------------------------------------------------------
/qci/test_changedrange.ipy:
--------------------------------------------------------------------------------
1 |
2 | import qdf
3 | import qdf.quasar
4 | import sys
5 | import random
6 | import uuid
7 | import time
8 | from twisted.internet import defer, protocol, reactor
9 | print "entered test changedrange"
10 | EXIT_CODE = None
11 | def setexit(code):
12 | global EXIT_CODE
13 | EXIT_CODE = code
14 | reactor.stop()
15 |
16 | @defer.inlineCallbacks
17 | def testbody(db):
18 | print "connected"
19 | TOTALPOINTS = 1000000
20 | PERINSERT = 1000
21 | INTERVAL = int(1E9/120.)
22 | UID = str(uuid.uuid4())
23 | OFFSET = random.randrange(100,1000000000000)
24 | randomdata = [(OFFSET + x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)]
25 | e_t = randomdata[-1][0]
26 | s_t = OFFSET
27 | print "SET: ", randomdata[0], randomdata[-1]
28 | print "e_t:", e_t
29 | print "s_t:", s_t
30 | idx = 0
31 | print "random data generated"
32 | for i in xrange(TOTALPOINTS/PERINSERT):
33 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT])
34 | idx += PERINSERT
35 |
36 | #immediate query
37 | srep = []
38 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55)
39 | print "status: ", status
40 | (version, values) = rv
41 | for v in values:
42 | srep.append([v.time, v.min, v.mean, v.max, v.count])
43 | print "preflush:", srep
44 | print "version:", version
45 | #preflush_count = srep[0][4]
46 | preflush_count = 0
47 | print "flushing"
48 | yield db.flush(UID)
49 |
50 | srep = []
51 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55)
52 | print "status: ", status
53 | (version, values) = rv
54 | for v in values:
55 | srep.append([v.time, v.min, v.mean, v.max, v.count])
56 |
57 | #postflush_count = srep[0][4]
58 | print "postflush:", srep
59 | print "version:", version
60 |
61 | print "flushing2"
62 | yield db.flush(UID)
63 |
64 | srep = []
65 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55)
66 | print "status2: ", status
67 | (version, values) = rv
68 | for v in values:
69 | srep.append([v.time, v.min, v.mean, v.max, v.count])
70 |
71 | #postflush_count = srep[0][4]
72 | print "postflush2:", srep
73 | print "version2:", version
74 |
75 | #print "prepost counts: ",preflush_count, postflush_count
76 |
77 | def expected_cr(st, et, res):
78 | return st & ~((1<
--------------------------------------------------------------------------------
/internal/bstore/blocktypes_test.go:
--------------------------------------------------------------------------------
11 | log.Warning(">>>> USING %v AS SEED <<<<<", sd)
12 | rand.Seed(sd)
13 | }
14 |
15 | func Test_DeCompose(t *testing.T) {
16 | for i := 0; i < 16; i++ {
17 | x := rand.Float64()
18 | packed_m, packed_e := decompose(x)
19 | //log.Warning("x= %v m=%v e=%v",x, packed_m, packed_e)
20 | rv := recompose(packed_m, packed_e)
21 | if rv != x {
22 | t.Errorf("Number did not convert: +v", x)
23 | }
24 | }
25 | for i := 0; i < 10000000; i++ {
26 | x := rand.Float64()
27 | packed_m, packed_e := decompose(x)
28 | rv := recompose(packed_m, packed_e)
29 | if rv != x {
30 | t.Errorf("Number did not convert: +v", x)
31 | }
32 | }
33 | }
34 |
35 | func Test_2DeCompose(t *testing.T) {
36 | log.Warning("testing")
37 | for i := 0; i < 16; i++ {
38 | x := float64(i * 100000.0)
39 | packed_m, packed_e := decompose(x)
40 | rv := recompose(packed_m, packed_e)
41 | if rv != x {
42 | t.Errorf("Number did not convert: exp %v got %v", x, rv)
43 | }
44 | }
45 | }
46 |
47 | func Test_CB1(t *testing.T) {
48 | c := new(Coreblock)
49 | for i := 0; i < KFACTOR; i++ {
50 | c.Addr[i] = uint64(i + 1)
51 | }
52 | sarr := make([]byte, CBSIZE)
53 | donearr := c.Serialize(sarr)
54 | cn := new(Coreblock)
55 | cn.Deserialize(donearr)
56 | if !CompareNoTags(*c, *cn, []string{"implicit"}) {
57 | t.Error("Core block SERDES faled")
58 | }
59 | }
60 |
61 | func Test_Pack1(t *testing.T) {
62 | tst := func(x uint64) int {
63 | b := make([]byte, 9)
64 | ln := writeUnsignedHuff(b, x)
65 | for i := ln; i < 9; i++ {
66 | if b[i] != 0 {
67 | t.Errorf("Unexpected non-null byte")
68 | }
69 | }
70 | xr, _, _ := readUnsignedHuff(b)
71 | if xr != x {
72 | t.Errorf("Number did not match:", x, xr)
73 | }
74 | return ln
75 | }
76 | //First test around the boundaries
77 | var order uint64
78 | for order = 0; order < 64; order++ {
79 | for offset := -4; offset < 4; offset++ {
80 | x := uint64((1 << order) + offset)
81 | tst(x)
82 | }
83 | }
84 |
85 | //Now test that the huff boundaries have the right number of chars
86 | bcheck := []struct {
87 | n uint64
88 | exp int
89 | }{
90 | {(1 << 7) - 1, 1},
91 | {(1 << 7), 2},
92 | {(1 << 14) - 1, 2},
93 | {(1 << 14), 3},
94 | {(1 << 20) - 1, 3},
95 | {(1 << 20), 4},
96 | {(1 << 28) - 1, 4},
97 | {(1 << 28), 5},
98 | {(1 << 36) - 1, 5},
99 | {(1 << 36), 6},
100 | {(1 << 42) - 1, 6},
101 | {(1 << 42), 7},
102 | {(1 << 50) - 1, 7},
103 | {(1 << 50), 8},
104 | {(1 << 58) - 1, 8},
105 | {(1 << 58), 9},
106 | {0xFFFFFFFFFFFFFFFF, 9},
107 | }
108 | for _, ob := range bcheck {
109 | l := tst(ob.n)
110 | if l != ob.exp {
111 | t.Errorf("Did not get expected number of bytes out test=", ob, l)
112 | }
113 | }
114 |
115 | //Check the big number
116 | tst(0xFFFFFFFFFFFFFFFF)
117 |
118 | //Check the small number
119 | tst(0)
120 |
121 | //Check random numbers
122 | for i := 0; i < 100000; i++ {
123 | x := uint64(rand.Int63())
124 | tst(x)
125 | }
126 | }
127 |
128 | func Test_Pack2(t *testing.T) {
129 | //Unsigned numbers are probably covered ok, lets try a few signed numbers
130 | //Check random numbers
131 | tst := func(x int64) int {
132 | b := make([]byte, 9)
133 | ln := writeSignedHuff(b, x)
134 | for i := ln; i < 9; i++ {
135 | if b[i] != 0 {
136 | t.Errorf("Unexpected non-null byte")
137 | }
138 | }
139 | xr, _, _ := readSignedHuff(b)
140 | if xr != x {
141 | t.Errorf("Number did not match:", x, xr)
142 | }
143 | return ln
144 | }
145 | for i := 0; i < 10000000; i++ {
146 | x := rand.Int63()
147 | tst(x)
148 | }
149 | tst(-1)
150 | tst(-0x7FFFFFFFFFFFFFFF)
151 | tst(0x7FFFFFFFFFFFFFFF)
152 | }
153 |
--------------------------------------------------------------------------------
/qci/utils.ipy:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | def getid():
4 | return int(time.time() - 1423015475)
5 |
6 | def build_loadgen(totalrecords, numstreams, pointspermessage):
7 | !go get github.com/SoftwareDefinedBuildings/quasarloadgenerator
8 | !git clone https://github.com/SoftwareDefinedBuildings/quasarloadgenerator
9 | !cd quasarloadgenerator && git checkout delete-data
10 | !cd quasarloadgenerator && go get -d ./...
11 | !cd quasarloadgenerator && go build -o ../loadgen .
12 |
13 | def mkconf(cephpool, collection, filepath="/srv/quasar/"):
14 | if cephpool is not None:
15 | conf="""
16 | [storage]
17 | provider=ceph
18 | cephconf=/etc/ceph/ceph.conf
19 | cephpool={cephpool}
20 | """.format(cephpool=cephpool)
21 | else:
22 | conf="""
23 | [storage]
24 | provider=file
25 | filepath={filepath}
26 | """.format(filepath=filepath)
27 | conf = conf + """
28 | [http]
29 | enabled=true
30 | port=9000
31 | address=0.0.0.0
32 |
33 | [capnp]
34 | enabled=true
35 | port=4410
36 | address=0.0.0.0
37 |
38 | [mongo]
39 | server=localhost
40 | collection={collection}
41 |
42 | [debug]
43 | heapprofile=true
44 | cpuprofile=true
45 |
46 | [cache]
47 | # Configure the RADOS and block caches. If you have a choice, rather
48 | # spend memory on the block cache.
49 |
50 | # This is measured in blocks, which are at most ~16K
51 | blockcache=62500 #1 GB
52 |
53 | # Choose a RADOS cache roughly equal to (num concurrent reads) * (object size)
54 | # the transaction size is at most 16 MB, but is usually around 1.6MB. The
55 | # objects can vary in size, so the cache can be capped either in quantity or
56 | # in total size (or both)
57 | radoscachecount=2048 #in objects
58 | radoscachesize=256 #in MB
59 |
60 | [coalescence]
61 | earlytrip=16384 #readings
62 | interval=5000 #ms
63 | """.format(collection=collection)
64 | with open("quasar.conf","w") as f:
65 | f.write(conf)
66 |
67 | def wait_for_stable_ceph():
68 | x = !ceph -s
69 | while any(("creating" in y) or ("peering" in y) or ("unclean" in y) for y in x):
70 | print "Waiting for creation:"
71 | print x
72 | time.sleep(5)
73 | x = !ceph -s
74 |
75 | def mkceph_local(cephpool):
76 | !ceph osd pool create $cephpool 4096 4096 replicated local 2
77 | time.sleep(5)
78 | wait_for_stable_ceph()
79 |
80 | def mkceph_remote(cephpool):
81 | !ceph osd pool create $cephpool 4096 4096 replicated remote 2
82 | time.sleep(5)
83 | wait_for_stable_ceph()
84 |
85 | def mkceph_tier(cephpool):
86 | cache = cephpool+"-cache"
87 | !ceph osd pool create $cephpool 4096 4096 replicated remote 2
88 | time.sleep(5)
89 | !ceph osd pool create $cache 4096 4096 replicated local 2
90 | time.sleep(5)
91 | wait_for_stable_ceph()
92 | !ceph osd tier add $cephpool $cache
93 | !ceph osd tier cache-mode $cache writeback
94 | !ceph osd tier set-overlay $cephpool $cache
95 | !ceph osd pool set $cache hit_set_type bloom
96 | !ceph osd pool set $cache hit_set_period 7200
97 | !ceph osd pool set $cache cache_min_flush_age 120
98 | wait_for_stable_ceph()
99 |
100 | def mkceph_primary(cephpool):
101 | !ceph osd pool create $cephpool 4096 4096 replicated primary 2
102 | time.sleep(5)
103 | wait_for_stable_ceph()
104 |
105 | def delceph_pool(cephpool):
106 | !ceph osd pool delete $cephpool $cephpool --yes-i-really-really-mean-it
107 |
108 | def delceph_tier(cephpool):
109 | cache = cephpool+"-cache"
110 | !ceph osd tier cache-mode $cache forward
111 | !rados -p $cache cache-flush-evict-all > log.evict
112 | !ceph osd tier remove-overlay $cephpool
113 | !ceph osd tier remove $cephpool $cache
114 | delceph_pool(cache)
115 | delceph_pool(cephpool)
116 |
117 | #get QDF pulled
118 | !git clone https://github.com/SoftwareDefinedBuildings/QDF.git
119 | !mv QDF/qdf .
120 |
--------------------------------------------------------------------------------
/btrdbd/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | "fmt"
6 | "os"
7 | "os/signal"
8 | "runtime"
9 | "runtime/pprof"
10 | "strconv"
11 | "time"
12 |
13 | "github.com/SoftwareDefinedBuildings/btrdb"
14 | "github.com/SoftwareDefinedBuildings/btrdb/cpinterface"
15 | "github.com/SoftwareDefinedBuildings/btrdb/httpinterface"
16 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
17 | "github.com/op/go-logging"
18 | )
19 |
20 | var log *logging.Logger
21 |
22 | func init() {
23 | logging.SetFormatter(logging.MustStringFormatter("%{color}%{shortfile} ▶%{color:reset} %{message}"))
24 | log = logging.MustGetLogger("log")
25 |
26 | }
27 |
28 | var createDB = flag.Bool("makedb", false, "create a new database")
29 |
30 | func main() {
31 | loadConfig()
32 | flag.Parse()
33 |
34 | go func() {
35 | for {
36 | time.Sleep(10 * time.Second)
37 | fmt.Println("Num goroutines: ", runtime.NumGoroutine())
38 | }
39 | }()
40 | if Configuration.Debug.Cpuprofile {
41 | f, err := os.Create("profile.cpu")
42 | if err != nil {
43 | log.Panicf("Error creating CPU profile: %v", err)
44 | }
45 | f2, err := os.Create("profile.block")
46 | if err != nil {
47 | log.Panicf("Error creating Block profile: %v", err)
48 | }
49 | pprof.StartCPUProfile(f)
50 | runtime.SetBlockProfileRate(1)
51 | defer runtime.SetBlockProfileRate(0)
52 | defer pprof.Lookup("block").WriteTo(f2, 1)
53 | defer pprof.StopCPUProfile()
54 | }
55 |
56 | if *createDB {
57 | fmt.Printf("Creating a new database\n")
58 | bstore.CreateDatabase(Params)
59 | fmt.Printf("Done\n")
60 | os.Exit(0)
61 | }
62 | nCPU := runtime.NumCPU()
63 | runtime.GOMAXPROCS(nCPU)
64 | cfg := btrdb.QuasarConfig{
65 | DatablockCacheSize: uint64(Configuration.Cache.BlockCache),
66 | TransactionCoalesceEnable: true,
67 | TransactionCoalesceInterval: uint64(*Configuration.Coalescence.Interval),
68 | TransactionCoalesceEarlyTrip: uint64(*Configuration.Coalescence.Earlytrip),
69 | Params: Params,
70 | }
71 | q, err := btrdb.NewQuasar(&cfg)
72 | if err != nil {
73 | log.Panicf("error: ", err)
74 | }
75 |
76 | if Configuration.Http.Enabled {
77 | go httpinterface.QuasarServeHTTP(q, *Configuration.Http.Address+":"+strconv.FormatInt(int64(*Configuration.Http.Port), 10))
78 | }
79 | if Configuration.Capnp.Enabled {
80 | go cpinterface.ServeCPNP(q, "tcp", *Configuration.Capnp.Address+":"+strconv.FormatInt(int64(*Configuration.Capnp.Port), 10))
81 | }
82 |
83 | if Configuration.Debug.Heapprofile {
84 | go func() {
85 | idx := 0
86 | for {
87 | f, err := os.Create(fmt.Sprintf("profile.heap.%05d", idx))
88 | if err != nil {
89 | log.Panicf("Could not create memory profile %v", err)
90 | }
91 | idx = idx + 1
92 | pprof.WriteHeapProfile(f)
93 | f.Close()
94 | time.Sleep(30 * time.Second)
95 | }
96 | }()
97 | }
98 |
99 | sigchan := make(chan os.Signal, 1)
100 | signal.Notify(sigchan, os.Interrupt)
101 |
102 | for {
103 | time.Sleep(5 * time.Second)
104 | log.Info("Still alive")
105 |
106 | select {
107 | case _ = <-sigchan:
108 | log.Warning("Received Ctrl-C, waiting for graceful shutdown")
109 | time.Sleep(4 * time.Second) //Allow http some time
110 | log.Warning("Checking for pending inserts")
111 | for {
112 | if q.IsPending() {
113 | log.Warning("Pending inserts... waiting... ")
114 | time.Sleep(2 * time.Second)
115 | } else {
116 | log.Warning("No pending inserts")
117 | break
118 | }
119 | }
120 | if Configuration.Debug.Heapprofile {
121 | log.Warning("writing heap profile")
122 | f, err := os.Create("profile.heap.FIN")
123 | if err != nil {
124 | log.Panicf("Could not create memory profile %v", err)
125 | }
126 | pprof.WriteHeapProfile(f)
127 | f.Close()
128 |
129 | }
130 | return //end the program
131 | default:
132 |
133 | }
134 | }
135 | }
136 |
--------------------------------------------------------------------------------
/internal/cephprovider/cephcache.go:
--------------------------------------------------------------------------------
1 | package cephprovider
2 |
3 | import (
4 | "sync"
5 | "time"
6 | //"runtime"
7 | )
8 |
9 | //We are caching 1MB blocks for read, so the address should have the bottom 20 bits clear
10 | const R_ADDRMASK = ^((uint64(1) << 20) - 1)
11 | const R_OFFSETMASK = (uint64(1) << 20) - 1
12 |
13 | type CephCache struct {
14 | cachemap map[uint64]*CacheItem
15 | cachemiss uint64
16 | cachehit uint64
17 | cacheold *CacheItem
18 | cachenew *CacheItem
19 | cachemtx sync.Mutex
20 | cachelen uint64
21 | cachemax uint64
22 | cacheinv uint64
23 | pool *sync.Pool
24 | }
25 | type CacheItem struct {
26 | val []byte
27 | addr uint64
28 | newer *CacheItem
29 | older *CacheItem
30 | }
31 |
32 | func (cc *CephCache) initCache(size uint64) {
33 | cc.cachemax = size
34 | cc.cachemap = make(map[uint64]*CacheItem, size)
35 | cc.pool = &sync.Pool{
36 | New: func() interface{} {
37 | return make([]byte, R_CHUNKSIZE)
38 | },
39 | }
40 |
41 | go func() {
42 | for {
43 | log.Info("Ceph BlockCache: %d invs %d misses, %d hits, %.2f %%",
44 | cc.cacheinv, cc.cachemiss, cc.cachehit, (float64(cc.cachehit*100) / float64(cc.cachemiss+cc.cachehit)))
45 | time.Sleep(5 * time.Second)
46 | }
47 | }()
48 | }
49 |
50 | //This function must be called with the mutex held
51 | func (cc *CephCache) cachePromote(i *CacheItem) {
52 | if cc.cachenew == i {
53 | //Already at front
54 | return
55 | }
56 | if i.newer != nil {
57 | i.newer.older = i.older
58 | }
59 | if i.older != nil {
60 | i.older.newer = i.newer
61 | }
62 | if cc.cacheold == i && i.newer != nil {
63 | //This was the tail of a list longer than 1
64 | cc.cacheold = i.newer
65 | } else if cc.cacheold == nil {
66 | //This was/is the only item in the list
67 | cc.cacheold = i
68 | }
69 |
70 | i.newer = nil
71 | i.older = cc.cachenew
72 | if cc.cachenew != nil {
73 | cc.cachenew.newer = i
74 | }
75 | cc.cachenew = i
76 | }
77 |
78 | func (cc *CephCache) cachePut(addr uint64, item []byte) {
79 | if cc.cachemax == 0 {
80 | return
81 | }
82 | cc.cachemtx.Lock()
83 | i, ok := cc.cachemap[addr]
84 | if ok {
85 | cc.cachePromote(i)
86 | } else {
87 | i = &CacheItem{
88 | val: item,
89 | addr: addr,
90 | }
91 | cc.cachemap[addr] = i
92 | cc.cachePromote(i)
93 | cc.cachelen++
94 | cc.cacheCheckCap()
95 | }
96 | cc.cachemtx.Unlock()
97 | }
98 |
99 | func (cc *CephCache) getBlank() []byte {
100 | rv := cc.pool.Get().([]byte)
101 | rv = rv[0:R_CHUNKSIZE]
102 |
103 | return rv
104 | }
105 |
106 | func (cc *CephCache) cacheGet(addr uint64) []byte {
107 | if cc.cachemax == 0 {
108 | cc.cachemiss++
109 | return nil
110 | }
111 | cc.cachemtx.Lock()
112 | rv, ok := cc.cachemap[addr]
113 | if ok {
114 | cc.cachePromote(rv)
115 | }
116 | cc.cachemtx.Unlock()
117 | if ok {
118 | cc.cachehit++
119 | return rv.val
120 | } else {
121 | cc.cachemiss++
122 | return nil
123 | }
124 | }
125 |
126 | //This is rare and only happens if the block cache is too small
127 | func (cc *CephCache) cacheInvalidate(addr uint64) {
128 | if cc.cachemax == 0 {
129 | return
130 | }
131 | cc.cachemtx.Lock()
132 | i, ok := cc.cachemap[addr]
133 | if ok {
134 | if i.newer != nil {
135 | i.newer.older = i.older
136 | }
137 | if i.older != nil {
138 | i.older.newer = i.newer
139 | }
140 | if cc.cacheold == i {
141 | //This was the tail of a list longer than 1
142 | cc.cacheold = i.newer
143 | }
144 | if cc.cachenew == i {
145 | cc.cachenew = i.older
146 | }
147 | cc.cachelen--
148 | cc.cacheinv++
149 | delete(cc.cachemap, addr)
150 | }
151 | cc.cachemtx.Unlock()
152 | }
153 |
154 | //This must be called with the mutex held
155 | func (cc *CephCache) cacheCheckCap() {
156 | for cc.cachelen > cc.cachemax {
157 | i := cc.cacheold
158 |
159 | delete(cc.cachemap, i.addr)
160 | if i.newer != nil {
161 | i.newer.older = nil
162 | }
163 | cc.cacheold = i.newer
164 | cc.cachelen--
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
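Illustrative only (not repository code): the two masks split a byte address into the 1 MB chunk the read cache keys on and the offset within that chunk:

```go
func splitAddr(addr uint64) (chunkBase, offset uint64) {
	chunkBase = addr & R_ADDRMASK // bottom 20 bits cleared: key of the cached 1 MB chunk
	offset = addr & R_OFFSETMASK  // position of the requested bytes inside that chunk
	return chunkBase, offset
}
```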
/qci/runtests.ipy:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ipython
2 | from multiprocessing import Process, Pipe
3 | import os
4 | import time
5 | import sys
6 | import subprocess
7 | import uuid
8 | import pymongo
9 | %run qci/utils.ipy
10 | runid = getid()
11 | print "RUN ID IS", runid
12 | build_loadgen(10000000,20,1000)
13 | cephpool = "q"+str(runid)
14 | collection = "q"+str(runid)
15 | filestore = "q"+str(runid)
16 | if "CEPHTYPE" not in os.environ or os.environ["CEPHTYPE"] == "local":
17 | mkceph_local(cephpool)
18 | elif os.environ["CEPHTYPE"] == "remote":
19 | mkceph_remote(cephpool)
20 | elif os.environ["CEPHTYPE"] == "primary":
21 | mkceph_primary(cephpool)
22 | elif os.environ["CEPHTYPE"] == "tier":
23 | mkceph_tier(cephpool)
24 | elif os.environ["CEPHTYPE"] == "filestore":
25 | cephpool = None
26 |
27 | mkconf(cephpool, collection, filestore)
28 |
29 | #Create database
30 | rc = subprocess.call(["./exe","-makedb"])
31 | print "rc0", rc
32 | if rc != 0:
33 | sys.exit(rc)
34 |
35 | #start quasar
36 | def start_q_():
37 | stdout=open("log.q.stdout.%d" % (int(time.time())),"w")
38 | rc = subprocess.call(["./exe"],stdout=stdout, stderr=subprocess.STDOUT)
39 | print "rc1", rc
40 | if rc != 0:
41 | sys.exit(rc)
42 |
43 |
44 | def start_quasar():
45 | global p
46 | p = Process(target=start_q_)
47 | p.start()
48 |
49 | def term_quasar():
50 | #send sigint
51 | #os.kill(p.pid, 2)
52 | !pkill --signal 2 exe
53 |
54 | time.sleep(120)
55 | #os.kill(p.pid, 9)
56 | !pkill --signal 9 exe
57 |
58 | def kill_quasar():
59 | !pkill --signal 9 exe
60 |
61 | def proc_profiles(pfx):
62 | !go tool pprof -text -cum exe profile.cpu > log.profile.cpu.cum
63 | !go tool pprof -text exe profile.cpu > log.profile.cpu
64 | hps = !ls profile.heap.*
65 | for hp in hps:
66 | num = hp.split(".")[-1]
67 | !go tool pprof -text exe $hp > temp
68 | tot = !cat temp | head -n 1 | cut -d ' ' -f 3
69 | tot = tot[0]
70 | fname = "log.heap."+pfx+"."+num+"___"+tot
71 | !mv temp $fname
72 | !rm -f profile.heap.*
73 |
74 | start_quasar()
75 | #wait a bit
76 | time.sleep(10)
77 |
78 | if not p.is_alive():
79 | print "quasar died:", p.exitcode
80 | sys.exit(1)
81 |
82 | !rm FAILURE
83 | !touch FAILURE
84 | if "TEST_TYPE" not in os.environ or os.environ["TEST_TYPE"] == "loadgen":
85 | %run qci/test_loadgen.ipy
86 | elif os.environ["TEST_TYPE"] == "readwrite1":
87 | print "running reqdwrite1"
88 | %run qci/test_readwrite1.ipy
89 | print "run complete"
90 | elif os.environ["TEST_TYPE"] == "readwrite2":
91 | print "running reqdwrite2"
92 | %run qci/test_readwrite2.ipy
93 | print "run complete"
94 | elif os.environ["TEST_TYPE"] == "readstat1":
95 | print "running readstat1"
96 | %run qci/test_readstat1.ipy
97 | print "run complete"
98 | elif os.environ["TEST_TYPE"] == "loadgen2":
99 | print "running loadgen2"
100 | %run qci/test_loadgen2.ipy
101 | print "run complete"
102 | elif os.environ["TEST_TYPE"] == "loadgen3":
103 | print "running loadgen3"
104 | %run qci/test_loadgen3.ipy
105 | print "run complete"
106 | elif os.environ["TEST_TYPE"] == "endpoint":
107 | print "running endpoint"
108 | %run qci/test_endpoint.ipy
109 | print "run complete"
110 | elif os.environ["TEST_TYPE"] == "changedrange":
111 | print "running changedrange"
112 | %run qci/test_changedrange.ipy
113 | print "run complete"
114 |
115 | failed = !cat FAILURE; echo $?
116 | failed = (failed[-1] == "0")
117 |
118 | if not p.is_alive():
119 | print "quasar died:", p.exitcode
120 | sys.exit(1)
121 |
122 | if not failed:
123 | print "WRITING SUCCESS FILE"
124 | with open("success","w") as f:
125 | f.write("OK\n")
126 |
127 | term_quasar()
128 |
129 | proc_profiles("end")
130 |
131 | if os.environ["CEPHTYPE"] == "tier":
132 | delceph_tier(cephpool)
133 | elif os.environ["CEPHTYPE"] == "filestore":
134 | !rm -r {filestore}
135 | cl = pymongo.MongoClient()
136 | cl.drop_database(collection)
137 | cl.disconnect()
138 | else:
139 | delceph_pool(cephpool)
140 |
141 | print "done"
142 |
143 | if failed:
144 | sys.exit(1)
145 |
146 |
--------------------------------------------------------------------------------
/qtree/operators.go:
--------------------------------------------------------------------------------
1 | package qtree
2 |
3 | import (
4 | "math"
5 |
6 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
7 | )
8 |
9 | func (n *QTreeNode) OpCountMean() (uint64, float64) {
10 | total := 0.0
11 | cnt := uint64(0)
12 | if n.isLeaf {
13 | for i := 0; i < int(n.vector_block.Len); i++ {
14 | total += n.vector_block.Value[i]
15 | }
16 | return uint64(n.vector_block.Len), total / float64(n.vector_block.Len)
17 | } else {
18 | for i := 0; i < bstore.KFACTOR; i++ {
19 | if n.core_block.Count[i] == 0 {
20 | continue
21 | }
22 | cnt += n.core_block.Count[i]
23 | total += n.core_block.Mean[i] * float64(n.core_block.Count[i])
24 | }
25 | return cnt, total / float64(cnt)
26 | }
27 | }
28 |
29 | func (n *QTreeNode) OpMin() float64 {
30 | if n.isLeaf {
31 | min := n.vector_block.Value[0]
32 | for i := 0; i < int(n.vector_block.Len); i++ {
33 | if n.vector_block.Value[i] < min {
34 | min = n.vector_block.Value[i]
35 | }
36 | }
37 | return min
38 | } else {
39 | min := float64(0)
40 | minset := false
41 | for i := 0; i < len(n.core_block.Min); i++ {
42 | if n.core_block.Count[i] == 0 {
43 | continue
44 | }
45 | if !minset || n.core_block.Min[i] < min {
46 | min = n.core_block.Min[i]
47 | minset = true
48 | }
49 | }
50 | return min
51 | }
52 | }
53 |
54 | func (n *QTreeNode) OpMax() float64 {
55 | if n.isLeaf {
56 | max := n.vector_block.Value[0]
57 | for i := 0; i < int(n.vector_block.Len); i++ {
58 | if n.vector_block.Value[i] > max {
59 | max = n.vector_block.Value[i]
60 | }
61 | }
62 | return max
63 | } else {
64 | max := float64(0)
65 | maxset := false
66 | for i := 0; i < len(n.core_block.Max); i++ {
67 | if n.core_block.Count[i] == 0 {
68 | continue
69 | }
70 | if !maxset || n.core_block.Max[i] > max {
71 | max = n.core_block.Max[i]
72 | maxset = true
73 | }
74 | }
75 | return max
76 | }
77 | }
78 |
79 | /*
80 |
81 | Here is the problem: if we call OpReduce on a core node, then we can only deliver
82 | pointwidths greater than or equal to our own pointwidth, and at most our pointwidth + PWFACTOR.
83 | As a leaf, however, we can potentially deliver pointwidths down to 0.
84 | */
85 | func (n *QTreeNode) OpReduce(pointwidth uint8, index uint64) (uint64, float64, float64, float64) {
86 | if !n.isLeaf && pointwidth < n.PointWidth() {
87 | log.Panic("Bad pointwidth for core. See code comment")
88 | }
89 | if pointwidth > n.PointWidth()+PWFACTOR {
90 | log.Panic("Can't guarantee this PW")
91 | }
92 | maxpw := n.PointWidth() + PWFACTOR
93 | pwdelta := pointwidth - n.PointWidth()
94 | width := int64(1) << pointwidth
95 | maxidx := 1 << (maxpw - pointwidth)
96 | if maxidx <= 0 || index >= uint64(maxidx) {
97 | log.Critical("node is %s", n.TreePath())
98 | log.Panic("bad index", maxidx, index)
99 | }
100 | sum := 0.0
101 | min := math.NaN()
102 | max := math.NaN()
103 | minset := false
104 | maxset := false
105 | count := uint64(0)
106 | if n.isLeaf {
107 | st := n.StartTime() + int64(index)*width
108 | et := st + width
109 | if n.vector_block.Len != 0 {
110 | for i := 0; i < int(n.vector_block.Len); i++ {
111 | if n.vector_block.Time[i] < st {
112 | continue
113 | }
114 | if n.vector_block.Time[i] >= et {
115 | break
116 | }
117 | v := n.vector_block.Value[i]
118 | sum += v
119 | if !minset || v < min {
120 | minset = true
121 | min = v
122 | }
123 | if !maxset || v > max {
124 | maxset = true
125 | max = v
126 | }
127 | count++
128 | }
129 | }
130 | return count, min, sum / float64(count), max
131 | } else {
132 | s := index << pwdelta
133 | e := (index + 1) << pwdelta
134 | for i := s; i < e; i++ {
135 | if n.core_block.Count[i] == 0 {
136 | continue
137 | }
138 | count += n.core_block.Count[i]
139 | sum += n.core_block.Mean[i] * float64(n.core_block.Count[i])
140 | if !minset || n.core_block.Min[i] < min {
141 | minset = true
142 | min = n.core_block.Min[i]
143 | }
144 | if !maxset || n.core_block.Max[i] > max {
145 | maxset = true
146 | max = n.core_block.Max[i]
147 | }
148 | }
149 | mean := sum / float64(count)
150 | return count, min, mean, max
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
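A quick illustration of the pointwidth arithmetic described in the OpReduce comment above: a core node can serve query pointwidths from its own pointwidth up to pointwidth + PWFACTOR, and the number of result buckets it can return is 1 << (maxpw - pointwidth). The sketch below is standalone and assumes PWFACTOR is 6 (64 children per core node); the helper name is illustrative and not part of the qtree package.

package main

import "fmt"

// Assumption: each core node has 1<<pwfactor children, i.e. PWFACTOR == 6 in the qtree code.
const pwfactor = 6

// bucketsCovered reports how many aggregate buckets a core node at nodePW can
// return for a query at queryPW, mirroring the maxidx computation in OpReduce.
func bucketsCovered(nodePW, queryPW uint8) int {
	maxpw := nodePW + pwfactor
	if queryPW < nodePW || queryPW > maxpw {
		return 0 // outside the range OpReduce accepts for a core node
	}
	return 1 << (maxpw - queryPW)
}

func main() {
	// A core node at pointwidth 30 (children spanning ~1s each) queried at
	// pointwidth 32 yields 1<<(36-32) = 16 buckets.
	fmt.Println(bucketsCovered(30, 32)) // 16
	// Queried at its own pointwidth it yields one bucket per child: 64.
	fmt.Println(bucketsCovered(30, 30)) // 64
}

--------------------------------------------------------------------------------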
/internal/bprovider/bprovider_test.go:
--------------------------------------------------------------------------------
1 | package bprovider_test
2 |
3 | import (
4 | "math/rand"
5 | "sync"
6 | "testing"
7 | "time"
8 |
9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider"
10 | "github.com/SoftwareDefinedBuildings/btrdb/internal/cephprovider"
11 | "github.com/SoftwareDefinedBuildings/btrdb/internal/fileprovider"
12 | "github.com/op/go-logging"
13 | )
14 |
15 | var log *logging.Logger
16 |
17 | func init() {
18 | log = logging.MustGetLogger("log")
19 | }
20 |
21 | func makeFileProvider() *fileprovider.FileStorageProvider {
22 | params := map[string]string{
23 | "dbpath": "/srv/quasartestdb/",
24 | }
25 | fp := new(fileprovider.FileStorageProvider)
26 | err := fp.CreateDatabase(params)
27 | if err != nil {
28 | log.Panicf("Error on create %v", err)
29 | }
30 | fp.Initialize(params)
31 | return fp
32 | }
33 |
34 | func makeCephProvider() *cephprovider.CephStorageProvider {
35 | params := map[string]string{}
36 | cp := new(cephprovider.CephStorageProvider)
37 | /*err := cp.CreateDatabase(params)
38 | if err != nil {
39 | log.Panicf("Error on create %v",err)
40 | }*/
41 | cp.Initialize(params)
42 | return cp
43 | }
44 |
45 | func TestCephInitDB(t *testing.T) {
46 | params := map[string]string{}
47 | cp := new(cephprovider.CephStorageProvider)
48 | err := cp.CreateDatabase(params)
49 | if err != nil {
50 | log.Panicf("Error on create %v", err)
51 | }
52 | }
53 |
54 | func x_RW1(t *testing.T, sp bprovider.StorageProvider) {
55 | seg := sp.LockSegment()
56 | addr := seg.BaseAddress()
57 | data := make([]byte, 1024)
58 | for i := 0; i < 1024; i++ {
59 | data[i] = byte(i)
60 | }
61 | _, err := seg.Write(addr, data)
62 | if err != nil {
63 | t.Fatalf("Got error on write: %v", err)
64 | }
65 | seg.Unlock()
66 |
67 | //Read back
68 | rdata := make([]byte, 30000)
69 | rslice := sp.Read(addr, rdata)
70 | if len(rslice) != len(data) {
71 | t.Fatalf("Got wrong slice len back")
72 | }
73 | for i := 0; i < 1024; i++ {
74 | if rslice[i] != data[i] {
75 | t.Fatalf("Index %v differed got %v, expected %v", i, rslice[i], data[i])
76 | }
77 | }
78 | }
79 |
80 | func x_RWFuzz(t *testing.T, sp bprovider.StorageProvider) {
81 | wg := sync.WaitGroup{}
82 | const par = 2096
83 | const seglimlim = 50
84 | const arrszlim = 20482
85 | const maxseeds = 1
86 | for si := 1; si <= maxseeds; si++ {
87 | log.Warning("Trying seed %v", si)
88 | rand.Seed(int64(si))
89 | wg.Add(par)
90 | for li := 0; li < par; li++ {
91 | lic := li
92 | go func() {
93 |
94 | seg := sp.LockSegment()
95 | addr := seg.BaseAddress()
96 | log.Warning("Segment %v base addr 0x%016x", lic, addr)
97 | seglimit := 1 //rand.Int() % seglimlim
98 | stored_data := make([][]byte, seglimit)
99 | stored_addrs := make([]uint64, seglimit)
100 | for k := 0; k < seglimit; k++ {
101 | arrsize := rand.Int() % arrszlim
102 | data := make([]byte, arrsize)
103 | for i := 0; i < arrsize; i++ {
104 | data[i] = byte(rand.Int())
105 | }
106 | stored_data[k] = data
107 | naddr, err := seg.Write(addr, data)
108 | if err != nil {
109 | log.Error("ea %v", err)
110 | t.Errorf("Got error on write: %v", err)
111 | return
112 | }
113 | stored_addrs[k] = addr
114 | addr = naddr
115 | }
116 | seg.Unlock()
117 | sleeptime := time.Duration(rand.Int() % 2000)
118 | time.Sleep(sleeptime * time.Millisecond)
119 | //Read back
120 | for k := 0; k < seglimit; k++ {
121 | rdata := make([]byte, 33000)
122 | rslice := sp.Read(stored_addrs[k], rdata)
123 | if len(rslice) != len(stored_data[k]) {
124 | log.Error("eb")
125 | t.Errorf("Got wrong slice len back")
126 | return
127 | }
128 | for j := 0; j < len(stored_data[k]); j++ {
129 | if rslice[j] != stored_data[k][j] {
130 | log.Error("ec")
131 | t.Errorf("Index %v differed got %v, expected %v", j, rslice[j], stored_data[k][j])
132 | }
133 | }
134 | }
135 | wg.Done()
136 | }()
137 | }
138 | wg.Wait()
139 | }
140 | }
141 |
142 | func Test_FP_RW1(t *testing.T) {
143 | fp := makeFileProvider()
144 | x_RW1(t, fp)
145 | }
146 |
147 | func Test_FP_FUZZ(t *testing.T) {
148 | fp := makeFileProvider()
149 | x_RWFuzz(t, fp)
150 | }
151 |
152 | func Test_CP_RW1(t *testing.T) {
153 | cp := makeCephProvider()
154 | x_RW1(t, cp)
155 | }
156 |
157 | func Test_CP_FUZZ(t *testing.T) {
158 | cp := makeCephProvider()
159 | x_RWFuzz(t, cp)
160 | }
161 |
--------------------------------------------------------------------------------
/btrdbd/config.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "strconv"
7 |
8 | gcfg "gopkg.in/gcfg.v1"
9 | )
10 |
11 | type Config struct {
12 | Http struct {
13 | Port *int
14 | Address *string
15 | Enabled bool
16 | }
17 | Capnp struct {
18 | Port *int
19 | Address *string
20 | Enabled bool
21 | }
22 | Mongo struct {
23 | Server *string
24 | Collection *string
25 | }
26 | Storage struct {
27 | Provider string
28 | Filepath *string
29 | Cephconf *string
30 | Cephpool *string
31 | }
32 | Cache struct {
33 | BlockCache int
34 | RadosWriteCache *int
35 | RadosReadCache *int
36 | }
37 | Debug struct {
38 | Cpuprofile bool
39 | Heapprofile bool
40 | }
41 | Coalescence struct {
42 | Earlytrip *int
43 | Interval *int
44 | }
45 | }
46 |
47 | var Configuration Config
48 | var Params map[string]string
49 |
50 | func loadConfig() {
51 | found := false
52 | err := gcfg.ReadFileInto(&Configuration, "./btrdb.conf")
53 | if err != nil {
54 | fmt.Printf("Could not load configuration file './btrdb.conf':\n%v\n", err)
55 | } else {
56 | found = true
57 | }
58 |
59 | if !found {
60 | err := gcfg.ReadFileInto(&Configuration, "/etc/btrdb/btrdb.conf")
61 | if err != nil {
62 | fmt.Printf("Could not load configuration file '/etc/btrdb/btrdb.conf':\n%v\n", err)
63 | } else {
64 | found = true
65 | }
66 | }
67 |
68 | if !found {
69 | fmt.Printf("Aborting: no configuration found!\n")
70 | os.Exit(1)
71 | }
72 |
73 | if Configuration.Mongo.Server == nil || *Configuration.Mongo.Server == "" {
74 | fmt.Printf("Aborting: configuration missing MongoDB server address\n")
75 | os.Exit(1)
76 | }
77 | if Configuration.Mongo.Collection == nil || *Configuration.Mongo.Collection == "" {
78 | fmt.Printf("Aborting: configuration missing MongoDB collection\n")
79 | os.Exit(1)
80 | }
81 |
82 | if Configuration.Storage.Provider == "file" {
83 | if Configuration.Storage.Filepath == nil {
84 | fmt.Printf("Aborting: using Files for storage, but no filepath specified\n")
85 | os.Exit(1)
86 | }
87 | } else if Configuration.Storage.Provider == "ceph" {
88 | if Configuration.Storage.Cephconf == nil {
89 | fmt.Printf("Aborting: using Ceph for storage, but no cephconf specified\n")
90 | os.Exit(1)
91 | }
92 | if Configuration.Storage.Cephpool == nil {
93 | fmt.Printf("Aborting: using Ceph for storage, but no cephpool specified\n")
94 | os.Exit(1)
95 | }
96 | } else {
97 | fmt.Printf("Aborting: unknown storage provider specified\n")
98 | os.Exit(1)
99 | }
100 |
101 | if Configuration.Cache.RadosWriteCache == nil {
102 | z := 0
103 | Configuration.Cache.RadosWriteCache = &z
104 | }
105 | if Configuration.Cache.RadosReadCache == nil {
106 | z := 0
107 | Configuration.Cache.RadosReadCache = &z
108 | }
109 |
110 | if Configuration.Http.Enabled && Configuration.Http.Port == nil {
111 | fmt.Printf("Aborting: http server enabled, but no port specified\n")
112 | os.Exit(1)
113 | }
114 |
115 | if Configuration.Http.Enabled && Configuration.Http.Address == nil {
116 | fmt.Printf("Aborting: http server enabled, but no address specified\n")
117 | os.Exit(1)
118 | }
119 |
120 | if Configuration.Capnp.Enabled && Configuration.Capnp.Port == nil {
121 | fmt.Printf("Aborting: capn proto server enabled, but no port specified\n")
122 | os.Exit(1)
123 | }
124 |
125 | if Configuration.Capnp.Enabled && Configuration.Capnp.Address == nil {
126 | fmt.Printf("Aborting: capn proto server enabled, but no address specified\n")
127 | os.Exit(1)
128 | }
129 |
130 | if Configuration.Coalescence.Earlytrip == nil {
131 | fmt.Printf("Aborting: transaction coalescence early trip object count not set\n")
132 | os.Exit(1)
133 | }
134 |
135 | if Configuration.Coalescence.Interval == nil {
136 | fmt.Printf("Aborting: transaction coalescence commit interval not set\n")
137 | os.Exit(1)
138 | }
139 |
140 | Params = map[string]string{
141 | "mongoserver": *Configuration.Mongo.Server,
142 | "provider": Configuration.Storage.Provider,
143 | "cachesize": strconv.FormatInt(int64(Configuration.Cache.BlockCache), 10),
144 | "collection": *Configuration.Mongo.Collection,
145 | }
146 | if Configuration.Storage.Provider == "ceph" {
147 | Params["cephconf"] = *Configuration.Storage.Cephconf
148 | Params["cephpool"] = *Configuration.Storage.Cephpool
149 | Params["cephrcache"] = strconv.FormatInt(int64(*Configuration.Cache.RadosReadCache), 10)
150 | Params["cephwcache"] = strconv.FormatInt(int64(*Configuration.Cache.RadosWriteCache), 10)
151 | }
152 | if Configuration.Storage.Provider == "file" {
153 | Params["dbpath"] = *Configuration.Storage.Filepath
154 | }
155 |
156 | fmt.Printf("Configuration OK!\n")
157 | }
158 |
--------------------------------------------------------------------------------
/qci/test_readstat1.ipy:
--------------------------------------------------------------------------------
1 |
2 | import qdf
3 | import qdf.quasar
4 | import sys
5 | import random
6 | import uuid
7 | import time
8 | import numpy as np
9 | from twisted.internet import defer, protocol, reactor
10 | print "entered test readwrite1"
11 | EXIT_CODE = None
12 | def setexit(code):
13 | global EXIT_CODE
14 | EXIT_CODE = code
15 | reactor.stop()
16 |
17 | def statify(data, pw, starttime, endtime):
18 | rv = {}
19 | mask = ~((1<<pw)-1)
--------------------------------------------------------------------------------
/qtree/qtree_test.go:
--------------------------------------------------------------------------------
106 | if nt > t {
107 | t = nt
108 | }
109 | }
110 | return rv
111 | }
112 |
113 | func MakeWTree() (*QTree, uuid.UUID) {
114 | id := uuid.NewRandom()
115 | mBS()
116 | tr, err := NewWriteQTree(_bs, id)
117 | if err != nil {
118 | log.Panic(err)
119 | }
120 | return tr, id
121 | }
122 | func CompareData(lhs []Record, rhs []Record) {
123 | if len(lhs) != len(rhs) {
124 | log.Panic("lhs != rhs len")
125 | }
126 | for i, v := range lhs {
127 | if rhs[i] != v {
128 | log.Panic("data differs")
129 | }
130 | }
131 | }
132 | func TestTreeSWriteLarge(t *testing.T) {
133 | mBS()
134 | testuuid := uuid.NewRandom()
135 | tr, err := NewWriteQTree(_bs, testuuid)
136 | log.Printf("Generated tree %v", testuuid.String())
137 | if err != nil {
138 | t.Error(err)
139 | }
140 | log.Printf("Generating dummy records")
141 | records := GenData(0, 40*DAY, HOUR, 2*MINUTE, func(t int64) float64 {
142 | return float64(t)
143 | })
144 | log.Printf("We generated %v records", len(records))
145 |
146 | tr.InsertValues(records)
147 | tr.Commit()
148 |
149 | tr, err = NewReadQTree(_bs, testuuid, bstore.LatestGeneration)
150 | if err != nil {
151 | log.Panic(err)
152 | }
153 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE)
154 | if err != nil {
155 | log.Panic(err)
156 | }
157 | log.Printf("We read %v records", len(rrec))
158 | if len(rrec) != len(records) {
159 | t.FailNow()
160 | }
161 | for i := 0; i < len(rrec); i++ {
162 | if records[i].Time != rrec[i].Time ||
163 | records[i].Val != rrec[i].Val {
164 | t.FailNow()
165 | }
166 | //log.Printf("[%5d] w=%v r=%v d=%v", i, records[i].Time, rrec[i].Time,
167 | // int64(records[i].Time- rrec[i].Time))
168 | }
169 |
170 | }
171 |
172 | func BenchmarkMultiSWrite(b *testing.B) {
173 | mBS()
174 | testuuid := uuid.NewRandom()
175 | log.Printf("MultiSWrite is using %v", testuuid.String())
176 | log.Printf("Generating dummy records")
177 | records := GenData(0, 1*DAY, SECOND, 100*MILLISECOND, func(t int64) float64 {
178 | return float64(t)
179 | })
180 | log.Printf("We generated %v records, randomizing a copy", len(records))
181 | rec_copy_orig := make([]Record, len(records))
182 | perm := rand.Perm(len(records))
183 | for i, v := range perm {
184 | rec_copy_orig[v] = records[i]
185 | }
186 | b.ResetTimer()
187 | for iter := 0; iter < b.N; iter++ {
188 | rec_copy := make([]Record, len(rec_copy_orig))
189 | copy(rec_copy, rec_copy_orig)
190 | iperstage := 4000
191 | idx := 0
192 | for {
193 | tr, err := NewWriteQTree(_bs, testuuid)
194 | if err != nil {
195 | b.Error(err)
196 | }
197 | end := idx + iperstage
198 | if end > len(rec_copy) {
199 | end = len(rec_copy)
200 | }
201 | tr.InsertValues(rec_copy[idx:end])
202 | tr.Commit()
203 | idx = end
204 | if idx == len(rec_copy) {
205 | break
206 | }
207 | }
208 | /*
209 | //Read back the records
210 | tr, err := NewReadQTree(_bs, testuuid, bstore.LatestGeneration)
211 | if err != nil {
212 | log.Panic(err)
213 | }
214 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE)
215 | if err != nil {
216 | log.Panic(err)
217 | }
218 | */
219 | }
220 | }
221 | func TestTreeMultiSWrite(t *testing.T) {
222 | mBS()
223 | testuuid := uuid.NewRandom()
224 | log.Printf("MultiSWrite is going into %v", testuuid.String())
225 | log.Printf("Generating dummy records")
226 | records := GenData(0, 1*HOUR, 1*MINUTE, 2*SECOND, func(t int64) float64 {
227 | return float64(t)
228 | })
229 | log.Printf("We generated %v records, randomizing a copy", len(records))
230 | rec_copy := make([]Record, len(records))
231 | perm := rand.Perm(len(records))
232 | for i, v := range perm {
233 | rec_copy[v] = records[i]
234 | }
235 | iperstage := 30
236 | idx := 0
237 | for {
238 | tr, err := NewWriteQTree(_bs, testuuid)
239 | if err != nil {
240 | t.Error(err)
241 | }
242 | end := idx + iperstage
243 | if end > len(rec_copy) {
244 | end = len(rec_copy)
245 | }
246 | tr.InsertValues(rec_copy[idx:end])
247 | tr.root.PrintCounts(2)
248 | tr.Commit()
249 | idx = end
250 | if idx == len(rec_copy) {
251 | break
252 | }
253 | }
254 |
255 | //Read back the records
256 | tr, err := NewReadQTree(_bs, testuuid, bstore.LatestGeneration)
257 | if err != nil {
258 | log.Panic(err)
259 | }
260 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE)
261 | if err != nil {
262 | log.Panic(err)
263 | }
264 | //Verify we have the same number (for now)
265 | log.Printf("wrote %v, read %v", len(records), len(rrec))
266 | tr.root.PrintCounts(0)
267 | if len(records) != len(rrec) {
268 | t.FailNow()
269 | }
270 | }
271 |
--------------------------------------------------------------------------------
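The tests above all follow the same write-then-read cycle: open a write tree with NewWriteQTree, InsertValues, Commit, then open a read tree at bstore.LatestGeneration and read back with ReadStandardValuesBlock. Below is a minimal sketch of that cycle, assuming the mBS() helper, the _bs blockstore and the HOUR/MINUTE nanosecond constants defined in this test file; the test name and the sample values are illustrative.

package qtree

import (
	"testing"

	"github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
	"github.com/pborman/uuid"
)

// Minimal write-then-read cycle, condensed from TestTreeSWriteLarge above.
func TestWriteReadSketch(t *testing.T) {
	mBS()
	id := uuid.NewRandom()

	wtr, err := NewWriteQTree(_bs, id)
	if err != nil {
		t.Fatal(err)
	}
	wtr.InsertValues([]Record{{Time: 0, Val: 1.0}, {Time: MINUTE, Val: 2.0}})
	wtr.Commit()

	rtr, err := NewReadQTree(_bs, id, bstore.LatestGeneration)
	if err != nil {
		t.Fatal(err)
	}
	rrec, err := rtr.ReadStandardValuesBlock(0, HOUR)
	if err != nil {
		t.Fatal(err)
	}
	if len(rrec) != 2 {
		t.Fatalf("expected 2 records, read %v", len(rrec))
	}
}

--------------------------------------------------------------------------------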
/cpinterface/interface.capnp:
--------------------------------------------------------------------------------
1 | using Go = import "go.capnp";
2 | $Go.package("cpinterface");
3 | $Go.import("github.com/SoftwareDefinedBuildings/btrdb/cpinterface");
4 |
5 | @0x85360901bcc4bed2;
6 |
7 | ###
8 | # Request type, each request gives back exactly one response
9 | ###
10 | struct Request {
11 | # This will be added to the response, so that requests can be mapped
12 | # to responses as they can come back out of order.
13 | echoTag @0 : UInt64;
14 | union {
15 | void @1 : Void;
16 | queryStandardValues @2 : CmdQueryStandardValues;
17 | queryStatisticalValues @3 : CmdQueryStatisticalValues;
18 | queryWindowValues @9 : CmdQueryWindowValues;
19 | queryVersion @4 : CmdQueryVersion;
20 | queryNearestValue @5 : CmdQueryNearestValue;
21 | queryChangedRanges @6 : CmdQueryChangedRanges;
22 | insertValues @7 : CmdInsertValues;
23 | deleteValues @8 : CmdDeleteValues;
24 | }
25 | }
26 |
27 | # The basic record type. Times are measured in nanoseconds
28 | # since the Epoch. At the time of writing, BTrDB is only
29 | # capable of storing dates from approx 1935 to 2078...
30 | struct Record {
31 | time @0 : Int64;
32 | value @1 : Float64;
33 | }
34 |
35 | # Query pre-aggregated statistical records from the database.
36 | # These are particularly useful for plotting applications
37 | # and locating where data is.
38 | struct StatisticalRecord {
39 | time @0 : Int64;
40 | count @1 : UInt64;
41 | min @2 : Float64;
42 | mean @3 : Float64;
43 | max @4 : Float64;
44 | }
45 |
46 | # Query from startTime (inclusive) to endTime (exclusive) in
47 | # nanoseconds.
48 | # If you want consistent values over a series of
49 | # reads, or you wish to view a stream as it was in the past
50 | # then you can specify a nonzero version. Repeating a query
51 | # with the same version is guaranteed to return the same results
52 | # irrespective of any deletes or adds that take place.
53 | # returns many RecordLists
54 | struct CmdQueryStandardValues {
55 | uuid @0 : Data;
56 | version @1 : UInt64;
57 | startTime @2 : Int64;
58 | endTime @3 : Int64;
59 | }
60 |
61 |
62 | # Query from startTime (inclusive) to endTime (exclusive) in
63 | # nanoseconds. Note that both of those times will be rounded
64 | # down if they have set bits in the bottom pointWidth bits.
65 | # pointWidth is the log of the number of records to aggregate
66 | # per result. A PW of 30 therefore means (1<<30) ns per record
67 | # which is about a second.
68 | # If you want consistent values over a series of
69 | # reads, or you wish to view a stream as it was in the past
70 | # then you can specify a nonzero version
71 | # returns many StatisticalRecordLists
72 | struct CmdQueryStatisticalValues {
73 | uuid @0 : Data;
74 | version @1 : UInt64;
75 | startTime @2 : Int64;
76 | endTime @3 : Int64;
77 | pointWidth @4 : UInt8;
78 | }
79 |
80 | # Query from startTime (inclusive) to endTime (exclusive) in
81 | # nanoseconds. Aggregate windows with an end time less than or equal
82 | # to endTime will be returned. Windows start from exactly startTime and
83 | # increase by Width. Leap seconds etc are your problem. The depth
84 | # (currently unimplemented) represents the minimum PW to descend to
85 | # while computing windows.
86 | # If you want consistent values over a series of
87 | # reads, or you wish to view a stream as it was in the past
88 | # then you can specify a nonzero version
89 | # returns many StatisticalRecordLists
90 | struct CmdQueryWindowValues {
91 | uuid @0 : Data;
92 | version @1 : UInt64;
93 | startTime @2 : Int64;
94 | endTime @3 : Int64;
95 | width @4 : UInt64;
96 | depth @5 : UInt8;
97 | }
98 |
99 | # For every UUID given, return the current version and last
100 | # modified time of the stream.
101 | # returns VersionList
102 | struct CmdQueryVersion {
103 | uuids @0 : List(Data);
104 | }
105 |
106 | # Query the next (or previous if backward=true) value in the
107 | # stream, starting from time.
108 | # returns a RecordList
109 | struct CmdQueryNearestValue {
110 | uuid @0 : Data;
111 | version @1 : UInt64;
112 | time @2 : Int64;
113 | backward @3 : Bool;
114 | }
115 |
116 | # For the given UUID, return all the time ranges that have
117 | # changed between the given generations. toGeneration is
118 | # not included. Note that depending on how full the stream is,
119 | # the returned result may be rounded off. A sparsely populated
120 | # stream returns less accurate results than a densely populated
121 | # one.
122 | # returns many RangeLists
123 | struct CmdQueryChangedRanges {
124 | uuid @0 : Data;
125 | fromGeneration @1 : UInt64;
126 | toGeneration @2 : UInt64;
127 | unused @3 : UInt64;
128 | resolution @4 : UInt8;
129 | }
130 |
131 | # Insert values. If sync is true, the database will flush the
132 | # results to disk before returning success. Please PLEASE don't
133 | # use that without seriously considering if you need it, as it
134 | # disables transaction coalescence and reduces performance
135 | # by several orders of magnitude.
136 | # returns Void
137 | struct CmdInsertValues {
138 | uuid @0 : Data;
139 | values @1 : List(Record);
140 | sync @2 : Bool;
141 | }
142 |
143 | # Delete the values between the given times.
144 | # returns Void
145 | struct CmdDeleteValues {
146 | uuid @0 : Data;
147 | startTime @1 : Int64;
148 | endTime @2 : Int64;
149 | }
150 |
151 | ###
152 | # Response type
153 | ###
154 | struct Response {
155 | echoTag @0 : UInt64;
156 | statusCode @1 : StatusCode;
157 | final @2 : Bool;
158 | union {
159 | void @3 : Void;
160 | records @4 : Records;
161 | statisticalRecords @5 : StatisticalRecords;
162 | versionList @6 : Versions;
163 | changedRngList @7 : Ranges;
164 | }
165 | }
166 |
167 | # Contains all the error codes that are emitted by Quasar
168 | enum StatusCode {
169 | ok @0;
170 |
171 | # Returned (ATM) for almost everything
172 | internalError @1;
173 |
174 | # Returned for a bad UUID or a bad version
175 | noSuchStreamOrVersion @2;
176 |
177 | # Returned for a bad parameter, like time range
178 | invalidParameter @3;
179 |
180 | # Returned from nearest value when it doesn't exist
181 | noSuchPoint @4;
182 | }
183 |
184 | # Contains a list of records, and the version of the stream
185 | # used to satisfy the request.
186 | struct Records {
187 | version @0 : UInt64;
188 | values @1 : List(Record);
189 | }
190 |
191 | # Contains a list of statistical records and the version of
192 | # the stream used to satisfy the request.
193 | struct StatisticalRecords {
194 | version @0 : UInt64;
195 | values @1 : List(StatisticalRecord);
196 | }
197 |
198 | # Contains the latest version numbers for the requested
199 | # streams
200 | struct Versions {
201 | uuids @0 : List(Data);
202 | versions @1 : List(UInt64);
203 | }
204 |
205 | # Represents a range of time that has been changed
206 | struct ChangedRange {
207 | startTime @0 : Int64;
208 | endTime @1 : Int64;
209 | }
210 |
211 | # Response to the QueryChangedRanges
212 | struct Ranges {
213 | version @0 : UInt64;
214 | values @1 : List(ChangedRange);
215 | }
216 |
--------------------------------------------------------------------------------
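The pointWidth convention described above (each statistical record covers 2^pointWidth nanoseconds, so a pointWidth of 30 is roughly one second) drives every statistical query. A small Go sketch of choosing a pointWidth for a target resolution and of the rounding the schema applies to startTime/endTime; the helper names are illustrative.

package main

import "fmt"

// pointWidthFor returns the largest pointWidth whose bucket (1<<pw ns) does not
// exceed the requested resolution; per the schema comment, pw 30 is ~1.07s.
func pointWidthFor(resolutionNS int64) uint8 {
	pw := uint8(0)
	for pw < 62 && (int64(1)<<(pw+1)) <= resolutionNS {
		pw++
	}
	return pw
}

// roundDown clears the bottom pointWidth bits of a timestamp, which is the
// rounding CmdQueryStatisticalValues applies to startTime and endTime.
func roundDown(t int64, pw uint8) int64 {
	return t &^ ((int64(1) << pw) - 1)
}

func main() {
	pw := pointWidthFor(1000000000) // one-second resolution
	fmt.Println(pw)                 // 29: 1<<30 ns already exceeds 1e9 ns
	fmt.Println(roundDown(1437000000123456789, pw))
}

--------------------------------------------------------------------------------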
/internal/cephprovider/cephprovider.c:
--------------------------------------------------------------------------------
1 |
2 |
3 | #include <stdio.h>
4 | #include <stdlib.h>
5 | #include <string.h>
6 | #include <errno.h>
7 | #include "cephprovider.h"
8 | #include <rados/librados.h>
9 | #include <inttypes.h>
10 |
11 | #define ADDR_LOCK_SIZE 0x1000000000
12 | #define COMP_CAP_STEP 64
13 | #define OID_SIZE 43 //32 for uuid, 10 for id, 1 for nul
14 |
15 | rados_t cluster;
16 | char* pool;
17 |
18 | const char nibbles [] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
19 |
20 | void make_object_id(uint8_t *uuid, uint64_t address, char* dest)
21 | {
22 | int i;
23 | int dp;
24 | for (i=0;i<16;i++)
25 | {
26 | int nibble;
27 | dest[i*2] = nibbles[uuid[i]>>4];
28 | dest[i*2+1] = nibbles[uuid[i]&0xF];
29 | }
30 | for (i=0;i<10;i++)
31 | {
32 | dest[32+i] = nibbles[address >> (4*(9-i)) & 0xF];
33 | }
34 | dest[OID_SIZE-1] = 0;
35 | }
36 |
37 | void initialize_provider(const char* conffile, const char* cephpool)
38 | {
39 | int err;
40 | err = rados_create(&cluster, NULL);
41 | if (err < 0)
42 | {
43 | fprintf(stderr, "could not create RADOS cluster handle\n");
44 | errno = -err;
45 | return;
46 | }
47 |
48 | err = rados_conf_read_file(cluster, conffile);
49 | if (err < 0)
50 | {
51 | fprintf(stderr, "could not create load ceph conf\n");
52 | errno = -err;
53 | return;
54 | }
55 |
56 | err = rados_connect(cluster);
57 | if (err < 0)
58 | {
59 | fprintf(stderr, "could not create connect to cluster\n");
60 | errno = -err;
61 | return;
62 | }
63 |
64 | pool = (char*) malloc(strlen(cephpool)+1);
65 | strcpy(pool, cephpool);
66 |
67 | errno = 0;
68 | }
69 |
70 | cephprovider_handle_t* handle_create()
71 | {
72 | int err;
73 | cephprovider_handle_t *rv = (cephprovider_handle_t*) malloc(sizeof(cephprovider_handle_t));
74 | rv->comps = (rados_completion_t*) malloc(sizeof(rados_completion_t) *COMP_CAP_STEP);
75 | rv->comp_cap = COMP_CAP_STEP;
76 | rv->comp_len = 0;
77 |
78 | err = rados_ioctx_create(cluster, pool, &rv->ctx);
79 | if (err < 0)
80 | {
81 | fprintf(stderr, "could not create io context\n");
82 | errno = -err;
83 | rados_ioctx_destroy(rv->ctx);
84 | free(rv);
85 | return NULL;
86 | }
87 | errno = 0;
88 | return rv;
89 | }
90 |
91 | void handle_write(cephprovider_handle_t *h, uint8_t *uuid, uint64_t address, const char *data, int len, int trunc)
92 | {
93 | //The ceph provider uses 24 bits of address per object, and the top 40 bits as an object ID
94 | int offset = address & 0xFFFFFF;
95 | uint64_t id = address >> 24;
96 | int err;
97 | char oid [OID_SIZE];
98 | make_object_id(uuid, id, &oid[0]);
99 | if (trunc)
100 | {
101 | err = rados_trunc(h->ctx, oid, len + offset);
102 | if (err < 0)
103 | {
104 | fprintf(stderr, "could not trunc\n");
105 | errno = -err;
106 | return;
107 | }
108 | }
109 | //Check we have a completion we can use
110 | if (h->comp_len == h->comp_cap)
111 | {
112 | h->comp_cap += COMP_CAP_STEP;
113 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t)));
114 | if (!h->comps)
115 | {
116 | return;
117 | }
118 | }
119 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len]));
120 | if (err < 0)
121 | {
122 | fprintf(stderr, "could not create completion\n");
123 | errno = -err;
124 | return;
125 | }
126 | err = rados_aio_write(h->ctx, oid, h->comps[h->comp_len], data, len, offset);
127 | if (err < 0)
128 | {
129 | fprintf(stderr, "could not aio write\n");
130 | errno = -err;
131 | return;
132 | }
133 | h->comp_len++;
134 | errno = 0;
135 | }
136 |
137 | int handle_read(cephprovider_handle_t *h, uint8_t *uuid, uint64_t address, char* dest, int len)
138 | {
139 | //The ceph provider uses 24 bits of address per object, and the top 40 bits as an object ID
140 | int offset = address & 0xFFFFFF;
141 | uint64_t id = address >> 24;
142 | int rv;
143 | char oid [OID_SIZE];
144 | make_object_id(uuid, id, &oid[0]);
145 | rv = rados_read(h->ctx, oid, dest, len, offset);
146 | if (rv < 0)
147 | {
148 | fprintf(stderr, "could not read %s\n", oid);
149 | errno = -rv;
150 | return -1;
151 | }
152 | errno = 0;
153 | return rv;
154 | }
155 |
156 | void handle_init_allocator(cephprovider_handle_t *h)
157 | {
158 | int err;
159 | struct timeval dur;
160 | dur.tv_sec = 5;
161 | dur.tv_usec = 0;
162 | uint64_t addr;
163 | if (h->comp_len == h->comp_cap)
164 | {
165 | h->comp_cap += COMP_CAP_STEP;
166 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t)));
167 | if (!h->comps)
168 | {
169 | errno = ENOMEM;
170 | return;
171 | }
172 | }
173 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len]));
174 | if (err < 0)
175 | {
176 | fprintf(stderr, "could not create completion\n");
177 | errno = -err;
178 | return;
179 | }
180 |
181 | err = rados_lock_exclusive(h->ctx, "allocator", "alloc_lock", "main", "alloc", &dur, 0);
182 | if (err < 0) {
183 | fprintf(stderr, "could not lock allocator\n");
184 | errno = -err;
185 | return;
186 | }
187 | addr = 0x1000000; //Not zero!!
188 | err = rados_aio_write_full(h->ctx, "allocator", h->comps[h->comp_len], (char *) &addr, 8);
189 | if (err < 0) {
190 | fprintf(stderr, "could not write allocator\n");
191 | errno = -err;
192 | return;
193 | }
194 | rados_aio_wait_for_safe(h->comps[h->comp_len]);
195 | err = rados_unlock(h->ctx, "allocator", "alloc_lock", "main");
196 | if (err < 0) {
197 | fprintf(stderr, "could not unlock allocator\n");
198 | errno = -err;
199 | return;
200 | }
201 | rados_aio_release(h->comps[h->comp_len]);
202 | errno = 0;
203 | }
204 |
205 | //Returns the address of the start of a range that can be
206 | //used
207 | uint64_t handle_obtainrange(cephprovider_handle_t *h)
208 | {
209 | int err;
210 | int rv;
211 | int then;
212 | struct timeval dur;
213 | dur.tv_sec = 60;
214 | dur.tv_usec = 0;
215 | uint64_t addr;
216 | if (h->comp_len == h->comp_cap)
217 | {
218 | h->comp_cap += COMP_CAP_STEP;
219 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t)));
220 | if (!h->comps)
221 | {
222 | errno = ENOMEM;
223 | return 0;
224 | }
225 | }
226 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len]));
227 | if (err < 0)
228 | {
229 | fprintf(stderr, "could not create completion\n");
230 | errno = -err;
231 | return 0;
232 | }
233 | then = (int) time();
234 | while((int)time() - then < 60)
235 | {
236 | err = rados_lock_exclusive(h->ctx, "allocator", "alloc_lock", "main", "alloc", &dur, 0);
237 | if (err == 0) {
238 | break;
239 | }
240 | }
241 | if (err < 0) {
242 | fprintf(stderr, "could not lock allocator\n");
243 | errno = -err;
244 | return 0;
245 | }
246 | rv = rados_read(h->ctx, "allocator", (char *) &addr, 8, 0);
247 | if (rv < 0 || rv != 8) {
248 | fprintf(stderr, "could not read allocator\n");
249 | errno = (rv < 0) ? -rv : EIO;
250 | return 0;
251 | }
252 | printf("read allocation 0x%016" PRIx64 "\n",addr);
253 | addr += ADDR_LOCK_SIZE;
254 | printf("writing allocation 0x%016" PRIx64 "\n",addr);
255 | err = rados_aio_write_full(h->ctx, "allocator", h->comps[h->comp_len], (char *) &addr, 8);
256 | if (err < 0) {
257 | fprintf(stderr, "could not write allocator\n");
258 | errno = -err;
259 | return 0;
260 | }
261 | rados_aio_wait_for_safe(h->comps[h->comp_len]);
262 | err = rados_unlock(h->ctx, "allocator", "alloc_lock", "main");
263 | if (err < 0) {
264 | fprintf(stderr, "could not unlock allocator\n");
265 | errno = -err;
266 | return 0;
267 | }
268 | rados_aio_release(h->comps[h->comp_len]);
269 | errno = 0;
270 | printf("Returning %016" PRIx64 "\n", addr - ADDR_LOCK_SIZE);
271 | return addr - ADDR_LOCK_SIZE;
272 | }
273 |
274 | void handle_close(cephprovider_handle_t *h)
275 | {
276 | int i;
277 | for (i=0; i < h->comp_len; i++)
278 | {
279 | rados_aio_wait_for_complete(h->comps[i]);
280 | rados_aio_release(h->comps[i]);
281 | }
282 | free(h->comps);
283 | rados_ioctx_destroy(h->ctx);
284 | free(h);
285 |
286 | errno = 0;
287 | }
288 |
--------------------------------------------------------------------------------
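handle_read and handle_write above split every 64-bit address into a 24-bit offset within a RADOS object and a 40-bit object id, and make_object_id builds the object name from 32 hex characters of UUID plus 10 hex characters of id. A Go sketch of that same layout; the function names are illustrative and not part of the provider.

package main

import "fmt"

// splitAddress mirrors handle_read/handle_write: the low 24 bits are the byte
// offset within a RADOS object, the high 40 bits select the object.
func splitAddress(addr uint64) (objectID uint64, offset uint32) {
	return addr >> 24, uint32(addr & 0xFFFFFF)
}

// objectName mirrors make_object_id: 32 hex characters of UUID followed by
// 10 hex characters of object id (plus a trailing NUL in the C version).
func objectName(uuid [16]byte, objectID uint64) string {
	return fmt.Sprintf("%032x%010x", uuid[:], objectID)
}

func main() {
	// 0x1000000 is the first address handed out by handle_init_allocator.
	id, off := splitAddress(0x1000000)
	fmt.Printf("object %d, offset %d\n", id, off) // object 1, offset 0

	var u [16]byte
	fmt.Println(objectName(u, id)) // 32 zero nibbles followed by "0000000001"
}

--------------------------------------------------------------------------------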
/internal/fileprovider/fileprovider.go:
--------------------------------------------------------------------------------
1 | package fileprovider
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "os"
7 | "sync"
8 |
9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider"
10 | "github.com/op/go-logging"
11 | )
12 |
13 | var log *logging.Logger
14 |
15 | func init() {
16 | log = logging.MustGetLogger("log")
17 | }
18 |
19 | const NUMFILES = 256
20 |
21 | type writeparams struct {
22 | Address uint64
23 | Data []byte
24 | }
25 |
26 | type FileProviderSegment struct {
27 | sp *FileStorageProvider
28 | fidx int
29 | f *os.File
30 | base int64
31 | ptr int64
32 | wchan chan writeparams
33 | wg sync.WaitGroup
34 | }
35 |
36 | type FileStorageProvider struct {
37 | fidx chan int
38 | retfidx chan int
39 | dbf []*os.File
40 | dbrf []*os.File
41 | dbrf_mtx []sync.Mutex
42 | favail []bool
43 | }
44 |
45 | func (seg *FileProviderSegment) writer() {
46 |
47 | for args := range seg.wchan {
48 | off := int64(args.Address & ((1 << 50) - 1))
49 | lenarr := make([]byte, 2)
50 | lenarr[0] = byte(len(args.Data))
51 | lenarr[1] = byte(len(args.Data) >> 8)
52 | _, err := seg.f.WriteAt(lenarr, off)
53 | if err != nil {
54 | log.Panic("File writing error %v", err)
55 | }
56 | _, err = seg.f.WriteAt(args.Data, off+2)
57 | if err != nil {
58 | log.Panic("File writing error %v", err)
59 | }
60 | }
61 | seg.wg.Done()
62 | }
63 | func (seg *FileProviderSegment) init() {
64 | seg.wchan = make(chan writeparams, 16)
65 | seg.wg.Add(1)
66 | go seg.writer()
67 | }
68 |
69 | //Returns the address of the first free word in the segment when it was locked
70 | func (seg *FileProviderSegment) BaseAddress() uint64 {
71 | //This seems arbitrary, why not go with the top 8 bits? The reason is this:
72 | //a) this still leaves 1PB per file
73 | //b) The huffman encoding can do 58 bits in 8 bytes, but anything more is 9
74 | //c) if we later decide to use more than 256 files, we can
75 | return (uint64(seg.fidx) << 50) + uint64(seg.base)
76 | }
77 |
78 | //Unlocks the segment for the StorageProvider to give to other consumers
79 | //Implies a flush
80 | func (seg *FileProviderSegment) Unlock() {
81 | seg.Flush()
82 | seg.sp.retfidx <- seg.fidx
83 | }
84 |
85 | //Writes a slice to the segment, returns immediately
86 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument
87 | //It is up to the implementer to work out how to report no space immediately
88 | //The uint64 rv is the address to be used for the next write
89 | func (seg *FileProviderSegment) Write(uuid []byte, address uint64, data []byte) (uint64, error) {
90 | //TODO remove
91 | if seg.ptr != int64(address&((1<<50)-1)) {
92 | log.Panic("Pointer does not match address %x vs %x", seg.ptr, int64(address&((1<<50)-1)))
93 | }
94 | wp := writeparams{Address: address, Data: data}
95 | seg.wchan <- wp
96 | seg.ptr = int64(address&((1<<50)-1)) + int64(len(data)) + 2
97 | return uint64(seg.ptr) + (uint64(seg.fidx) << 50), nil
98 | }
99 |
100 | //Block until all writes are complete; note this does not sync the data to disk
101 | func (seg *FileProviderSegment) Flush() {
102 | close(seg.wchan)
103 | seg.wg.Wait()
104 | }
105 |
106 | //Provide file indices into fidx, does not return
107 | func (sp *FileStorageProvider) provideFiles() {
108 | for {
109 | //Read all returned files
110 | ldretfi:
111 | for {
112 | select {
113 | case fi := <-sp.retfidx:
114 | sp.favail[fi] = true
115 | default:
116 | break ldretfi
117 | }
118 | }
119 |
120 | //Greedily select file
121 | minidx := -1
122 | var minv int64 = 0
123 | for i := 0; i < NUMFILES; i++ {
124 | if !sp.favail[i] {
125 | continue
126 | }
127 | off, err := sp.dbf[i].Seek(0, os.SEEK_CUR)
128 | if err != nil {
129 | log.Panic(err)
130 | }
131 | if minidx == -1 || off < minv {
132 | minidx = i
133 | minv = off
134 | }
135 | }
136 |
137 | //Return it, or do blocking read if not found
138 | if minidx != -1 {
139 | sp.favail[minidx] = false
140 | sp.fidx <- minidx
141 | } else {
142 | //Do a blocking read on retfidx to avoid fast spin on nonblocking
143 | fi := <-sp.retfidx
144 | sp.favail[fi] = true
145 | }
146 |
147 | }
148 | }
149 |
150 | //Called at startup
151 | func (sp *FileStorageProvider) Initialize(opts map[string]string) {
152 | //Initialize file indices thingy
153 | sp.fidx = make(chan int)
154 | sp.retfidx = make(chan int, NUMFILES+1)
155 | sp.dbf = make([]*os.File, NUMFILES)
156 | sp.dbrf = make([]*os.File, NUMFILES)
157 | sp.dbrf_mtx = make([]sync.Mutex, NUMFILES)
158 | sp.favail = make([]bool, NUMFILES)
159 | for i := 0; i < NUMFILES; i++ {
160 | //Open file
161 | dbpath, ok := opts["dbpath"]
162 | if !ok {
163 | log.Panic("Expected dbpath")
164 | }
165 | fname := fmt.Sprintf("%s/blockstore.%02x.db", dbpath, i)
166 | //write file descriptor
167 | {
168 | f, err := os.OpenFile(fname, os.O_RDWR, 0666)
169 | if err != nil && os.IsNotExist(err) {
170 | log.Critical("Aborting: seems database does not exist. Have you run `btrdbd -makedb`?")
171 | os.Exit(1)
172 | }
173 | if err != nil {
174 | log.Panicf("Problem with blockstore DB: ", err)
175 | }
176 | sp.dbf[i] = f
177 | }
178 | //Read file descriptor
179 | {
180 | f, err := os.OpenFile(fname, os.O_RDONLY, 0666)
181 | if err != nil {
182 | log.Panicf("Problem with blockstore DB: ", err)
183 | }
184 | sp.dbrf[i] = f
185 | }
186 | sp.favail[i] = true
187 | }
188 | go sp.provideFiles()
189 |
190 | }
191 |
192 | // Lock a segment, or block until a segment can be locked
193 | // Returns a Segment struct
194 | func (sp *FileStorageProvider) LockSegment(uuid []byte) bprovider.Segment {
195 | //Grab a file index
196 | fidx := <-sp.fidx
197 | f := sp.dbf[fidx]
198 | l, err := f.Seek(0, os.SEEK_END)
199 | if err != nil {
200 | log.Panicf("Error on lock segment: %v", err)
201 | }
202 |
203 | //Construct segment
204 | seg := &FileProviderSegment{sp: sp, fidx: fidx, f: sp.dbf[fidx], base: l, ptr: l}
205 | seg.init()
206 |
207 | return seg
208 | }
209 |
210 | //This is the size of a maximal size cblock + header
211 | const FIRSTREAD = 3459
212 |
213 | func (sp *FileStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte {
214 | fidx := address >> 50
215 | off := int64(address & ((1 << 50) - 1))
216 | if fidx >= NUMFILES {
217 | log.Panic("Encoded file idx too large")
218 | }
219 | sp.dbrf_mtx[fidx].Lock()
220 | nread, err := sp.dbrf[fidx].ReadAt(buffer[:FIRSTREAD], off)
221 | if err != nil && err != io.EOF {
222 | log.Panic("Non EOF read error: %v", err)
223 | }
224 | if nread < 2 {
225 | log.Panic("Unexpected (very) short read")
226 | }
227 | //Now we read the blob size
228 | bsize := int(buffer[0]) + (int(buffer[1]) << 8)
229 | if bsize > nread-2 {
230 | _, err := sp.dbrf[fidx].ReadAt(buffer[nread:bsize+2], off+int64(nread))
231 | if err != nil {
232 | log.Panic("Read error: %v", err)
233 | }
234 | }
235 | sp.dbrf_mtx[fidx].Unlock()
236 | return buffer[2 : bsize+2]
237 | }
238 |
239 | //Called to create the database for the first time
240 | func (sp *FileStorageProvider) CreateDatabase(opts map[string]string) error {
241 | for i := 0; i < NUMFILES; i++ {
242 | //Open file
243 | dbpath, ok := opts["dbpath"]
244 | if !ok {
245 | log.Panicf("Expected dbpath")
246 | }
247 | fname := fmt.Sprintf("%s/blockstore.%02x.db", dbpath, i)
248 | //write file descriptor
249 | {
250 | f, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
251 | if err != nil && !os.IsExist(err) {
252 | log.Panicf("Problem with blockstore DB: ", err)
253 | } else if os.IsExist(err) {
254 | return bprovider.ErrExists
255 | }
256 | //Add a file tag
257 | //An exercise left for the reader: if you remove this, everything breaks :-)
258 | //Hint: what is the physical address of the first byte of file zero?
259 | _, err = f.Write([]byte("QUASARDB"))
260 | if err != nil {
261 | log.Panicf("Could not write to blockstore:", err)
262 | }
263 |
264 | err = f.Close()
265 | if err != nil {
266 | log.Panicf("Error on close %v", err)
267 | }
268 | }
269 | }
270 | return nil
271 | }
272 |
--------------------------------------------------------------------------------
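The file provider packs the file index into the top bits of an address (fidx << 50) and keeps the byte offset in the low 50 bits, and every blob on disk is prefixed by a 2-byte little-endian length, which is why Read returns buffer[2 : bsize+2]. A standalone sketch of that address packing; the helper names are illustrative.

package main

import "fmt"

const offsetBits = 50 // low 50 bits are the in-file offset, as in BaseAddress and Read

// encodeAddr packs a file index and byte offset the way FileProviderSegment.BaseAddress does.
func encodeAddr(fidx int, offset int64) uint64 {
	return (uint64(fidx) << offsetBits) + uint64(offset)
}

// decodeAddr reverses the packing, as FileStorageProvider.Read does.
func decodeAddr(addr uint64) (fidx int, offset int64) {
	return int(addr >> offsetBits), int64(addr & ((1 << offsetBits) - 1))
}

func main() {
	// File blockstore.03.db, 8 bytes in (just past the 8-byte "QUASARDB" tag
	// that CreateDatabase writes at the start of every file).
	a := encodeAddr(3, 8)
	fmt.Printf("0x%016x\n", a)

	fidx, off := decodeAddr(a)
	fmt.Println(fidx, off) // 3 8
}

--------------------------------------------------------------------------------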
/quasar.go:
--------------------------------------------------------------------------------
1 | package btrdb
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | "time"
7 |
8 | "github.com/pborman/uuid"
9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
10 | "github.com/SoftwareDefinedBuildings/btrdb/qtree"
11 | "github.com/op/go-logging"
12 | )
13 |
14 | var log *logging.Logger
15 |
16 | func init() {
17 | log = logging.MustGetLogger("log")
18 | }
19 |
20 | type openTree struct {
21 | store []qtree.Record
22 | id uuid.UUID
23 | sigEC chan bool
24 | }
25 |
26 | const MinimumTime = -(16 << 56)
27 | const MaximumTime = (48 << 56)
28 | const LatestGeneration = bstore.LatestGeneration
29 |
30 | type Quasar struct {
31 | cfg QuasarConfig
32 | bs *bstore.BlockStore
33 |
34 | //Transaction coalescence
35 | globlock sync.Mutex
36 | treelocks map[[16]byte]*sync.Mutex
37 | openTrees map[[16]byte]*openTree
38 | }
39 |
40 | func newOpenTree(id uuid.UUID) *openTree {
41 | return &openTree{
42 | id: id,
43 | }
44 | }
45 |
46 | type QuasarConfig struct {
47 | //Measured in the number of datablocks
48 | //So 1000 is 8 MB cache
49 | DatablockCacheSize uint64
50 |
51 | //This enables the grouping of value inserts
52 | //with a commit every Interval millis
53 | //If the number of stored values exceeds
54 | //EarlyTrip
55 | TransactionCoalesceEnable bool
56 | TransactionCoalesceInterval uint64
57 | TransactionCoalesceEarlyTrip uint64
58 |
59 | Params map[string]string
60 | }
61 |
62 | // Return true if there are uncommitted results to be written to disk
63 | // Should only be used during shutdown as it hogs the glock
64 | func (q *Quasar) IsPending() bool {
65 | isPend := false
66 | q.globlock.Lock()
67 | for uuid, ot := range q.openTrees {
68 | q.treelocks[uuid].Lock()
69 | if len(ot.store) != 0 {
70 | isPend = true
71 | q.treelocks[uuid].Unlock()
72 | break
73 | }
74 | q.treelocks[uuid].Unlock()
75 | }
76 | q.globlock.Unlock()
77 | return isPend
78 | }
79 |
80 | func NewQuasar(cfg *QuasarConfig) (*Quasar, error) {
81 | bs, err := bstore.NewBlockStore(cfg.Params)
82 | if err != nil {
83 | return nil, err
84 | }
85 | rv := &Quasar{
86 | cfg: *cfg,
87 | bs: bs,
88 | openTrees: make(map[[16]byte]*openTree, 128),
89 | treelocks: make(map[[16]byte]*sync.Mutex, 128),
90 | }
91 | return rv, nil
92 | }
93 |
94 | func (q *Quasar) getTree(id uuid.UUID) (*openTree, *sync.Mutex) {
95 | mk := bstore.UUIDToMapKey(id)
96 | q.globlock.Lock()
97 | ot, ok := q.openTrees[mk]
98 | if !ok {
99 | ot := newOpenTree(id)
100 | mtx := &sync.Mutex{}
101 | q.openTrees[mk] = ot
102 | q.treelocks[mk] = mtx
103 | q.globlock.Unlock()
104 | return ot, mtx
105 | }
106 | mtx, ok := q.treelocks[mk]
107 | if !ok {
108 | log.Panicf("This should not happen")
109 | }
110 | q.globlock.Unlock()
111 | return ot, mtx
112 | }
113 |
114 | func (t *openTree) commit(q *Quasar) {
115 | if len(t.store) == 0 {
116 | //This might happen with a race in the timeout commit
117 | fmt.Println("no store in commit")
118 | return
119 | }
120 | tr, err := qtree.NewWriteQTree(q.bs, t.id)
121 | if err != nil {
122 | log.Panic(err)
123 | }
124 | if err := tr.InsertValues(t.store); err != nil {
125 | log.Error("BAD INSERT: ", err)
126 | }
127 | tr.Commit()
128 | t.store = nil
129 | }
130 | func (q *Quasar) InsertValues(id uuid.UUID, r []qtree.Record) {
131 | defer func() {
132 | if r := recover(); r != nil {
133 | log.Error("BAD INSERT: ", r)
134 | }
135 | }()
136 | tr, mtx := q.getTree(id)
137 | mtx.Lock()
138 | if tr == nil {
139 | log.Panicf("This should not happen")
140 | }
141 | if tr.store == nil {
142 | //Empty store
143 | tr.store = make([]qtree.Record, 0, len(r)*2)
144 | tr.sigEC = make(chan bool, 1)
145 | //Also spawn the coalesce timeout goroutine
146 | go func(abrt chan bool) {
147 | tmt := time.After(time.Duration(q.cfg.TransactionCoalesceInterval) * time.Millisecond)
148 | select {
149 | case <-tmt:
150 | //do coalesce
151 | mtx.Lock()
152 | //In case we early tripped between waiting for lock and getting it, commit will return ok
153 | //log.Debug("Coalesce timeout %v", id.String())
154 | tr.commit(q)
155 | mtx.Unlock()
156 | case <-abrt:
157 | return
158 | }
159 | }(tr.sigEC)
160 | }
161 | tr.store = append(tr.store, r...)
162 | if uint64(len(tr.store)) >= q.cfg.TransactionCoalesceEarlyTrip {
163 | tr.sigEC <- true
164 | log.Debug("Coalesce early trip %v", id.String())
165 | tr.commit(q)
166 | }
167 | mtx.Unlock()
168 | }
169 | func (q *Quasar) Flush(id uuid.UUID) error {
170 | tr, mtx := q.getTree(id)
171 | mtx.Lock()
172 | if len(tr.store) != 0 {
173 | tr.sigEC <- true
174 | tr.commit(q)
175 | fmt.Printf("Commit done %+v\n", id)
176 | } else {
177 | fmt.Printf("no store\n")
178 | }
179 | mtx.Unlock()
180 | return nil
181 | }
182 |
183 | //These functions are the API. TODO add all the bounds checking on PW, and sanity on start/end
184 | func (q *Quasar) QueryValues(id uuid.UUID, start int64, end int64, gen uint64) ([]qtree.Record, uint64, error) {
185 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
186 | if err != nil {
187 | return nil, 0, err
188 | }
189 | rv, err := tr.ReadStandardValuesBlock(start, end)
190 | return rv, tr.Generation(), err
191 | }
192 |
193 | func (q *Quasar) QueryValuesStream(id uuid.UUID, start int64, end int64, gen uint64) (chan qtree.Record, chan error, uint64) {
194 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
195 | if err != nil {
196 | return nil, nil, 0
197 | }
198 | recordc := make(chan qtree.Record)
199 | errc := make(chan error)
200 | go tr.ReadStandardValuesCI(recordc, errc, start, end)
201 | return recordc, errc, tr.Generation()
202 | }
203 |
204 | func (q *Quasar) QueryStatisticalValues(id uuid.UUID, start int64, end int64,
205 | gen uint64, pointwidth uint8) ([]qtree.StatRecord, uint64, error) {
206 | //fmt.Printf("QSV0 s=%v e=%v pw=%v\n", start, end, pointwidth)
207 | start &^= ((1 << pointwidth) - 1)
208 | end &^= ((1 << pointwidth) - 1)
209 | end -= 1
210 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
211 | if err != nil {
212 | return nil, 0, err
213 | }
214 | rv, err := tr.QueryStatisticalValuesBlock(start, end, pointwidth)
215 | if err != nil {
216 | return nil, 0, err
217 | }
218 | return rv, tr.Generation(), nil
219 | }
220 | func (q *Quasar) QueryStatisticalValuesStream(id uuid.UUID, start int64, end int64,
221 | gen uint64, pointwidth uint8) (chan qtree.StatRecord, chan error, uint64) {
222 | fmt.Printf("QSV1 s=%v e=%v pw=%v\n", start, end, pointwidth)
223 | start &^= ((1 << pointwidth) - 1)
224 | end &^= ((1 << pointwidth) - 1)
225 | end -= 1
226 | rvv := make(chan qtree.StatRecord, 1024)
227 | rve := make(chan error)
228 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
229 | if err != nil {
230 | return nil, nil, 0
231 | }
232 | go tr.QueryStatisticalValues(rvv, rve, start, end, pointwidth)
233 | return rvv, rve, tr.Generation()
234 | }
235 |
236 | func (q *Quasar) QueryWindow(id uuid.UUID, start int64, end int64,
237 | gen uint64, width uint64, depth uint8) (chan qtree.StatRecord, uint64) {
238 | rvv := make(chan qtree.StatRecord, 1024)
239 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
240 | if err != nil {
241 | return nil, 0
242 | }
243 | go tr.QueryWindow(start, end, width, depth, rvv)
244 | return rvv, tr.Generation()
245 | }
246 |
247 | func (q *Quasar) QueryGeneration(id uuid.UUID) (uint64, error) {
248 | sb := q.bs.LoadSuperblock(id, bstore.LatestGeneration)
249 | if sb == nil {
250 | return 0, qtree.ErrNoSuchStream
251 | }
252 | return sb.Gen(), nil
253 | }
254 |
255 | func (q *Quasar) QueryNearestValue(id uuid.UUID, time int64, backwards bool, gen uint64) (qtree.Record, uint64, error) {
256 | tr, err := qtree.NewReadQTree(q.bs, id, gen)
257 | if err != nil {
258 | return qtree.Record{}, 0, err
259 | }
260 | rv, err := tr.FindNearestValue(time, backwards)
261 | return rv, tr.Generation(), err
262 | }
263 |
264 | type ChangedRange struct {
265 | Start int64
266 | End int64
267 | }
268 |
269 | //Resolution is how far down the tree to go when working out which blocks have changed. Higher resolutions are faster
270 | //but will give you back coarser results.
271 | func (q *Quasar) QueryChangedRanges(id uuid.UUID, startgen uint64, endgen uint64, resolution uint8) ([]ChangedRange, uint64, error) {
272 | //0 is a reserved generation, so is 1, which means "before first"
273 | if startgen == 0 {
274 | startgen = 1
275 | }
276 | tr, err := qtree.NewReadQTree(q.bs, id, endgen)
277 | if err != nil {
278 | log.Debug("Error on QCR open tree")
279 | return nil, 0, err
280 | }
281 | rv := make([]ChangedRange, 0, 1024)
282 | rch := tr.FindChangedSince(startgen, resolution)
283 | var lr *ChangedRange = nil
284 | for {
285 |
286 | select {
287 | case cr, ok := <-rch:
288 | if !ok {
289 | //This is the end.
290 | //Do we have an unsaved LR?
291 | if lr != nil {
292 | rv = append(rv, *lr)
293 | }
294 | return rv, tr.Generation(), nil
295 | }
296 | if !cr.Valid {
297 | log.Panicf("Didn't think this could happen")
298 | }
299 | //Coalesce
300 | if lr != nil && cr.Start == lr.End {
301 | lr.End = cr.End
302 | } else {
303 | if lr != nil {
304 | rv = append(rv, *lr)
305 | }
306 | lr = &ChangedRange{Start: cr.Start, End: cr.End}
307 | }
308 | }
309 | }
310 | return rv, tr.Generation(), nil
311 | }
312 |
313 | func (q *Quasar) DeleteRange(id uuid.UUID, start int64, end int64) error {
314 | tr, mtx := q.getTree(id)
315 | mtx.Lock()
316 | if len(tr.store) != 0 {
317 | tr.sigEC <- true
318 | tr.commit(q)
319 | }
320 | wtr, err := qtree.NewWriteQTree(q.bs, id)
321 | if err != nil {
322 | log.Panic(err)
323 | }
324 | err = wtr.DeleteRange(start, end)
325 | if err != nil {
326 | log.Panic(err)
327 | }
328 | wtr.Commit()
329 | mtx.Unlock()
330 | return nil
331 | }
332 |
--------------------------------------------------------------------------------
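quasar.go is the embedding API: NewQuasar builds a BlockStore from Params, InsertValues buffers records per stream until the coalescence interval elapses or the early-trip threshold is hit, and the Query* methods read a tree at a chosen generation. Below is a condensed usage sketch; the parameter values are illustrative, the Params keys follow what btrdbd/config.go assembles for the file provider, and it assumes the database files have already been created (`-makedb`) and that MongoDB is reachable.

package main

import (
	"fmt"

	"github.com/SoftwareDefinedBuildings/btrdb"
	"github.com/SoftwareDefinedBuildings/btrdb/qtree"
	"github.com/pborman/uuid"
)

func main() {
	cfg := btrdb.QuasarConfig{
		DatablockCacheSize:           1000, // ~8MB, per the comment on the field
		TransactionCoalesceEnable:    true,
		TransactionCoalesceInterval:  5000,  // ms before a buffered stream is committed
		TransactionCoalesceEarlyTrip: 16384, // buffered records that force an early commit
		Params: map[string]string{
			"provider":    "file",
			"dbpath":      "/srv/quasartestdb/",
			"mongoserver": "localhost",
			"collection":  "quasar",
			"cachesize":   "1000",
		},
	}
	q, err := btrdb.NewQuasar(&cfg)
	if err != nil {
		panic(err)
	}

	id := uuid.NewRandom()
	q.InsertValues(id, []qtree.Record{{Time: 1000, Val: 1.5}, {Time: 2000, Val: 2.5}})
	q.Flush(id) // force the coalescence buffer to commit now

	recs, gen, err := q.QueryValues(id, 0, 1<<40, btrdb.LatestGeneration)
	if err != nil {
		panic(err)
	}
	fmt.Printf("generation %v: %v records\n", gen, len(recs))
}

--------------------------------------------------------------------------------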
/internal/bstore/bstore_test.go:
--------------------------------------------------------------------------------
1 | package bstore
2 |
3 | import (
4 | "github.com/pborman/uuid"
5 | "math/rand"
6 | "reflect"
7 | "strings"
8 | "testing"
9 | "time"
10 | )
11 |
12 | func mUint64() uint64 {
13 | return uint64(rand.Uint32())
14 | //return (uint64(rand.Uint32()) << 32) + uint64(rand.Uint32())
15 | }
16 | func mInt64() int64 {
17 | return int64(mUint64())
18 | }
19 | func mFloat64() float64 {
20 | return rand.Float64()
21 | }
22 |
23 | /**
24 | * Randomly populate the fields of a struct
25 | */
26 | func FillBlock(rv interface{}) {
27 | rand.Seed(time.Now().UnixNano())
28 | t := reflect.ValueOf(rv)
29 | for i := 0; i < t.Elem().NumField(); i++ {
30 | fld := t.Elem().Field(i)
31 | switch fld.Type().Kind() {
32 | case reflect.Array:
33 | for k := 0; k < fld.Len(); k++ {
34 | if fld.Type().Elem().Kind() == reflect.Float64 {
35 | fld.Index(k).SetFloat(mFloat64())
36 | } else if fld.Type().Elem().Kind() == reflect.Uint64 {
37 | fld.Index(k).SetUint(mUint64())
38 | } else if fld.Type().Elem().Kind() == reflect.Int64 {
39 | fld.Index(k).SetInt(mInt64())
40 | } else if fld.Type().Elem().Kind() == reflect.Uint8 {
41 | fld.Index(k).SetUint(mUint64())
42 | } else {
43 | log.Panic("Unhandled element type: %v", fld.Type().Elem().Kind())
44 | }
45 | }
46 | case reflect.Uint64:
47 | fld.SetUint(mUint64())
48 | case reflect.Uint8:
49 | fld.SetUint(mUint64() & 0xFF)
50 | case reflect.Uint16:
51 | fld.SetUint(mUint64() & 0xFFFF)
52 | case reflect.Int64:
53 | fld.SetInt(mInt64())
54 | case reflect.Int:
55 | fld.SetInt(mInt64())
56 | default:
57 | log.Panicf("Unrecognized type: %+v", fld.Type().Kind())
58 | }
59 | }
60 | }
61 |
62 | func MakeAllocatedCoreblock() *Coreblock {
63 | mBS()
64 | db, err := _gen.AllocateCoreblock()
65 | if err != nil {
66 | log.Panic(err)
67 | }
68 | addr := db.Identifier
69 | FillBlock(db)
70 | db.Identifier = addr
71 | return db
72 | }
73 |
74 | func MakeAllocatedVBlock() *Vectorblock {
75 | mBS()
76 | v, err := _gen.AllocateVectorblock()
77 | if err != nil {
78 | log.Panic(err)
79 | }
80 | addr := v.Identifier
81 | FillBlock(v)
82 | v.Len = VSIZE
83 | v.Identifier = addr
84 | return v
85 | }
86 |
87 | func MakeCoreblock() *Coreblock {
88 | db := new(Coreblock)
89 | FillBlock(db)
90 | for i := 0; i < KFACTOR; i++ {
91 | //These have special meaning, so don't test it here
92 | if db.Addr[i] == 0 {
93 | db.Addr[i] = 1
94 | }
95 | }
96 | return db
97 | }
98 |
99 | func MakeVBlock() *Vectorblock {
100 | v := new(Vectorblock)
101 | FillBlock(v)
102 | v.Len = VSIZE
103 | return v
104 | }
105 |
106 | /**
107 | * This should work with any object that uses the struct tags to
108 | * mark fields that don't need to match after SERDES
109 | */
110 | func CompareNoTags(lhs interface{}, rhs interface{}, tags []string) bool {
111 | chk := make(map[string]bool)
112 | for _, s := range tags {
113 | chk[s] = true
114 | }
115 | vlhs := reflect.ValueOf(lhs)
116 | vrhs := reflect.ValueOf(rhs)
117 | if vlhs.Type() != vrhs.Type() {
118 | log.Fatalf("Types differ %v %v", vlhs.Type(), vrhs.Type())
119 | return false
120 | }
121 | for k := 0; k < vlhs.NumField(); k++ {
122 | tagstring := string(reflect.TypeOf(lhs).Field(k).Tag)
123 | tags := strings.Split(tagstring, ",")
124 | doskip := false
125 | for _, k := range tags {
126 | if chk[k] {
127 | doskip = true
128 | }
129 | }
130 | if doskip {
131 | continue
132 | }
133 | if !reflect.DeepEqual(vlhs.Field(k).Interface(), vrhs.Field(k).Interface()) {
134 | log.Fatalf("Field differs: %v, %v != %v", reflect.TypeOf(lhs).Field(k).Name,
135 | vlhs.Field(k).Interface(), vrhs.Field(k).Interface())
136 | return false
137 | }
138 | }
139 | return true
140 | }
141 |
142 | var _bs *BlockStore = nil
143 | var _gen *Generation = nil
144 |
145 | func mBS() {
146 | testuuid := uuid.NewRandom()
147 | params := map[string]string{
148 | "dbpath": "/srv/quasartestdb/",
149 | "mongoserver": "localhost",
150 | "cachesize": "0",
151 | }
152 | nbs, err := NewBlockStore(params)
153 | if err != nil {
154 | log.Panic(err)
155 | }
156 | if _bs == nil {
157 | _bs = nbs
158 | _gen = _bs.ObtainGeneration(testuuid)
159 | }
160 | }
161 |
162 | func TestCoreBlockSERDES(t *testing.T) {
163 | db := MakeCoreblock()
164 | buf := make([]byte, CBSIZE)
165 | db.Serialize(buf)
166 | out := new(Coreblock)
167 | out.Deserialize(buf)
168 | if !CompareNoTags(*db, *out, []string{"implicit"}) {
169 | t.Error("Core block SERDES faled")
170 | }
171 | }
172 |
173 | func TestCoreBlockSERDESAbsFullZero(t *testing.T) {
174 | db := MakeCoreblock()
175 | db.Addr[10] = 0
176 | db.Min[10] = 0
177 | db.Mean[10] = 0
178 | db.Max[10] = 0
179 | db.Count[10] = 0
180 |
181 | db.Addr[11] = 0
182 | db.Min[11] = 0
183 | db.Mean[11] = 0
184 | db.Max[11] = 0
185 | db.Count[11] = 0
186 | db.CGeneration[11] = 0
187 |
188 | db.Addr[54] = 0
189 | db.Min[54] = 0
190 | db.Mean[54] = 0
191 | db.Max[54] = 0
192 | db.Count[54] = 0
193 |
194 | for i := 55; i < KFACTOR; i++ {
195 | db.Addr[i] = 0
196 | db.Min[i] = 0
197 | db.Mean[i] = 0
198 | db.Max[i] = 0
199 | db.Count[i] = 0
200 | db.CGeneration[i] = 0
201 | }
202 |
203 | buf := make([]byte, CBSIZE)
204 | db.Serialize(buf)
205 | out := new(Coreblock)
206 | out.Deserialize(buf)
207 |
208 | if !CompareNoTags(*db, *out, []string{"implicit"}) {
209 | t.Error("Core block SERDES faled")
210 | }
211 | }
212 |
213 | func TestCoreBlockBadDES(t *testing.T) {
214 | db := MakeCoreblock()
215 | buf := make([]byte, CBSIZE)
216 | db.Serialize(buf)
217 | out := new(Coreblock)
218 | out.Deserialize(buf)
219 | if out.GetDatablockType() != Core {
220 | t.FailNow()
221 | }
222 | defer func() {
223 | if r := recover(); r == nil {
224 | //We expected a failure
225 | t.FailNow()
226 | }
227 | }()
228 | vb := new(Vectorblock)
229 | vb.Deserialize(buf)
230 | t.FailNow()
231 | }
232 | func TestVectorBlockBadDES(t *testing.T) {
233 | v := MakeVBlock()
234 | buf := make([]byte, VBSIZE)
235 | v.Serialize(buf)
236 | out := new(Vectorblock)
237 | out.Deserialize(buf)
238 | if out.GetDatablockType() != Vector {
239 | t.Fatal("Wrong id on block")
240 | }
241 | defer func() {
242 | if r := recover(); r == nil {
243 | //We expected a failure
244 | t.Fatal("Did not throw exception")
245 | }
246 | }()
247 | cb := new(Coreblock)
248 | cb.Deserialize(buf)
249 | t.FailNow()
250 | }
251 | func TestBufferType(t *testing.T) {
252 | v := MakeVBlock()
253 | buf := make([]byte, VBSIZE)
254 | v.Serialize(buf)
255 | if DatablockGetBufferType(buf) != Vector {
256 | t.Fatal("Expected Vector")
257 | }
258 | c := MakeCoreblock()
259 | buf2 := make([]byte, CBSIZE)
260 | c.Serialize(buf2)
261 | if DatablockGetBufferType(buf2) != Core {
262 | t.Fatal("Expected Core")
263 | }
264 | buf3 := make([]byte, 2)
265 | buf3[0] = byte(5)
266 | if DatablockGetBufferType(buf3) != Bad {
267 | t.Fatal("Expected Bad")
268 | }
269 | }
270 | func TestVBlockSERDES(t *testing.T) {
271 | v := MakeVBlock()
272 | buf := make([]byte, VBSIZE)
273 | v.Serialize(buf)
274 | out := new(Vectorblock)
275 | out.Deserialize(buf)
276 | if !CompareNoTags(*v, *out, []string{"implicit"}) {
277 | t.Error("Vector block SERDES failed")
278 | }
279 | }
280 |
281 | func TestVBlockManSERDES(t *testing.T) {
282 | v := new(Vectorblock)
283 | for i := 0; i < 6; i++ {
284 | v.Time[i] = int64(i * 100000)
285 | v.Value[i] = float64(i * 100000.0)
286 | }
287 | v.Len = 6
288 | buf := make([]byte, VBSIZE)
289 | v.Serialize(buf)
290 | out := new(Vectorblock)
291 | out.Deserialize(buf)
292 | for i := 0; i < 6; i++ {
293 | if v.Value[i] != out.Value[i] {
294 | t.Error("Fail")
295 | }
296 | }
297 | }
298 |
299 | func TestCBlockE2ESERDES(t *testing.T) {
300 | db := MakeAllocatedCoreblock()
301 | for i := 0; i < KFACTOR; i++ {
302 | vb, err := _gen.AllocateVectorblock()
303 | if err != nil {
304 | t.Errorf("Could not allocate VB %v", err)
305 | }
306 | reloc_addr := vb.Identifier
307 | FillBlock(vb)
308 | vb.Len = VSIZE
309 | vb.Identifier = reloc_addr
310 | db.Addr[i] = vb.Identifier
311 | }
312 | cpy := *db
313 | amap, err := _gen.Commit()
314 | if err != nil {
315 | t.Error(err)
316 | }
317 | _bs = nil
318 | _gen = nil
319 | log.Info("reloc address was 0x%016x", cpy.Identifier)
320 | log.Info("cnt0 was %v", cpy.Count[0])
321 | actual_addr, ok := amap[cpy.Identifier]
322 | if !ok {
323 | t.Errorf("relocation address 0x%016x did not exist in address map", cpy.Identifier)
324 | }
325 | mBS()
326 | out := _bs.ReadDatablock(actual_addr, cpy.Generation, cpy.PointWidth, cpy.StartTime)
327 | cpy.Identifier = actual_addr
328 | for i := 0; i < KFACTOR; i++ {
329 | cpy.Addr[i] = amap[cpy.Addr[i]]
330 | }
331 | if !CompareNoTags(cpy, *(out.(*Coreblock)), []string{}) {
332 | t.Error("E2E C SERDES failed")
333 | }
334 | }
335 |
336 | func TestVBlockE2ESERDES(t *testing.T) {
337 | db := MakeAllocatedVBlock()
338 | cpy := *db
339 | amap, err := _gen.Commit()
340 | if err != nil {
341 | t.Error(err)
342 | }
343 | _bs = nil
344 | _gen = nil
345 | log.Info("reloc address was 0x%016x", cpy.Identifier)
346 | actual_addr, ok := amap[cpy.Identifier]
347 | if !ok {
348 | t.Errorf("relocation address 0x%016x did not exist in address map", cpy.Identifier)
349 | }
350 | mBS()
351 | out := _bs.ReadDatablock(actual_addr, cpy.Generation, cpy.PointWidth, cpy.StartTime)
352 | cpy.Identifier = actual_addr
353 | //cpy.Identifier = actual_addr
354 | if !CompareNoTags(cpy, *(out.(*Vectorblock)), []string{}) {
355 | t.Error("E2E V SERDES failed")
356 | }
357 | }
358 |
359 | func TestVCopyInto(t *testing.T) {
360 | db := MakeVBlock()
361 | out := &Vectorblock{}
362 | db.CopyInto(out)
363 | if !CompareNoTags(*db, *out, []string{"metadata"}) {
364 | t.Error("V CopyInto failed")
365 | }
366 | }
367 |
368 | func TestCCopyInto(t *testing.T) {
369 | db := MakeCoreblock()
370 | out := &Coreblock{}
371 | db.CopyInto(out)
372 | if !CompareNoTags(*db, *out, []string{"metadata"}) {
373 | t.Error("C CopyInto failed")
374 | }
375 | }
376 |
377 | /*
378 | func BenchmarkSERDER(b *testing.B) {
379 | dblocks_in := make([]*Coreblock, b.N)
380 | for i := 0; i < b.N; i++ {
381 | dblocks_in[i] = MakeCoreblock()
382 | }
383 | dblocks_out := make([]*Coreblock, b.N)
384 | for i := 0; i < b.N; i++ {
385 | dblocks_out[i] = new(Coreblock)
386 | }
387 | buf := make([]byte, DBSIZE)
388 | b.ResetTimer()
389 | for i := 0; i < b.N; i++ {
390 | dblocks_in[0].Serialize(buf)
391 | dblocks_out[0].Deserialize(buf)
392 | }
393 | }
394 | */
395 |
--------------------------------------------------------------------------------
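
A note on CompareNoTags in the test file above: it compares two structs of the same type field by field via reflection, skipping any field whose comma-separated struct tag lists one of the given markers (the tests pass "implicit" or "metadata" so that fields which legitimately change across SERDES or CopyInto are ignored). Below is a minimal standalone sketch of the same idea; the Example type and helper are illustrative only and are not part of this repository.

package main

import (
	"fmt"
	"reflect"
	"strings"
)

//Example stands in for Coreblock/Vectorblock: Identifier is tagged so that a
//comparison may ignore it after serialize/deserialize.
type Example struct {
	Identifier uint64 `implicit,metadata`
	Len        uint16
}

//equalExceptTags reports whether all fields of lhs and rhs match, skipping any
//field whose comma-separated tag contains one of the skip markers.
func equalExceptTags(lhs, rhs interface{}, skip []string) bool {
	want := make(map[string]bool)
	for _, s := range skip {
		want[s] = true
	}
	vl, vr := reflect.ValueOf(lhs), reflect.ValueOf(rhs)
	if vl.Type() != vr.Type() {
		return false
	}
	for i := 0; i < vl.NumField(); i++ {
		tags := strings.Split(string(vl.Type().Field(i).Tag), ",")
		skipField := false
		for _, tag := range tags {
			if want[tag] {
				skipField = true
			}
		}
		if skipField {
			continue
		}
		if !reflect.DeepEqual(vl.Field(i).Interface(), vr.Field(i).Interface()) {
			return false
		}
	}
	return true
}

func main() {
	a := Example{Identifier: 1, Len: 1024}
	b := Example{Identifier: 99, Len: 1024}
	//true: Identifier is tagged "implicit" and therefore skipped
	fmt.Println(equalExceptTags(a, b, []string{"implicit"}))
}

Splitting the raw tag string on commas mirrors what CompareNoTags itself does, so a tag like implicit,metadata exempts a field from comparison under either marker.
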
/qtree/qtree_utils.go:
--------------------------------------------------------------------------------
1 | package qtree
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/pborman/uuid"
7 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
8 | )
9 |
10 | const PWFACTOR = bstore.PWFACTOR
11 | const KFACTOR = bstore.KFACTOR
12 | const MICROSECOND = 1000
13 | const MILLISECOND = 1000 * MICROSECOND
14 | const SECOND = 1000 * MILLISECOND
15 | const MINUTE = 60 * SECOND
16 | const HOUR = 60 * MINUTE
17 | const DAY = 24 * HOUR
18 | const ROOTPW = 56 //This makes each bucket at the root ~= 2.2 years
19 | //so the root spans 146.23 years
20 | const ROOTSTART = -1152921504606846976 //This makes the 16th bucket start at 1970 (0)
21 | const MinimumTime = -(16 << 56)
22 | const MaximumTime = (48 << 56)
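//A quick check of the numbers above, with times in nanoseconds and assuming
//KFACTOR = 64 (which is what the 146.23 year root span implies):
//  one root bucket = 2^ROOTPW ns = 2^56 ns ~= 2.28 years
//  whole root      = 64 * 2^56 ns ~= 146.2 years
//  ROOTSTART       = -16 * 2^56 = -1152921504606846976, so bucket 16 begins at t = 0 (1970)
//  MinimumTime and MaximumTime, -(16 << 56) and (48 << 56), are the root's start and end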
23 |
24 | type QTree struct {
25 | sb *bstore.Superblock
26 | bs *bstore.BlockStore
27 | gen *bstore.Generation
28 | root *QTreeNode
29 | commited bool
30 | }
31 |
32 | type Record struct {
33 | Time int64
34 | Val float64
35 | }
36 |
37 | type QTreeNode struct {
38 | tr *QTree
39 | vector_block *bstore.Vectorblock
40 | core_block *bstore.Coreblock
41 | isLeaf bool
42 | child_cache [bstore.KFACTOR]*QTreeNode
43 | parent *QTreeNode
44 | isNew bool
45 | }
46 |
47 | type RecordSlice []Record
48 |
49 | type ChangedRange struct {
50 | Valid bool
51 | Start int64
52 | End int64
53 | }
54 |
55 | func (s RecordSlice) Len() int {
56 | return len(s)
57 | }
58 |
59 | func (s RecordSlice) Swap(i, j int) {
60 | s[i], s[j] = s[j], s[i]
61 | }
62 |
63 | func (s RecordSlice) Less(i, j int) bool {
64 | return s[i].Time < s[j].Time
65 | }
66 |
67 | func (tr *QTree) Commit() {
68 | if tr.commited {
69 | log.Panicf("Tree already committed")
70 | }
71 | if tr.gen == nil {
72 | log.Panicf("Commit on non-write-tree")
73 | }
74 |
75 | tr.gen.Commit()
76 | tr.commited = true
77 | tr.gen = nil
78 |
79 | }
80 |
81 | func (n *QTree) FindNearestValue(time int64, backwards bool) (Record, error) {
82 | if n.root == nil {
83 | return Record{}, ErrNoSuchPoint
84 | }
85 | return n.root.FindNearestValue(time, backwards)
86 | }
87 |
88 | func (n *QTree) Generation() uint64 {
89 | if n.gen != nil {
90 | //Return the gen it will have after commit
91 | return n.gen.Number()
92 | } else {
93 | //Return its current gen
94 | return n.sb.Gen()
95 | }
96 | return n.gen.Number()
97 | }
98 |
99 | func (tr *QTree) GetReferencedAddrsDebug() map[uint64]bool {
100 | refset := make(map[uint64]bool, 1024000)
101 |
102 | rchan := tr.GetAllReferencedVAddrs()
103 | //for i, v := range e_tree.
104 | idx := 0
105 | for {
106 | val, ok := <-rchan
107 | if idx%8192 == 0 {
108 | log.Info("Got referenced addr #%d", idx)
109 | }
110 | idx += 1
111 | if !ok {
112 | break
113 | }
114 | refset[val] = true
115 | }
116 | return refset
117 | }
118 |
119 | func (tr *QTree) LoadNode(addr uint64, impl_Generation uint64, impl_Pointwidth uint8, impl_StartTime int64) (*QTreeNode, error) {
120 | db := tr.bs.ReadDatablock(tr.sb.Uuid(), addr, impl_Generation, impl_Pointwidth, impl_StartTime)
121 | n := &QTreeNode{tr: tr}
122 | switch db.GetDatablockType() {
123 | case bstore.Vector:
124 | n.vector_block = db.(*bstore.Vectorblock)
125 | n.isLeaf = true
126 | case bstore.Core:
127 | n.core_block = db.(*bstore.Coreblock)
128 | n.isLeaf = false
129 | default:
130 | log.Panicf("What kind of type is this? %+v", db.GetDatablockType())
131 | }
132 | if n.ThisAddr() == 0 {
133 | log.Panicf("Node has zero address")
134 | }
135 | return n, nil
136 | }
137 |
138 | func (tr *QTree) NewCoreNode(startTime int64, pointWidth uint8) (*QTreeNode, error) {
139 | if tr.gen == nil {
140 | return nil, ErrImmutableTree
141 | }
142 | cb, err := tr.gen.AllocateCoreblock()
143 | if err != nil {
144 | return nil, err
145 | }
146 | cb.PointWidth = pointWidth
147 | startTime = ClampTime(startTime, pointWidth)
148 | cb.StartTime = startTime
149 | rv := &QTreeNode{
150 | core_block: cb,
151 | tr: tr,
152 | isNew: true,
153 | }
154 | return rv, nil
155 | }
156 |
157 | func (tr *QTree) NewVectorNode(startTime int64, pointWidth uint8) (*QTreeNode, error) {
158 | if tr.gen == nil {
159 | return nil, ErrImmutableTree
160 | }
161 | vb, err := tr.gen.AllocateVectorblock()
162 | if err != nil {
163 | return nil, err
164 | }
165 | vb.PointWidth = pointWidth
166 | startTime = ClampTime(startTime, pointWidth)
167 | vb.StartTime = startTime
168 | rv := &QTreeNode{
169 | vector_block: vb,
170 | tr: tr,
171 | isLeaf: true,
172 | isNew: true,
173 | }
174 | return rv, nil
175 | }
176 |
177 | /**
178 | * Load a quasar tree
179 | */
180 | func NewReadQTree(bs *bstore.BlockStore, id uuid.UUID, generation uint64) (*QTree, error) {
181 | sb := bs.LoadSuperblock(id, generation)
182 | if sb == nil {
183 | return nil, ErrNoSuchStream
184 | }
185 | rv := &QTree{sb: sb, bs: bs}
186 | if sb.Root() != 0 {
187 | rt, err := rv.LoadNode(sb.Root(), sb.Gen(), ROOTPW, ROOTSTART)
188 | if err != nil {
189 | log.Panicf("%v", err)
190 | return nil, err
191 | }
192 | //log.Debug("The start time for the root is %v",rt.StartTime())
193 | rv.root = rt
194 | }
195 | return rv, nil
196 | }
197 |
198 | func NewWriteQTree(bs *bstore.BlockStore, id uuid.UUID) (*QTree, error) {
199 | gen := bs.ObtainGeneration(id)
200 | rv := &QTree{
201 | sb: gen.New_SB,
202 | gen: gen,
203 | bs: bs,
204 | }
205 |
206 | //If there is an existing root node, we need to load it so that it
207 | //has the correct values
208 | if rv.sb.Root() != 0 {
209 | rt, err := rv.LoadNode(rv.sb.Root(), rv.sb.Gen(), ROOTPW, ROOTSTART)
210 | if err != nil {
211 | log.Panicf("%v", err)
212 | return nil, err
213 | }
214 | rv.root = rt
215 | } else {
216 | rt, err := rv.NewCoreNode(ROOTSTART, ROOTPW)
217 | if err != nil {
218 | log.Panicf("%v", err)
219 | return nil, err
220 | }
221 | rv.root = rt
222 | }
223 |
224 | return rv, nil
225 | }
226 |
227 | func (n *QTreeNode) Generation() uint64 {
228 | if n.isLeaf {
229 | return n.vector_block.Generation
230 | } else {
231 | return n.core_block.Generation
232 | }
233 | }
234 |
235 | func (n *QTreeNode) TreePath() string {
236 | rv := ""
237 | if n.isLeaf {
238 | rv += "V"
239 | } else {
240 | rv += "C"
241 | }
242 | dn := n
243 | for {
244 | par := dn.Parent()
245 | if par == nil {
246 | return rv
247 | }
248 | //Try to locate the index of this node in the parent
249 | addr := dn.ThisAddr()
250 | found := false
251 | for i := 0; i < bstore.KFACTOR; i++ {
252 | if par.core_block.Addr[i] == addr {
253 | rv = fmt.Sprintf("(%v)[%v].", par.PointWidth(), i) + rv
254 | found = true
255 | break
256 | }
257 | }
258 | if !found {
259 | log.Panicf("Could not find self address in parent")
260 | }
261 | dn = par
262 | }
263 | }
264 |
265 | func (n *QTreeNode) ArbitraryStartTime(idx uint64, pw uint8) int64 {
266 | return n.StartTime() + int64(idx*(1<<pw))
294 | rv := (t >> n.PointWidth())
295 | if rv >= bstore.KFACTOR {
296 | rv = bstore.KFACTOR - 1
297 | }
298 | return uint16(rv)
299 | }
300 |
301 | //Unlike core nodes, vectors have infinitely many buckets. This
302 | //function allows you to get a bucket idx for a time and an
303 | //arbitrary point width
304 | func (n *QTreeNode) ClampVBucket(t int64, pw uint8) uint64 {
305 | if !n.isLeaf {
306 | log.Panicf("This is intended for vectors")
307 | }
308 | if t < n.StartTime() {
309 | t = n.StartTime()
310 | }
311 | t -= n.StartTime()
312 | if pw > n.Parent().PointWidth() {
313 | log.Panicf("I can't do this dave")
314 | }
315 | idx := uint64(t) >> pw
316 | maxidx := uint64(n.Parent().WidthTime()) >> pw
317 | if idx >= maxidx {
318 | idx = maxidx - 1
319 | }
320 | return idx
321 | }
322 |
323 | func (n *QTreeNode) clone() (*QTreeNode, error) {
324 | var rv *QTreeNode
325 | var err error
326 | if !n.isLeaf {
327 | rv, err = n.tr.NewCoreNode(n.StartTime(), n.PointWidth())
328 | if err != nil {
329 | return nil, err
330 | }
331 | n.core_block.CopyInto(rv.core_block)
332 | } else {
333 | rv, err = n.tr.NewVectorNode(n.StartTime(), n.PointWidth())
334 | if err != nil {
335 | return nil, err
336 | }
337 | n.vector_block.CopyInto(rv.vector_block)
338 | }
339 | return rv, nil
340 | }
341 |
342 | func (n *QTreeNode) EndTime() int64 {
343 | if n.isLeaf {
344 | //We do this because our point width might not be *KFACTOR as we might be
345 | //at the lowest level
346 | return n.StartTime() + (1 << n.Parent().PointWidth())
347 | } else {
348 | //A core node has multiple buckets
349 | return n.StartTime() + (1<<n.PointWidth())*bstore.KFACTOR
350 | }
351 | }
--------------------------------------------------------------------------------
/internal/bstore/blockstore.go:
--------------------------------------------------------------------------------
213 | /*if len(gen.vblocks) > 100 {
214 | total := 0
215 | for _, v:= range gen.vblocks {
216 | total += int(v.Len)
217 | }
218 | log.Critical("Triggered vblock examination: %v blocks, %v points, %v avg", len(gen.vblocks), total, total/len(gen.vblocks))
219 | }*/
220 | gen.vblocks = nil
221 | gen.cblocks = nil
222 |
223 | fsb := fake_sblock{
224 | Uuid: gen.New_SB.uuid.String(),
225 | Gen: gen.New_SB.gen,
226 | Root: gen.New_SB.root,
227 | }
228 | if err := gen.blockstore.db.C("superblocks").Insert(fsb); err != nil {
229 | lg.Panic(err)
230 | }
231 | gen.flushed = true
232 | gen.blockstore.glock.RLock()
233 | //log.Printf("bs is %v, wlocks is %v", gen.blockstore, gen.blockstore._wlocks)
234 | gen.blockstore._wlocks[UUIDToMapKey(*gen.Uuid())].Unlock()
235 | gen.blockstore.glock.RUnlock()
236 | return address_map, nil
237 | }
238 |
239 | func (bs *BlockStore) datablockBarrier(fi int) {
240 | //Gonuts group says that I don't need to call Sync()
241 |
242 | //Block until all datablocks have finished writing
243 | /*bs.blockmtx[fi].Lock()
244 | err := bs.dbf[fi].Sync()
245 | if err != nil {
246 | log.Panic(err)
247 | }
248 | bs.blockmtx[fi].Unlock()*/
249 | //bs.ses.Fsync(false)
250 | }
251 |
252 | func (bs *BlockStore) allocateBlock() uint64 {
253 | relocation_address := <-bs.alloc
254 | return relocation_address
255 | }
256 |
257 | /**
258 | * The real function is supposed to allocate an address for the data
259 | * block, reserving it on disk, and then give back the data block that
260 | * can be filled in
261 | * This stub makes up an address, and mongo pretends it's real
262 | */
263 | func (gen *Generation) AllocateCoreblock() (*Coreblock, error) {
264 | cblock := &Coreblock{}
265 | cblock.Identifier = gen.blockstore.allocateBlock()
266 | cblock.Generation = gen.Number()
267 | gen.cblocks = append(gen.cblocks, cblock)
268 | return cblock, nil
269 | }
270 |
271 | func (gen *Generation) AllocateVectorblock() (*Vectorblock, error) {
272 | vblock := &Vectorblock{}
273 | vblock.Identifier = gen.blockstore.allocateBlock()
274 | vblock.Generation = gen.Number()
275 | gen.vblocks = append(gen.vblocks, vblock)
276 | return vblock, nil
277 | }
278 |
279 | func (bs *BlockStore) FreeCoreblock(cb **Coreblock) {
280 | *cb = nil
281 | }
282 |
283 | func (bs *BlockStore) FreeVectorblock(vb **Vectorblock) {
284 | *vb = nil
285 | }
286 |
287 | func (bs *BlockStore) DEBUG_DELETE_UUID(id uuid.UUID) {
288 | lg.Info("DEBUG removing uuid '%v' from database", id.String())
289 | _, err := bs.db.C("superblocks").RemoveAll(bson.M{"uuid": id.String()})
290 | if err != nil && err != mgo.ErrNotFound {
291 | lg.Panic(err)
292 | }
293 | if err == mgo.ErrNotFound {
294 | lg.Info("Query did not find superblock to delete")
295 | } else {
296 | lg.Info("err was nil")
297 | }
298 | //bs.datablockBarrier()
299 | }
300 |
301 | func (bs *BlockStore) ReadDatablock(uuid uuid.UUID, addr uint64, impl_Generation uint64, impl_Pointwidth uint8, impl_StartTime int64) Datablock {
302 | //Try to hit the cache first
303 | db := bs.cacheGet(addr)
304 | if db != nil {
305 | return db
306 | }
307 | syncbuf := block_buf_pool.Get().([]byte)
308 | trimbuf := bs.store.Read([]byte(uuid), addr, syncbuf)
309 | switch DatablockGetBufferType(trimbuf) {
310 | case Core:
311 | rv := &Coreblock{}
312 | rv.Deserialize(trimbuf)
313 | block_buf_pool.Put(syncbuf)
314 | rv.Identifier = addr
315 | rv.Generation = impl_Generation
316 | rv.PointWidth = impl_Pointwidth
317 | rv.StartTime = impl_StartTime
318 | bs.cachePut(addr, rv)
319 | return rv
320 | case Vector:
321 | rv := &Vectorblock{}
322 | rv.Deserialize(trimbuf)
323 | block_buf_pool.Put(syncbuf)
324 | rv.Identifier = addr
325 | rv.Generation = impl_Generation
326 | rv.PointWidth = impl_Pointwidth
327 | rv.StartTime = impl_StartTime
328 | bs.cachePut(addr, rv)
329 | return rv
330 | }
331 | lg.Panic("Strange datablock type")
332 | return nil
333 | }
334 |
335 | type fake_sblock struct {
336 | Uuid string
337 | Gen uint64
338 | Root uint64
339 | Unlinked bool
340 | }
341 |
342 | func (bs *BlockStore) LoadSuperblock(id uuid.UUID, generation uint64) *Superblock {
343 | var sb = fake_sblock{}
344 | if generation == LatestGeneration {
345 | //log.Info("loading superblock uuid=%v (lgen)", id.String())
346 | qry := bs.db.C("superblocks").Find(bson.M{"uuid": id.String()})
347 | if err := qry.Sort("-gen").One(&sb); err != nil {
348 | if err == mgo.ErrNotFound {
349 | lg.Info("sb notfound!")
350 | return nil
351 | } else {
352 | lg.Panic(err)
353 | }
354 | }
355 | } else {
356 | qry := bs.db.C("superblocks").Find(bson.M{"uuid": id.String(), "gen": generation})
357 | if err := qry.One(&sb); err != nil {
358 | if err == mgo.ErrNotFound {
359 | return nil
360 | } else {
361 | lg.Panic(err)
362 | }
363 | }
364 | }
365 | rv := Superblock{
366 | uuid: id,
367 | gen: sb.Gen,
368 | root: sb.Root,
369 | unlinked: sb.Unlinked,
370 | }
371 | return &rv
372 | }
373 |
374 | func CreateDatabase(params map[string]string) {
375 | ses, err := mgo.Dial(params["mongoserver"])
376 | if err != nil {
377 | lg.Critical("Could not connect to mongo database: %v", err)
378 | os.Exit(1)
379 | }
380 | db := ses.DB(params["collection"])
381 | idx := mgo.Index{
382 | Key: []string{"uuid", "-gen"},
383 | Unique: true,
384 | DropDups: true,
385 | Background: true,
386 | Sparse: false,
387 | }
388 | db.C("superblocks").EnsureIndex(idx)
389 | switch params["provider"] {
390 | case "file":
391 | if err := os.MkdirAll(params["dbpath"], 0755); err != nil {
392 | lg.Panic(err)
393 | }
394 | fp := new(fileprovider.FileStorageProvider)
395 | err := fp.CreateDatabase(params)
396 | if err != nil {
397 | lg.Critical("Error on create: %v", err)
398 | os.Exit(1)
399 | }
400 | case "ceph":
401 | cp := new(cephprovider.CephStorageProvider)
402 | err := cp.CreateDatabase(params)
403 | if err != nil {
404 | lg.Critical("Error on create: %v", err)
405 | os.Exit(1)
406 | }
407 | }
408 | }
409 |
--------------------------------------------------------------------------------
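
One thing worth spelling out about the Generation API above: AllocateCoreblock and AllocateVectorblock hand back blocks whose Identifier is only a provisional relocation address, and Commit returns a map from those provisional identifiers to the final addresses (TestCBlockE2ESERDES in blocktypes_test.go rewrites its child pointers through that map before re-reading). The sketch below shows that flow under those assumptions, constructing the BlockStore the same way mBS() does in the tests; it is illustrative and not code from the repository.

package main

import (
	"log"

	"github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
	"github.com/pborman/uuid"
)

//writeOneVector allocates a vector block, fills in a single point, commits the
//generation, and resolves the block's provisional identifier through the
//address map that Commit returns.
func writeOneVector(bs *bstore.BlockStore, id uuid.UUID) uint64 {
	gen := bs.ObtainGeneration(id)
	vb, err := gen.AllocateVectorblock()
	if err != nil {
		log.Panic(err)
	}
	provisional := vb.Identifier //only a relocation address at this point
	vb.Len = 1
	vb.Time[0] = 42
	vb.Value[0] = 3.14

	amap, err := gen.Commit()
	if err != nil {
		log.Panic(err)
	}
	return amap[provisional] //final address the block was written to
}

func main() {
	params := map[string]string{
		"dbpath":      "/srv/quasartestdb/",
		"mongoserver": "localhost",
		"cachesize":   "0",
	}
	bs, err := bstore.NewBlockStore(params)
	if err != nil {
		log.Panic(err)
	}
	addr := writeOneVector(bs, uuid.NewRandom())
	log.Printf("vector block landed at 0x%016x", addr)
}
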
/quasar_test.go:
--------------------------------------------------------------------------------
1 | package btrdb
2 |
3 | import (
4 | "fmt"
5 | _ "log"
6 | "math/rand"
7 | "testing"
8 | "time"
9 |
10 | "github.com/pborman/uuid"
11 | "github.com/SoftwareDefinedBuildings/btrdb/qtree"
12 | )
13 |
14 | const MICROSECOND = 1000
15 | const MILLISECOND = 1000 * MICROSECOND
16 | const SECOND = 1000 * MILLISECOND
17 | const MINUTE = 60 * SECOND
18 | const HOUR = 60 * MINUTE
19 | const DAY = 24 * HOUR
20 |
21 | /*
22 | func TestMultInsert(t *testing.T) {
23 | testuuid := uuid.NewRandom()
24 | cfg := &DefaultQuasarConfig
25 | cfg.BlockPath = "/srv/quasartestdb"
26 | q, err := NewQuasar(cfg)
27 | if err != nil {
28 | log.Panic(err)
29 | }
30 | vals := []qtree.Record{{10, 10}, {20, 20}}
31 | q.InsertValues(testuuid, vals)
32 | q.InsertValues(testuuid, vals)
33 | }
34 | */
35 | func init() {
36 | sd := time.Now().Unix()
37 | fmt.Printf(">>>> USING %v AS SEED <<<<<", sd)
38 | rand.Seed(sd)
39 | }
40 |
41 | /*
42 | var _bs *bstore.BlockStore = nil
43 |
44 | func mBS() {
45 | if _bs == nil {
46 | nbs, err := bstore.NewBlockStore("localhost", 0, "/srv/quasartestdb/")
47 | if err != nil {
48 | log.Panic(err)
49 | }
50 | _bs = nbs
51 | }
52 | }
53 | func GenBrk(avg uint64, spread uint64) chan uint64 {
54 | rv := make(chan uint64)
55 | go func() {
56 | for {
57 | num := int64(avg)
58 | num -= int64(spread / 2)
59 | num += rand.Int63n(int64(spread))
60 | rv <- uint64(num)
61 | }
62 | }()
63 | return rv
64 | }
65 | func GenData(s int64, e int64, avgTimeBetweenSamples uint64,
66 | spread uint64, dat func(int64) float64) []qtree.Record {
67 | if avgTimeBetweenSamples == 0 {
68 | panic("lolwut")
69 | }
70 | if e <= s {
71 | panic("s<=e")
72 | }
73 | log.Printf("e %v s %v avt %v", s, e, avgTimeBetweenSamples)
74 | p3 := uint64((e-s))/avgTimeBetweenSamples + 100
75 | log.Printf("p3: ", p3)
76 | rv := make([]qtree.Record, 0, p3)
77 | r := qtree.Record{}
78 | for t := s; t < e; {
79 | r.Time = t
80 | r.Val = dat(t)
81 | rv = append(rv, r)
82 | nt := t + int64(avgTimeBetweenSamples)
83 | if spread != 0 {
84 | nt -= int64(spread / 2)
85 | nt += rand.Int63n(int64(spread))
86 | }
87 | if nt > t {
88 | t = nt
89 | }
90 | }
91 | return rv
92 | }
93 | func MakeWTree() (*qtree.QTree, uuid.UUID) {
94 | id := uuid.NewRandom()
95 | mBS()
96 | tr, err := qtree.NewWriteQTree(_bs, id)
97 | if err != nil {
98 | log.Panic(err)
99 | }
100 | return tr, id
101 | }
102 | */
103 | func CompareData(lhs []qtree.Record, rhs []qtree.Record) {
104 | if len(lhs) != len(rhs) {
105 | log.Panicf("lhs != rhs len %d vs %d\n", len(lhs), len(rhs))
106 | }
107 | for i, v := range lhs {
108 | if rhs[i] != v {
109 | log.Panic("data differs")
110 | }
111 | }
112 | }
113 |
114 | /*
115 | func LoadWTree(id uuid.UUID) *qtree.QTree {
116 | mBS()
117 | tr, err := qtree.NewWriteQTree(_bs, id)
118 | if err != nil {
119 | log.Panic(err)
120 | }
121 | return tr
122 | }
123 |
124 | //This flushes, for now
125 | func TestInsertFlush(t *testing.T) {
126 | gs := int64(23) * 365 * DAY
127 | ge := int64(25) * 365 * DAY
128 | freq := uint64(100 * MINUTE)
129 | varn := uint64(10 * MINUTE)
130 | tdat := GenData(gs, ge, freq, varn,
131 | func(_ int64) float64 { return rand.Float64() })
132 | log.Printf("generated %v records", len(tdat))
133 |
134 | cfg := &DefaultQuasarConfig
135 | cfg.BlockPath = "/srv/quasartestdb"
136 | q, err := NewQuasar(cfg)
137 | if err != nil {
138 | log.Panic(err)
139 | }
140 |
141 | id := uuid.NewRandom()
142 | log.Printf("Generating uuid=%s", id)
143 | brk := GenBrk(100, 50)
144 | idx := 0
145 | for idx < len(tdat) {
146 | time.Sleep(100 * time.Millisecond)
147 | ln := int(<-brk)
148 | end := idx + ln
149 | if end > len(tdat) {
150 | end = len(tdat)
151 | }
152 | q.InsertValues(id, tdat[idx:end])
153 | q.Flush(id)
154 | idx += ln
155 | }
156 |
157 | q.Flush(id)
158 |
159 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration)
160 | if err != nil {
161 | log.Panic(err)
162 | }
163 | log.Printf("Test gen was: %v", gen)
164 | CompareData(dat, tdat)
165 |
166 | }
167 | */
168 | func TestArbWindow(t *testing.T) {
169 | Params := map[string]string{
170 | "mongoserver": "localhost",
171 | "provider": "file",
172 | "cachesize": "16000",
173 | "collection": "testdb",
174 | "dbpath": "/srv/testqdb/",
175 | }
176 | cfg := QuasarConfig{
177 | DatablockCacheSize: uint64(0),
178 | TransactionCoalesceEnable: true,
179 | TransactionCoalesceInterval: uint64(5000),
180 | TransactionCoalesceEarlyTrip: uint64(16000),
181 | Params: Params,
182 | }
183 | q, err := NewQuasar(&cfg)
184 | if err != nil {
185 | log.Panicf("error: %v", err)
186 | }
187 | startt := 0
188 | deltat := 1000000000
189 | tnum := 50000
190 | tdat := make([]qtree.Record, tnum)
191 | id := uuid.NewRandom()
192 | for i := 0; i < tnum; i++ {
193 | tdat[i].Time = int64(startt) + int64(deltat*i)
194 | tdat[i].Val = float64(i)
195 | }
196 | q.InsertValues(id, tdat)
197 | for i := 0; i < tnum; i++ {
198 | tdat[i].Time = int64(startt) + int64(deltat*i) + int64(tnum*2*deltat)
199 | tdat[i].Val = float64(i)
200 | }
201 | q.InsertValues(id, tdat)
202 | q.Flush(id)
203 | time.Sleep(2 * time.Second)
204 | log.Info("Stream: %+v\n", id)
205 | var rstart int64 = int64(startt) - int64(4000*deltat)
206 | var rend int64 = int64(startt + deltat*250000 + 5000000000)
207 | rvalc, _ := q.QueryWindow(id, rstart, rend, LatestGeneration, uint64(deltat)*700, 0)
208 | for {
209 | v, ok := <-rvalc
210 | log.Info("reading: %+v", v)
211 | if !ok {
212 | panic("eof")
213 | }
214 | /*exp := float64(v.Time+v.Time+int64(deltat)) / float64(deltat) / 2.0
215 | if math.Abs(v.Mean-exp) > 0.00001 {
216 | log.Panicf("got bad %+v\n expected mean: ", v, exp)
217 | }*/
218 | }
219 | }
220 |
221 | /*
222 | func TestUnlinkBlocks(t *testing.T) {
223 |
224 | gs := int64(24) * 365 * DAY
225 | ge := int64(25) * 365 * DAY
226 | freq := uint64(300 * MINUTE)
227 | varn := uint64(10 * MINUTE)
228 | tdat := GenData(gs, ge, freq, varn,
229 | func(_ int64) float64 { return rand.Float64() })
230 | log.Printf("generated %v records", len(tdat))
231 |
232 | cfg := &DefaultQuasarConfig
233 | cfg.BlockPath = "/srv/quasartestdb"
234 | q, err := NewQuasar(cfg)
235 | if err != nil {
236 | log.Panic(err)
237 | }
238 |
239 | {
240 | alloced, free, strange, leaked := q.bs.InspectBlocks()
241 | log.Printf("BEFORE SUMMARY:")
242 | log.Printf("ALLOCED: %d", alloced)
243 | log.Printf("FREE : %d", free)
244 | log.Printf("STRANGE: %d", strange)
245 | log.Printf("LEAKED : %d", leaked)
246 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
247 | }
248 | id := uuid.NewRandom()
249 | log.Printf("Generating uuid=%s", id)
250 | brk := GenBrk(100, 50)
251 | idx := 0
252 | for idx < len(tdat) {
253 | time.Sleep(1 * time.Second)
254 | ln := int(<-brk)
255 | end := idx + ln
256 | if end > len(tdat) {
257 | end = len(tdat)
258 | }
259 | q.InsertValues(id, tdat[idx:end])
260 | idx += ln
261 | }
262 | //Allow for coalescence
263 | time.Sleep(10 * time.Second)
264 |
265 | {
266 | alloced, free, strange, leaked := q.bs.InspectBlocks()
267 | log.Printf("AFTER SUMMARY:")
268 | log.Printf("ALLOCED: %d", alloced)
269 | log.Printf("FREE : %d", free)
270 | log.Printf("STRANGE: %d", strange)
271 | log.Printf("LEAKED : %d", leaked)
272 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
273 | }
274 | {
275 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration)
276 | if err != nil {
277 | log.Panic(err)
278 | }
279 | log.Printf("Test gen was: %v", gen)
280 | CompareData(dat, tdat)
281 | err = q.UnlinkBlocks([]uuid.UUID{id}, []uint64{0}, []uint64{gen - 1})
282 | if err != nil {
283 | log.Panic(err)
284 | }
285 | }
286 |
287 | {
288 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration)
289 | if err != nil {
290 | log.Panic(err)
291 | }
292 | log.Printf("Test gen was: %v", gen)
293 | CompareData(dat, tdat)
294 | }
295 |
296 | {
297 | alloced, free, strange, leaked := q.bs.InspectBlocks()
298 | log.Printf("AFTER2 SUMMARY:")
299 | log.Printf("ALLOCED: %d", alloced)
300 | log.Printf("FREE : %d", free)
301 | log.Printf("STRANGE: %d", strange)
302 | log.Printf("LEAKED : %d", leaked)
303 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
304 | }
305 | }
306 | func TestCompleteDelete(t *testing.T) {
307 | gs := int64(24) * 365 * DAY
308 | ge := int64(25) * 365 * DAY
309 | freq := uint64(300 * MINUTE)
310 | varn := uint64(10 * MINUTE)
311 | tdat := GenData(gs, ge, freq, varn,
312 | func(_ int64) float64 { return rand.Float64() })
313 | log.Printf("generated %v records", len(tdat))
314 | id := uuid.NewRandom()
315 | cfg := &DefaultQuasarConfig
316 | cfg.BlockPath = "/srv/quasartestdb"
317 | q, err := NewQuasar(cfg)
318 | if err != nil {
319 | log.Panic(err)
320 | }
321 | {
322 | q.InsertValues(id, tdat)
323 | q.Flush(id)
324 | }
325 | {
326 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration)
327 | if err != nil {
328 | log.Panic(err)
329 | }
330 | CompareData(dat, tdat)
331 | }
332 | {
333 | q.DeleteRange(id, gs, ge+1)
334 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration)
335 | if err != nil {
336 | log.Panic(err)
337 | }
338 | if len(dat) != 0 {
339 | t.Log("dat length wrong")
340 | t.Fail()
341 | }
342 | }
343 | {
344 | q.InsertValues(id, tdat)
345 | q.Flush(id)
346 | }
347 | {
348 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration)
349 | if err != nil {
350 | log.Panic(err)
351 | }
352 | CompareData(dat, tdat)
353 | }
354 |
355 | }
356 | func TestUnlinkBlocks2(t *testing.T) {
357 |
358 | gs := int64(24) * 365 * DAY
359 | ge := int64(25) * 365 * DAY
360 | freq := uint64(300 * MINUTE)
361 | varn := uint64(10 * MINUTE)
362 | tdat := GenData(gs, ge, freq, varn,
363 | func(_ int64) float64 { return rand.Float64() })
364 | log.Printf("generated %v records", len(tdat))
365 |
366 | cfg := &DefaultQuasarConfig
367 | cfg.BlockPath = "/srv/quasartestdb"
368 | q, err := NewQuasar(cfg)
369 | if err != nil {
370 | log.Panic(err)
371 | }
372 |
373 | {
374 | alloced, free, strange, leaked := q.bs.InspectBlocks()
375 | log.Printf("BEFORE SUMMARY:")
376 | log.Printf("ALLOCED: %d", alloced)
377 | log.Printf("FREE : %d", free)
378 | log.Printf("STRANGE: %d", strange)
379 | log.Printf("LEAKED : %d", leaked)
380 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
381 | }
382 | id := uuid.NewRandom()
383 | log.Printf("Generating uuid=%s", id)
384 | brk := GenBrk(100, 50)
385 | idx := 0
386 | for idx < len(tdat) {
387 | time.Sleep(1 * time.Second)
388 | ln := int(<-brk)
389 | end := idx + ln
390 | if end > len(tdat) {
391 | end = len(tdat)
392 | }
393 | q.InsertValues(id, tdat[idx:end])
394 | idx += ln
395 | }
396 | //Allow for coalescence
397 | time.Sleep(10 * time.Second)
398 | {
399 | alloced, free, strange, leaked := q.bs.InspectBlocks()
400 | log.Printf("BEFORE DELETE:")
401 | log.Printf("ALLOCED: %d", alloced)
402 | log.Printf("FREE : %d", free)
403 | log.Printf("STRANGE: %d", strange)
404 | log.Printf("LEAKED : %d", leaked)
405 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
406 | }
407 | {
408 | err := q.DeleteRange(id, tdat[1].Time, ge)
409 | if err != nil {
410 | t.Error(err)
411 | }
412 | }
413 | {
414 | q.InsertValues(id, []qtree.Record{{0, 100}})
415 | q.Flush(id)
416 | }
417 | {
418 | alloced, free, strange, leaked := q.bs.InspectBlocks()
419 | log.Printf("AFTER DELETE:")
420 | log.Printf("ALLOCED: %d", alloced)
421 | log.Printf("FREE : %d", free)
422 | log.Printf("STRANGE: %d", strange)
423 | log.Printf("LEAKED : %d", leaked)
424 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
425 | }
426 | {
427 | _, gen, err := q.QueryValues(id, gs, ge, LatestGeneration)
428 | if err != nil {
429 | log.Panic(err)
430 | }
431 | err = q.UnlinkBlocks([]uuid.UUID{id}, []uint64{0}, []uint64{gen})
432 | if err != nil {
433 | log.Panic(err)
434 | }
435 | }
436 |
437 | {
438 | alloced, free, strange, leaked := q.bs.InspectBlocks()
439 | log.Printf("AFTER FREE:")
440 | log.Printf("ALLOCED: %d", alloced)
441 | log.Printf("FREE : %d", free)
442 | log.Printf("STRANGE: %d", strange)
443 | log.Printf("LEAKED : %d", leaked)
444 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100)
445 | }
446 | }
447 | */
448 |
--------------------------------------------------------------------------------
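
TestArbWindow above drains the QueryWindow result channel by hand, logging each record and panicking once the channel closes. Since the producer closes the channel when the query completes, a plain range loop is the usual consumer shape. The helper below is an illustrative sketch only (it expects a *btrdb.Quasar built the way the test builds one, and assumes the channel/version return shown above); it is not code from the repository.

package main

import (
	"fmt"

	"github.com/SoftwareDefinedBuildings/btrdb"
	"github.com/pborman/uuid"
)

//drainWindow runs a window query and ranges over the result channel until the
//producer closes it, returning the stream version the query executed against.
func drainWindow(q *btrdb.Quasar, id uuid.UUID, start, end int64, width uint64) uint64 {
	recordc, version := q.QueryWindow(id, start, end, btrdb.LatestGeneration, width, 0)
	if recordc == nil {
		return 0 //the stream could not be opened
	}
	for rec := range recordc {
		//rec is a qtree.StatRecord: Time, Count, Min, Mean, Max
		fmt.Printf("t=%d count=%d mean=%f\n", rec.Time, rec.Count, rec.Mean)
	}
	return version
}
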
/qtree/qtree2_test.go:
--------------------------------------------------------------------------------
1 | package qtree
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "math/rand"
7 | "testing"
8 | "time"
9 |
10 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore"
11 | )
12 |
13 | func init() {
14 | sd := time.Now().Unix()
15 | fmt.Printf(">>>> USING %v AS SEED <<<<<", sd)
16 | //rand.Seed(1417417715)
17 | rand.Seed(sd)
18 | }
19 | func GenBrk(avg uint64, spread uint64) chan uint64 {
20 | rv := make(chan uint64)
21 | go func() {
22 | for {
23 | num := int64(avg)
24 | num -= int64(spread / 2)
25 | num += rand.Int63n(int64(spread))
26 | rv <- uint64(num)
27 | }
28 | }()
29 | return rv
30 | }
31 |
32 | //TODO PW test at range with no data
33 | func TestQT2_PW2(t *testing.T) {
34 | log.Printf("Inserting data 0-4096")
35 | te := int64(4096)
36 | tdat := GenData(0, 4096, 1, 0, func(_ int64) float64 { return rand.Float64() })
37 | if int64(len(tdat)) != te {
38 | log.Panic("GenData messed up a bit")
39 | }
40 | tr, uuid := MakeWTree()
41 | tr.InsertValues(tdat)
42 | tr.Commit()
43 | var err error
44 | tr, err = NewReadQTree(_bs, uuid, bstore.LatestGeneration)
45 | if err != nil {
46 | t.Error(err)
47 | }
48 |
49 | moddat := make([]StatRecord, len(tdat))
50 | for i, v := range tdat {
51 | moddat[i] = StatRecord{
52 | Time: v.Time,
53 | Count: 1,
54 | Min: v.Val,
55 | Mean: v.Val,
56 | Max: v.Val,
57 | }
58 | }
59 | expected_qty := 4096
60 | for pwi := uint8(0); pwi < 63; pwi++ {
61 | qrydat, err := tr.QueryStatisticalValuesBlock(-(16 << 56), 48<<56, pwi)
62 | if err != nil {
63 | log.Panic(err)
64 | }
65 | //log.Printf("for pwi %v, we got len %v",pwi, len(qrydat))
66 | if len(qrydat) != expected_qty {
67 | log.Printf("qdat: %v", qrydat)
68 | log.Printf("expected %v, got %v", expected_qty, len(qrydat))
69 | t.FailNow()
70 | }
71 | if expected_qty != 1 {
72 | expected_qty >>= 1
73 | }
74 | }
75 | }
76 | func TestQT2_PW(t *testing.T) {
77 | log.Printf("Inserting data 0-4096")
78 | te := int64(4096)
79 | tdat := GenData(0, 4096, 1, 0, func(_ int64) float64 { return rand.Float64() })
80 | if int64(len(tdat)) != te {
81 | log.Panic("GenData messed up a bit")
82 | }
83 | tr, uuid := MakeWTree()
84 | err := tr.InsertValues(tdat)
85 | if err != nil {
86 | t.Error(err)
87 | }
88 | tr.Commit()
89 | tr, err = NewReadQTree(_bs, uuid, bstore.LatestGeneration)
90 | if err != nil {
91 | t.Error(err)
92 | }
93 |
94 | moddat := make([]StatRecord, len(tdat))
95 | for i, v := range tdat {
96 | moddat[i] = StatRecord{
97 | Time: v.Time,
98 | Count: 1,
99 | Min: v.Val,
100 | Mean: v.Val,
101 | Max: v.Val,
102 | }
103 | }
104 | for pwi := uint8(0); pwi < 12; pwi++ {
105 | qrydat, err := tr.QueryStatisticalValuesBlock(0, te, pwi)
106 | if err != nil {
107 | log.Panic(err)
108 | }
109 | if int64(len(qrydat)) != te>>pwi {
110 | t.Logf("len of qrydat mismatch %v vs %v", len(qrydat), te>>pwi)
111 | log.Printf("qry dat %+v", qrydat)
112 | t.FailNow()
113 | } else {
114 | t.Logf("LEN MATCH %v", len(qrydat))
115 | }
116 | min := func(a float64, b float64) float64 {
117 | if a < b {
118 | return a
119 | }
120 | return b
121 | }
122 | max := func(a float64, b float64) float64 {
123 | if a > b {
124 | return a
125 | }
126 | return b
127 | }
128 | moddat2 := make([]StatRecord, len(moddat)/2)
129 | for i := 0; i < len(moddat)/2; i++ {
130 | nmean := moddat[2*i].Mean*float64(moddat[2*i].Count) +
131 | moddat[2*i+1].Mean*float64(moddat[2*i+1].Count)
132 | nmean /= float64(moddat[2*i].Count + moddat[2*i+1].Count)
133 |
134 | moddat2[i] = StatRecord{
135 | Time: moddat[2*i].Time,
136 | Count: moddat[2*i].Count + moddat[2*i+1].Count,
137 | Min: min(moddat[2*i].Min, moddat[2*i+1].Min),
138 | Mean: nmean,
139 | Max: max(moddat[2*i].Max, moddat[2*i+1].Max),
140 | }
141 | }
142 | }
143 | }
144 | func TestQT2_A(t *testing.T) {
145 | gs := int64(20+rand.Intn(10)) * 365 * DAY
146 | ge := int64(30+rand.Intn(10)) * 365 * DAY
147 | freq := uint64(rand.Intn(10)+1) * HOUR
148 | varn := uint64(30 * MINUTE)
149 | tdat := GenData(gs, ge, freq, varn,
150 | func(_ int64) float64 { return rand.Float64() })
151 | log.Printf("generated %v records", len(tdat))
152 | tr, uuid := MakeWTree()
153 | log.Printf("generated tree %v", tr.gen.Uuid().String())
154 | tr.Commit()
155 |
156 | idx := uint64(0)
157 | brks := GenBrk(100, 50)
158 | loops := GenBrk(4, 4)
159 | for idx < uint64(len(tdat)) {
160 | tr := LoadWTree(uuid)
161 | loop := <-loops
162 | for i := uint64(0); i < loop; i++ {
163 | brk := <-brks
164 | if idx+brk >= uint64(len(tdat)) {
165 | brk = uint64(len(tdat)) - idx
166 | }
167 | if brk == 0 {
168 | continue
169 | }
170 | tr.InsertValues(tdat[idx : idx+brk])
171 | idx += brk
172 | }
173 | tr.Commit()
174 | }
175 |
176 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
177 | if err != nil {
178 | log.Panic(err)
179 | }
180 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
181 | if err != nil {
182 | log.Panic(err)
183 | }
184 | log.Printf("wrote %v, read %v", len(tdat), len(rval))
185 | CompareData(tdat, rval)
186 | }
187 |
188 | func TestQT2_Superdense(t *testing.T) {
189 | tdat := make([]Record, 10000)
190 | for i := 0; i < 10000; i++ {
191 | tdat[i] = Record{Time: 5, Val: float64(i)}
192 | }
193 | tr, uuid := MakeWTree()
194 | log.Printf("generated tree %v", tr.gen.Uuid().String())
195 | tr.Commit()
196 |
197 | idx := uint64(0)
198 | brks := GenBrk(100, 50)
199 | loops := GenBrk(4, 4)
200 | for idx < uint64(len(tdat)) {
201 | tr := LoadWTree(uuid)
202 | loop := <-loops
203 | for i := uint64(0); i < loop; i++ {
204 | brk := <-brks
205 | if idx+brk >= uint64(len(tdat)) {
206 | brk = uint64(len(tdat)) - idx
207 | }
208 | if brk == 0 {
209 | continue
210 | }
211 | tr.InsertValues(tdat[idx : idx+brk])
212 | idx += brk
213 | }
214 | tr.Commit()
215 | }
216 |
217 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
218 | if err != nil {
219 | log.Panic(err)
220 | }
221 | rval, err := rtr.ReadStandardValuesBlock(0, 10)
222 | if err != nil {
223 | log.Panic(err)
224 | }
225 | log.Printf("wrote %v, read %v", len(tdat), len(rval))
226 | CompareData(tdat, rval)
227 | }
228 |
229 | func TestQT2_Nearest(t *testing.T) {
230 | vals := []Record{
231 | {int64(1 << 56), 1},
232 | {int64(2 << 56), 2},
233 | {int64(3 << 56), 3},
234 | }
235 | tr, uuid := MakeWTree()
236 | err := tr.InsertValues(vals)
237 | if err != nil {
238 | t.Error(err)
239 | }
240 | tr.Commit()
241 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
242 | if err != nil {
243 | log.Panic(err)
244 | }
245 | tparams := []struct {
246 | time int64
247 | backwards bool
248 | expectOk bool
249 | val float64
250 | }{
251 | {(2 << 56) + 1, true, true, 2},
252 | {(2 << 56), true, true, 1},
253 | {(2 << 56), false, true, 2},
254 | {(2 << 56) + 1, false, true, 3},
255 | {0, false, true, 1},
256 | {4 << 56, true, true, 3},
257 | {0, true, false, -1},
258 | {4 << 56, false, false, -1},
259 | }
260 | for i, v := range tparams {
261 | rv, err := rtr.FindNearestValue(v.time, v.backwards)
262 | if v.expectOk {
263 | if err != nil || rv.Val != v.val {
264 | t.Fatalf("subtest [%v] = %+v", i, v)
265 | }
266 | } else {
267 | if err != ErrNoSuchPoint {
268 | t.Fatalf("subtest [%v] = %+v", i, v)
269 | }
270 | }
271 | }
272 | }
273 |
274 | func TestQT2_DEL(t *testing.T) {
275 | gs := int64(20+rand.Intn(10)) * 365 * DAY
276 | ge := int64(30+rand.Intn(10)) * 365 * DAY
277 | freq := uint64(rand.Intn(10)+1) * HOUR
278 | varn := uint64(30 * MINUTE)
279 | tdat := GenData(gs, ge, freq, varn,
280 | func(_ int64) float64 { return rand.Float64() })
281 | log.Printf("generated %v records", len(tdat))
282 | tr, uuid := MakeWTree()
283 | log.Printf("generated tree %v", tr.gen.Uuid().String())
284 | tr.Commit()
285 |
286 | idx := uint64(0)
287 | brks := GenBrk(100, 50)
288 | loops := GenBrk(4, 4)
289 | for idx < uint64(len(tdat)) {
290 | tr := LoadWTree(uuid)
291 | loop := <-loops
292 | for i := uint64(0); i < loop; i++ {
293 | brk := <-brks
294 | if idx+brk >= uint64(len(tdat)) {
295 | brk = uint64(len(tdat)) - idx
296 | }
297 | if brk == 0 {
298 | continue
299 | }
300 | tr.InsertValues(tdat[idx : idx+brk])
301 | idx += brk
302 | }
303 | tr.Commit()
304 | }
305 |
306 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
307 | if err != nil {
308 | log.Panic(err)
309 | }
310 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
311 | if err != nil {
312 | log.Panic(err)
313 | }
314 | log.Printf("wrote %v, read %v", len(tdat), len(rval))
315 | CompareData(tdat, rval)
316 |
317 | dtr, err := NewWriteQTree(_bs, uuid)
318 | dtr.DeleteRange(tdat[1].Time, tdat[len(tdat)-2].Time+1)
319 | dtr.Commit()
320 | {
321 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
322 | if err != nil {
323 | log.Panic(err)
324 | }
325 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
326 | if err != nil {
327 | log.Panic(err)
328 | }
329 |
330 | if len(rval) != 2 {
331 | t.Log("Mismatch in expected length")
332 | t.Fail()
333 | }
334 | }
335 | {
336 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
337 | if err != nil {
338 | log.Panic(err)
339 | }
340 | rch := rtr.GetAllReferencedVAddrs()
341 | refd := make([]uint64, 0, 10)
342 | for v := range rch {
343 | log.Printf("Referenced: 0x%016x", v)
344 | refd = append(refd, v)
345 | }
346 | /*
347 | if len(refd) != 5 {
348 | t.Log("Referencing != 5 nodes (%v)", len(refd))
349 | t.Fail()
350 | }*/
351 | }
352 | }
353 |
354 | func TestQT2_CRNG(t *testing.T) {
355 | gs := int64(20+rand.Intn(10)) * 365 * DAY
356 | ge := int64(30+rand.Intn(10)) * 365 * DAY
357 | freq := uint64(rand.Intn(10)+1) * HOUR
358 | varn := uint64(30 * MINUTE)
359 | tdat := GenData(gs, ge, freq, varn,
360 | func(_ int64) float64 { return rand.Float64() })
361 | log.Printf("generated %v records", len(tdat))
362 | tr, uuid := MakeWTree()
363 | log.Printf("generated tree %v", tr.gen.Uuid().String())
364 | tr.Commit()
365 |
366 | idx := uint64(0)
367 | brks := GenBrk(100, 50)
368 | loops := GenBrk(4, 4)
369 | for idx < uint64(len(tdat)) {
370 | tr := LoadWTree(uuid)
371 | loop := <-loops
372 | for i := uint64(0); i < loop; i++ {
373 | brk := <-brks
374 | if idx+brk >= uint64(len(tdat)) {
375 | brk = uint64(len(tdat)) - idx
376 | }
377 | if brk == 0 {
378 | continue
379 | }
380 | tr.InsertValues(tdat[idx : idx+brk])
381 | idx += brk
382 | }
383 | tr.Commit()
384 | }
385 |
386 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
387 | if err != nil {
388 | log.Panic(err)
389 | }
390 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
391 | if err != nil {
392 | log.Panic(err)
393 | }
394 | initial_gen := rtr.Generation()
395 | log.Printf("wrote %v, read %v", len(tdat), len(rval))
396 | CompareData(tdat, rval)
397 |
398 | dtr, err := NewWriteQTree(_bs, uuid)
399 | dtr.DeleteRange(tdat[0].Time, tdat[5].Time)
400 | dtr.Commit()
401 | {
402 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
403 | if err != nil {
404 | log.Panic(err)
405 | }
406 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
407 | if err != nil {
408 | log.Panic(err)
409 | }
410 | if len(rval) != len(tdat)-5 {
411 | t.Logf("Mismatch in expected length %v %v %v", len(rval), len(tdat)-5, len(tdat))
412 | t.Fail()
413 | }
414 | log.Printf("gen was, gen is: %v / %v", initial_gen, rtr.Generation())
415 | log.Printf("========== STARTING CHANGED RANGE INVOCATION ==============")
416 | changed_ranges := rtr.FindChangedSinceSlice(initial_gen, 0)
417 | log.Printf("Changed ranges: %+v", changed_ranges)
418 | s, e, ds, de := tdat[0].Time, tdat[5].Time, changed_ranges[0].Start-tdat[0].Time, changed_ranges[0].End-tdat[5].Time
419 | dsm := float64(ds) / (1E9 * 60)
420 | dem := float64(de) / (1E9 * 60)
421 | log.Printf("We deleted from %v to %v \n(delta %v %v) (delta min %.3f %.3f)", s, e, ds, de, dsm, dem)
422 | rtr.root.PrintCounts(0)
423 | }
424 |
425 | {
426 | dtr, err := NewWriteQTree(_bs, uuid)
427 | dtr.InsertValues([]Record{{ge - 1000, 100}})
428 | dtr.Commit()
429 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration)
430 | if err != nil {
431 | log.Panic(err)
432 | }
433 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn))
434 | if err != nil {
435 | log.Panic(err)
436 | }
437 | if len(rval) != len(tdat)-4 {
438 | t.Logf("Mismatch in expected length %v %v %v", len(rval), len(tdat)-5, len(tdat))
439 | t.Fail()
440 | }
441 | log.Printf("gen was, gen is: %v / %v", initial_gen, rtr.Generation())
442 | log.Printf("========== STARTING CHANGED RANGE INVOCATION ==============")
443 | changed_ranges := rtr.FindChangedSinceSlice(initial_gen, 0)
444 | log.Printf("Changed ranges: %+v", changed_ranges)
445 | s, e, ds, de := tdat[0].Time, tdat[5].Time, changed_ranges[0].Start-tdat[0].Time, changed_ranges[0].End-tdat[5].Time
446 | dsm := float64(ds) / (1E9 * 60)
447 | dem := float64(de) / (1E9 * 60)
448 | log.Printf("We deleted from %v to %v \n(delta %v %v) (delta min %.3f %.3f)", s, e, ds, de, dsm, dem)
449 | rtr.root.PrintCounts(0)
450 | }
451 | }
452 |
--------------------------------------------------------------------------------
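
TestQT2_PW above checks that each extra bit of point width halves the number of statistical buckets, and it folds adjacent StatRecords together using a count-weighted mean. That folding rule, pulled out as a standalone helper for clarity (illustrative only; the StatRecord mirror below assumes the field types used in the test):

package main

import "fmt"

//StatRecord mirrors qtree.StatRecord as it is used in the tests above.
type StatRecord struct {
	Time  int64
	Count uint64
	Min   float64
	Mean  float64
	Max   float64
}

//mergeStat combines two adjacent buckets: counts add, min/max fold, and the
//mean is weighted by the counts, exactly as TestQT2_PW builds moddat2.
func mergeStat(a, b StatRecord) StatRecord {
	mean := (a.Mean*float64(a.Count) + b.Mean*float64(b.Count)) / float64(a.Count+b.Count)
	min, max := a.Min, a.Max
	if b.Min < min {
		min = b.Min
	}
	if b.Max > max {
		max = b.Max
	}
	return StatRecord{Time: a.Time, Count: a.Count + b.Count, Min: min, Mean: mean, Max: max}
}

func main() {
	a := StatRecord{Time: 0, Count: 1, Min: 2, Mean: 2, Max: 2}
	b := StatRecord{Time: 1, Count: 3, Min: 1, Mean: 4, Max: 6}
	//expected: {Time:0 Count:4 Min:1 Mean:3.5 Max:6}
	fmt.Printf("%+v\n", mergeStat(a, b))
}
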
/cpinterface/cpinterface.go:
--------------------------------------------------------------------------------
1 | package cpinterface
2 |
3 | import (
4 | "net"
5 | "os"
6 | "os/signal"
7 | "sync"
8 |
9 | "github.com/pborman/uuid"
10 | "github.com/SoftwareDefinedBuildings/btrdb"
11 | "github.com/SoftwareDefinedBuildings/btrdb/qtree"
12 | capn "github.com/glycerine/go-capnproto"
13 | "github.com/op/go-logging"
14 | )
15 |
16 | var log *logging.Logger
17 |
18 | func init() {
19 | log = logging.MustGetLogger("log")
20 | }
21 |
22 | type CPInterface struct {
23 | isShuttingDown bool
24 | }
25 |
26 | func ServeCPNP(q *btrdb.Quasar, ntype string, laddr string) *CPInterface {
27 | rv := &CPInterface{}
28 | go func() {
29 | sigchan := make(chan os.Signal, 1)
30 | signal.Notify(sigchan, os.Interrupt)
31 | _ = <-sigchan
32 | rv.isShuttingDown = true
33 | }()
34 | l, err := net.Listen(ntype, laddr)
35 | if err != nil {
36 | log.Panic(err)
37 | }
38 | defer l.Close()
39 | for !rv.isShuttingDown {
40 | conn, err := l.Accept()
41 | if err != nil {
42 | log.Panic(err)
43 | }
44 | go func(c net.Conn) {
45 | rv.dispatchCommands(q, c)
46 | }(conn)
47 | }
48 | return rv
49 | }
50 |
51 | func (c *CPInterface) Shutdown() {
52 | c.isShuttingDown = true
53 | }
54 |
55 | func (c *CPInterface) dispatchCommands(q *btrdb.Quasar, conn net.Conn) {
56 | //This governs the stream
57 | rmtx := sync.Mutex{}
58 | wmtx := sync.Mutex{}
59 | log.Info("cpnp connection")
60 | for !c.isShuttingDown {
61 | rmtx.Lock()
62 | seg, err := capn.ReadFromStream(conn, nil)
63 | if err != nil {
64 | log.Warning("ERR (%v) :: %v", conn.RemoteAddr(), err)
65 | conn.Close()
66 | break
67 | }
68 | rmtx.Unlock()
69 | go func() {
70 | seg := seg
71 | req := ReadRootRequest(seg)
72 | mkresp := func() (Response, *capn.Segment) {
73 | rvseg := capn.NewBuffer(nil)
74 | resp := NewRootResponse(rvseg)
75 | resp.SetEchoTag(req.EchoTag())
76 | return resp, rvseg
77 | }
78 | sendresp := func(seg *capn.Segment) {
79 | wmtx.Lock()
80 | seg.WriteTo(conn)
81 | wmtx.Unlock()
82 | }
83 | switch req.Which() {
84 | case REQUEST_QUERYSTANDARDVALUES:
85 | //log.Info("QSV\n")
86 | st := req.QueryStandardValues().StartTime()
87 | et := req.QueryStandardValues().EndTime()
88 | uuid := uuid.UUID(req.QueryStandardValues().Uuid())
89 | ver := req.QueryStandardValues().Version()
90 | //log.Info("[REQ=QsV] st=%v, et=%v, uuid=%v, gen=%v", st, et, uuid, ver)
91 | if ver == 0 {
92 | ver = btrdb.LatestGeneration
93 | }
94 | recordc, errorc, gen := q.QueryValuesStream(uuid, st, et, ver)
95 | if recordc == nil {
96 | log.Warning("RESPONDING ERR: %v", err)
97 | resp, rvseg := mkresp()
98 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
99 | resp.SetFinal(true)
100 | sendresp(rvseg)
101 | return
102 | } else {
103 | bufarr := make([]qtree.Record, 0, 4096)
104 | for {
105 | resp, rvseg := mkresp()
106 | fail := false
107 | fin := false
108 | for {
109 | select {
110 | case _, ok := <-errorc:
111 | if ok {
112 | fin = true
113 | fail = true
114 | goto donestandard
115 | }
116 | case r, ok := <-recordc:
117 | if !ok {
118 | fin = true
119 | goto donestandard
120 | }
121 | bufarr = append(bufarr, r)
122 | if len(bufarr) == cap(bufarr) {
123 | goto donestandard
124 | }
125 | }
126 | }
127 | donestandard:
128 | if fail {
129 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
130 | resp.SetFinal(true)
131 | //consume channels
132 | go func() {
133 | for _ = range recordc {
134 | }
135 | }()
136 | go func() {
137 | for _ = range errorc {
138 | }
139 | }()
140 | sendresp(rvseg)
141 | return
142 | }
143 | records := NewRecords(rvseg)
144 | rl := NewRecordList(rvseg, len(bufarr))
145 | rla := rl.ToArray()
146 | for i, v := range bufarr {
147 | rla[i].SetTime(v.Time)
148 | rla[i].SetValue(v.Val)
149 | }
150 | records.SetVersion(gen)
151 | records.SetValues(rl)
152 | resp.SetRecords(records)
153 | resp.SetStatusCode(STATUSCODE_OK)
154 | if fin {
155 | resp.SetFinal(true)
156 | }
157 | sendresp(rvseg)
158 | bufarr = bufarr[:0]
159 | if fin {
160 | return
161 | }
162 | }
163 | }
164 | case REQUEST_QUERYWINDOWVALUES:
165 | st := req.QueryWindowValues().StartTime()
166 | et := req.QueryWindowValues().EndTime()
167 | id := uuid.UUID(req.QueryWindowValues().Uuid())
168 | width := req.QueryWindowValues().Width()
169 | ver := req.QueryWindowValues().Version()
170 | depth := req.QueryWindowValues().Depth()
171 | if ver == 0 {
172 | ver = btrdb.LatestGeneration
173 | }
174 | recordc, gen := q.QueryWindow(id, st, et, ver, width, depth)
175 | if recordc == nil {
176 | log.Warning("RESPONDING ERR: %v", err)
177 | resp, rvseg := mkresp()
178 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
179 | resp.SetFinal(true)
180 | sendresp(rvseg)
181 | return
182 | } else {
183 | bufarr := make([]qtree.StatRecord, 0, 4096)
184 | for {
185 | resp, rvseg := mkresp()
186 | fail := false
187 | fin := false
188 | for {
189 | select {
190 | case r, ok := <-recordc:
191 | if !ok {
192 | fin = true
193 | goto donewindow
194 | }
195 | bufarr = append(bufarr, r)
196 | if len(bufarr) == cap(bufarr) {
197 | goto donewindow
198 | }
199 | }
200 | }
201 | donewindow:
202 | if fail {
203 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
204 | resp.SetFinal(true)
205 | //consume channels
206 | go func() {
207 | for _ = range recordc {
208 | }
209 | }()
210 | sendresp(rvseg)
211 | return
212 | }
213 | records := NewStatisticalRecords(rvseg)
214 | rl := NewStatisticalRecordList(rvseg, len(bufarr))
215 | rla := rl.ToArray()
216 | for i, v := range bufarr {
217 | rla[i].SetTime(v.Time)
218 | rla[i].SetCount(v.Count)
219 | rla[i].SetMin(v.Min)
220 | rla[i].SetMean(v.Mean)
221 | rla[i].SetMax(v.Max)
222 | }
223 | records.SetVersion(gen)
224 | records.SetValues(rl)
225 | resp.SetStatisticalRecords(records)
226 | resp.SetStatusCode(STATUSCODE_OK)
227 | if fin {
228 | resp.SetFinal(true)
229 | }
230 | sendresp(rvseg)
231 | bufarr = bufarr[:0]
232 | if fin {
233 | return
234 | }
235 | }
236 | }
237 | case REQUEST_QUERYSTATISTICALVALUES:
238 | st := req.QueryStatisticalValues().StartTime()
239 | et := req.QueryStatisticalValues().EndTime()
240 | uuid := uuid.UUID(req.QueryStatisticalValues().Uuid())
241 | pw := req.QueryStatisticalValues().PointWidth()
242 | ver := req.QueryStatisticalValues().Version()
243 | if ver == 0 {
244 | ver = btrdb.LatestGeneration
245 | }
246 | recordc, errorc, gen := q.QueryStatisticalValuesStream(uuid, st, et, ver, pw)
247 | if recordc == nil {
248 | log.Warning("RESPONDING ERR: %v", err)
249 | resp, rvseg := mkresp()
250 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
251 | resp.SetFinal(true)
252 | sendresp(rvseg)
253 | return
254 | } else {
255 | bufarr := make([]qtree.StatRecord, 0, 4096)
256 | for {
257 | resp, rvseg := mkresp()
258 | fail := false
259 | fin := false
260 | for {
261 | select {
262 | case _, ok := <-errorc:
263 | if ok {
264 | fin = true
265 | fail = true
266 | goto donestat
267 | }
268 | case r, ok := <-recordc:
269 | if !ok {
270 | fin = true
271 | goto donestat
272 | }
273 | bufarr = append(bufarr, r)
274 | if len(bufarr) == cap(bufarr) {
275 | goto donestat
276 | }
277 | }
278 | }
279 | donestat:
280 | if fail {
281 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
282 | resp.SetFinal(true)
283 | //consume channels
284 | go func() {
285 | for _ = range recordc {
286 | }
287 | }()
288 | go func() {
289 | for _ = range errorc {
290 | }
291 | }()
292 | sendresp(rvseg)
293 | return
294 | }
295 | records := NewStatisticalRecords(rvseg)
296 | rl := NewStatisticalRecordList(rvseg, len(bufarr))
297 | rla := rl.ToArray()
298 | for i, v := range bufarr {
299 | rla[i].SetTime(v.Time)
300 | rla[i].SetCount(v.Count)
301 | rla[i].SetMin(v.Min)
302 | rla[i].SetMean(v.Mean)
303 | rla[i].SetMax(v.Max)
304 | }
305 | records.SetVersion(gen)
306 | records.SetValues(rl)
307 | resp.SetStatisticalRecords(records)
308 | resp.SetStatusCode(STATUSCODE_OK)
309 | if fin {
310 | resp.SetFinal(true)
311 | }
312 | sendresp(rvseg)
313 | bufarr = bufarr[:0]
314 | if fin {
315 | return
316 | }
317 | }
318 | }
319 | case REQUEST_QUERYVERSION:
320 | //ul := req.
321 | ul := req.QueryVersion().Uuids()
322 | ull := ul.ToArray()
323 | resp, rvseg := mkresp()
324 | rvers := NewVersions(rvseg)
325 | vlist := rvseg.NewUInt64List(len(ull))
326 | ulist := rvseg.NewDataList(len(ull))
327 | for i, v := range ull {
328 | ver, err := q.QueryGeneration(uuid.UUID(v))
329 | if err != nil {
330 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
331 | resp.SetFinal(true)
332 | sendresp(rvseg)
333 | return
334 | }
335 | //I'm not sure that the array that sits behind the uuid slice will stick around
336 | //so I'm copying it.
337 | uuid := make([]byte, 16)
338 | copy(uuid, v)
339 | vlist.Set(i, ver)
340 | ulist.Set(i, uuid)
341 | }
342 | resp.SetStatusCode(STATUSCODE_OK)
343 | rvers.SetUuids(ulist)
344 | rvers.SetVersions(vlist)
345 | resp.SetVersionList(rvers)
346 | resp.SetFinal(true)
347 | sendresp(rvseg)
348 | case REQUEST_QUERYNEARESTVALUE:
349 | resp, rvseg := mkresp()
350 | t := req.QueryNearestValue().Time()
351 | id := uuid.UUID(req.QueryNearestValue().Uuid())
352 | ver := req.QueryNearestValue().Version()
353 | if ver == 0 {
354 | ver = btrdb.LatestGeneration
355 | }
356 | back := req.QueryNearestValue().Backward()
357 | rv, gen, err := q.QueryNearestValue(id, t, back, ver)
358 | switch err {
359 | case nil:
360 | resp.SetStatusCode(STATUSCODE_OK)
361 | records := NewRecords(rvseg)
362 | rl := NewRecordList(rvseg, 1)
363 | rla := rl.ToArray()
364 | rla[0].SetTime(rv.Time)
365 | rla[0].SetValue(rv.Val)
366 | records.SetVersion(gen)
367 | records.SetValues(rl)
368 | resp.SetRecords(records)
369 | case qtree.ErrNoSuchPoint:
370 | resp.SetStatusCode(STATUSCODE_NOSUCHPOINT)
371 | default:
372 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
373 | }
374 | resp.SetFinal(true)
375 | sendresp(rvseg)
376 | case REQUEST_QUERYCHANGEDRANGES:
377 | resp, rvseg := mkresp()
378 | id := uuid.UUID(req.QueryChangedRanges().Uuid())
379 | sgen := req.QueryChangedRanges().FromGeneration()
380 | egen := req.QueryChangedRanges().ToGeneration()
381 | if egen == 0 {
382 | egen = btrdb.LatestGeneration
383 | }
384 | resolution := req.QueryChangedRanges().Resolution()
385 | rv, ver, err := q.QueryChangedRanges(id, sgen, egen, resolution)
386 | switch err {
387 | case nil:
388 | resp.SetStatusCode(STATUSCODE_OK)
389 | ranges := NewRanges(rvseg)
390 | ranges.SetVersion(ver)
391 | crl := NewChangedRangeList(rvseg, len(rv))
392 | crla := crl.ToArray()
393 | for i := 0; i < len(rv); i++ {
394 | crla[i].SetStartTime(rv[i].Start)
395 | crla[i].SetEndTime(rv[i].End)
396 | }
397 | ranges.SetValues(crl)
398 | resp.SetChangedRngList(ranges)
399 | default:
400 | log.Critical("qcr error: %v", err)
401 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
402 | }
403 | resp.SetFinal(true)
404 | sendresp(rvseg)
405 |
406 | case REQUEST_INSERTVALUES:
407 | resp, rvseg := mkresp()
408 | uuid := uuid.UUID(req.InsertValues().Uuid())
409 | rl := req.InsertValues().Values()
410 | rla := rl.ToArray()
411 | if len(rla) != 0 {
412 | qtr := make([]qtree.Record, len(rla))
413 | for i, v := range rla {
414 | qtr[i] = qtree.Record{Time: v.Time(), Val: v.Value()}
415 | }
416 | q.InsertValues(uuid, qtr)
417 | }
418 | if req.InsertValues().Sync() {
419 | q.Flush(uuid)
420 | }
421 | resp.SetFinal(true)
422 | resp.SetStatusCode(STATUSCODE_OK)
423 | sendresp(rvseg)
424 | case REQUEST_DELETEVALUES:
425 | resp, rvseg := mkresp()
426 | id := uuid.UUID(req.DeleteValues().Uuid())
427 | stime := req.DeleteValues().StartTime()
428 | etime := req.DeleteValues().EndTime()
429 | err := q.DeleteRange(id, stime, etime)
430 | switch err {
431 | case nil:
432 | resp.SetStatusCode(STATUSCODE_OK)
433 | default:
434 | resp.SetStatusCode(STATUSCODE_INTERNALERROR)
435 | }
436 | resp.SetFinal(true)
437 | sendresp(rvseg)
438 | default:
439 | log.Critical("weird segment")
440 | }
441 | }()
442 | }
443 | }
444 |
445 | /*
446 | func EncodeMsg() *bytes.Buffer {
447 | rv := bytes.Buffer{}
448 | seg := capn.NewBuffer(nil)
449 | cmd := NewRootRequest(seg)
450 |
451 | qsv := NewCmdQueryStandardValues(seg)
452 | cmd.SetEchoTag(500)
453 | qsv.SetStartTime(0x5a5a)
454 | qsv.SetEndTime(0xf7f7)
455 | cmd.SetQueryStandardValues(qsv)
456 | seg.WriteTo(&rv)
457 | return &rv
458 | }
459 |
460 | func DecodeMsg(b *bytes.Buffer) {
461 | seg, err := capn.ReadFromStream(b, nil)
462 | if err != nil {
463 | log.Panic(err)
464 | }
465 | cmd := ReadRootRequest(seg)
466 | switch cmd.Which() {
467 | case REQUEST_QUERYSTANDARDVALUES:
468 | ca := cmd.QueryStandardValues()
469 | default:
470 | log.Critical("wtf")
471 | }
472 | }
473 | */
474 |
--------------------------------------------------------------------------------
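The commented-out EncodeMsg/DecodeMsg helpers above show how a request travels over the Cap'n Proto interface. Below is a minimal, self-contained sketch of that same round trip as a standalone program; it is illustrative only, the import paths for the generated cpinterface package and the capn runtime are assumptions, and the StartTime()/EndTime() getters are inferred from the setters used above.

    package main

    import (
    	"bytes"
    	"log"

    	capn "github.com/glycerine/go-capnproto"                    // assumed runtime import path
    	cpi "github.com/SoftwareDefinedBuildings/btrdb/cpinterface" // assumed package import path
    )

    func main() {
    	// Encode: build a Request carrying a QueryStandardValues command, mirroring EncodeMsg above.
    	var buf bytes.Buffer
    	seg := capn.NewBuffer(nil)
    	req := cpi.NewRootRequest(seg)
    	req.SetEchoTag(500)
    	qsv := cpi.NewCmdQueryStandardValues(seg)
    	qsv.SetStartTime(0x5a5a)
    	qsv.SetEndTime(0xf7f7)
    	req.SetQueryStandardValues(qsv)
    	seg.WriteTo(&buf)

    	// Decode: read the segment back and dispatch on the union tag, as the server loop does.
    	rseg, err := capn.ReadFromStream(&buf, nil)
    	if err != nil {
    		log.Fatal(err)
    	}
    	cmd := cpi.ReadRootRequest(rseg)
    	switch cmd.Which() {
    	case cpi.REQUEST_QUERYSTANDARDVALUES:
    		log.Printf("start=%d end=%d", cmd.QueryStandardValues().StartTime(), cmd.QueryStandardValues().EndTime())
    	default:
    		log.Print("unexpected request type")
    	}
    }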
/internal/cephprovider/cephprovider.go:
--------------------------------------------------------------------------------
1 | package cephprovider
2 |
3 | // #cgo LDFLAGS: -lrados
4 | // #include "cephprovider.h"
5 | // #include <stdlib.h>
6 | import "C"
7 |
8 | import (
9 | "strconv"
10 | "sync"
11 | "unsafe"
12 |
13 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider"
14 | "github.com/op/go-logging"
15 | )
16 |
17 | var log *logging.Logger
18 |
19 | func init() {
20 | log = logging.MustGetLogger("log")
21 | }
22 |
23 | const NUM_RHANDLES = 200
24 |
25 | //We know we won't get any addresses here, because this is the relocation base as well
26 | const METADATA_BASE = 0xFF00000000000000
27 |
28 | //4096 address objects (ADDR_OBJ_SIZE each) per addr lock range
29 | const ADDR_LOCK_SIZE = 0x1000000000
30 | const ADDR_OBJ_SIZE = 0x0001000000
31 |
32 | //Just over the DBSIZE
33 | const MAX_EXPECTED_OBJECT_SIZE = 20485
34 |
35 | //The number of RADOS blocks to cache (up to 16MB each, probably only 1.6MB each)
36 | const RADOS_CACHE_SIZE = NUM_RHANDLES * 2
37 |
38 | const OFFSET_MASK = 0xFFFFFF
39 | const R_CHUNKSIZE = 1 << 20
40 |
41 | //This is how many uuid/address pairs we will keep to facilitate appending to segments
42 | //instead of creating new ones.
43 | const WORTH_CACHING = OFFSET_MASK - MAX_EXPECTED_OBJECT_SIZE
44 | const SEGCACHE_SIZE = 1024
45 |
46 | // 1MB for write cache, I doubt we will ever hit this tbh
47 | const WCACHE_SIZE = 1 << 20
48 |
49 | func UUIDSliceToArr(id []byte) [16]byte {
50 | rv := [16]byte{}
51 | copy(rv[:], id)
52 | return rv
53 | }
54 |
55 | type CephSegment struct {
56 | h C.phandle_t
57 | sp *CephStorageProvider
58 | ptr uint64
59 | naddr uint64
60 | base uint64 //Not the same as the provider's base
61 | warrs [][]byte
62 | uid [16]byte
63 | wcache []byte
64 | wcache_base uint64
65 | }
66 |
67 | type chunkreqindex struct {
68 | UUID [16]byte
69 | Addr uint64
70 | }
71 |
72 | type CephStorageProvider struct {
73 | rh []C.phandle_t
74 | rhidx chan int
75 | rhidx_ret chan int
76 | rh_avail []bool
77 | ptr uint64
78 | alloc chan uint64
79 | segaddrcache map[[16]byte]uint64
80 | segcachelock sync.Mutex
81 |
82 | chunklock sync.Mutex
83 | chunkgate map[chunkreqindex][]chan []byte
84 |
85 | rcache *CephCache
86 | }
87 |
88 | //Returns the address of the first free word in the segment when it was locked
89 | func (seg *CephSegment) BaseAddress() uint64 {
90 | return seg.base
91 | }
92 |
93 | //Unlocks the segment for the StorageProvider to give to other consumers
94 | //Implies a flush
95 | func (seg *CephSegment) Unlock() {
96 | seg.flushWrite()
97 | _, err := C.handle_close(seg.h)
98 | if err != nil {
99 | log.Panic("CGO ERROR: %v", err)
100 | }
101 | seg.warrs = nil
102 | if (seg.naddr & OFFSET_MASK) < WORTH_CACHING {
103 | seg.sp.segcachelock.Lock()
104 | seg.sp.pruneSegCache()
105 | seg.sp.segaddrcache[seg.uid] = seg.naddr
106 | seg.sp.segcachelock.Unlock()
107 | }
108 |
109 | }
110 |
111 | func (seg *CephSegment) flushWrite() {
112 | if len(seg.wcache) == 0 {
113 | return
114 | }
115 | C.handle_write(seg.h, (*C.uint8_t)(unsafe.Pointer(&seg.uid[0])), C.uint64_t(seg.wcache_base),
116 | (*C.char)(unsafe.Pointer(&seg.wcache[0])), C.int(len(seg.wcache)), 0)
117 |
118 | for i := 0; i < len(seg.wcache); i += R_CHUNKSIZE {
119 | seg.sp.rcache.cacheInvalidate((uint64(i) + seg.wcache_base) & R_ADDRMASK)
120 | }
121 | //The C code does not finish immediately, so we need to keep a reference to the old
122 | //wcache array until the segment is unlocked
123 | seg.warrs = append(seg.warrs, seg.wcache)
124 | seg.wcache = make([]byte, 0, WCACHE_SIZE)
125 | seg.wcache_base = seg.naddr
126 |
127 | }
128 |
129 | //Writes a slice to the segment, returns immediately
130 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument
131 | //It is up to the implementer to work out how to report no space immediately
132 | //The uint64 is the address to be used for the next write
133 | func (seg *CephSegment) Write(uuid []byte, address uint64, data []byte) (uint64, error) {
134 | //We don't put written blocks into the cache, because those will be
135 | //in the dblock cache much higher up.
136 | if address != seg.naddr {
137 | log.Panic("Non-sequential write")
138 | }
139 |
140 | if len(seg.wcache)+len(data)+2 > cap(seg.wcache) {
141 | seg.flushWrite()
142 | }
143 |
144 | base := len(seg.wcache)
145 | seg.wcache = seg.wcache[:base+2]
146 | seg.wcache[base] = byte(len(data))
147 | seg.wcache[base+1] = byte(len(data) >> 8)
148 | seg.wcache = append(seg.wcache, data...)
149 |
150 | naddr := address + uint64(len(data)+2)
151 |
152 | //OLD NOTE:
153 | //Note that it is ok for an object to "go past the end of the allocation". Naddr could be one byte before
154 | //the end of the allocation for example. This is not a problem as we never address anything except the
155 | //start of an object. This is why we do not add the object max size here
156 | //NEW NOTE:
157 | //We cannot go past the end of the allocation anymore because it would break the read cache
158 | if ((naddr + MAX_EXPECTED_OBJECT_SIZE + 2) >> 24) != (address >> 24) {
159 | //We are gonna need a new object addr
160 | naddr = <-seg.sp.alloc
161 | seg.naddr = naddr
162 | seg.flushWrite()
163 | return naddr, nil
164 | }
165 | seg.naddr = naddr
166 |
167 | return naddr, nil
168 | }
169 |
170 | //Block until all writes are complete. Note this does not imply a flush of the underlying files.
171 | func (seg *CephSegment) Flush() {
172 | //Not sure we need to do stuff here, we can do it in unlock
173 | }
174 |
175 | //Must be called with the cache lock held
176 | func (sp *CephStorageProvider) pruneSegCache() {
177 | //This is extremely rare, so it's best to handle it simply
178 | //If we drop the cache, we will get one shortsized object per stream,
179 | //and it won't necessarily be _very_ short.
180 | if len(sp.segaddrcache) >= SEGCACHE_SIZE {
181 | sp.segaddrcache = make(map[[16]byte]uint64, SEGCACHE_SIZE)
182 | }
183 | }
184 |
185 | func (sp *CephStorageProvider) provideReadHandles() {
186 | for {
187 | //Read all returned read handles
188 | ldretfi:
189 | for {
190 | select {
191 | case fi := <-sp.rhidx_ret:
192 | sp.rh_avail[fi] = true
193 | default:
194 | break ldretfi
195 | }
196 | }
197 |
198 | found := false
199 | for i := 0; i < NUM_RHANDLES; i++ {
200 | if sp.rh_avail[i] {
201 | sp.rhidx <- i
202 | sp.rh_avail[i] = false
203 | found = true
204 | }
205 | }
206 | //If we didn't find one, do a blocking read
207 | if !found {
208 | idx := <-sp.rhidx_ret
209 | sp.rh_avail[idx] = true
210 | }
211 | }
212 | }
213 |
214 | func (sp *CephStorageProvider) provideAllocs() {
215 | base := sp.ptr
216 | for {
217 | sp.alloc <- sp.ptr
218 | sp.ptr += ADDR_OBJ_SIZE
219 | if sp.ptr >= base+ADDR_LOCK_SIZE {
220 | sp.ptr = sp.obtainBaseAddress()
221 | base = sp.ptr
222 | }
223 | }
224 | }
225 |
226 | func (sp *CephStorageProvider) obtainBaseAddress() uint64 {
227 | h, err := C.handle_create()
228 | if err != nil {
229 | log.Panic("CGO ERROR: %v", err)
230 | }
231 | addr, err := C.handle_obtainrange(h)
232 | if err != nil {
233 | log.Panic("CGO ERROR: %v", err)
234 | }
235 | return uint64(addr)
236 | }
237 |
238 | //Called at startup of a normal run
239 | func (sp *CephStorageProvider) Initialize(opts map[string]string) {
240 | //Allocate caches
241 | sp.rcache = &CephCache{}
242 | cachesz, _ := strconv.Atoi(opts["cephrcache"])
243 | if cachesz < 40 {
244 | cachesz = 40 //one per read handle: 40MB
245 | }
246 | sp.rcache.initCache(uint64(cachesz))
247 |
248 | cephconf := C.CString(opts["cephconf"])
249 | cephpool := C.CString(opts["cephpool"])
250 | _, err := C.initialize_provider(cephconf, cephpool)
251 | if err != nil {
252 | log.Panic("CGO ERROR: %v", err)
253 | }
254 | C.free(unsafe.Pointer(cephconf))
255 | C.free(unsafe.Pointer(cephpool))
256 |
257 | sp.rh = make([]C.phandle_t, NUM_RHANDLES)
258 | sp.rh_avail = make([]bool, NUM_RHANDLES)
259 | sp.rhidx = make(chan int, NUM_RHANDLES+1)
260 | sp.rhidx_ret = make(chan int, NUM_RHANDLES+1)
261 | sp.alloc = make(chan uint64, 128)
262 | sp.segaddrcache = make(map[[16]byte]uint64, SEGCACHE_SIZE)
263 | sp.chunkgate = make(map[chunkreqindex][]chan []byte)
264 |
265 | for i := 0; i < NUM_RHANDLES; i++ {
266 | sp.rh_avail[i] = true
267 | h, err := C.handle_create()
268 | if err != nil {
269 | log.Panic("CGO ERROR: %v", err)
270 | }
271 | sp.rh[i] = h
272 | }
273 |
274 | //Obtain base address
275 | sp.ptr = sp.obtainBaseAddress()
276 | if sp.ptr == 0 {
277 | log.Panic("Could not read allocator! DB not created properly?")
278 | }
279 | log.Info("Base address obtained as 0x%016x", sp.ptr)
280 |
281 | //Start serving read handles
282 | go sp.provideReadHandles()
283 |
284 | //Start providing address allocations
285 | go sp.provideAllocs()
286 |
287 | }
288 |
289 | //Called to create the database for the first time
290 | func (sp *CephStorageProvider) CreateDatabase(opts map[string]string) error {
291 | cephconf := C.CString(opts["cephconf"])
292 | cephpool := C.CString(opts["cephpool"])
293 | _, err := C.initialize_provider(cephconf, cephpool)
294 | if err != nil {
295 | log.Panic("CGO ERROR: %v", err)
296 | }
297 | C.free(unsafe.Pointer(cephconf))
298 | C.free(unsafe.Pointer(cephpool))
299 | h, err := C.handle_create()
300 | if err != nil {
301 | log.Panic("CGO ERROR: %v", err)
302 | }
303 | C.handle_init_allocator(h)
304 | _, err = C.handle_close(h)
305 | if err != nil {
306 | log.Panic("CGO ERROR: %v", err)
307 | }
308 | return nil
309 | }
310 |
311 | // Lock a segment, or block until a segment can be locked
312 | // Returns a Segment struct
313 | // Implicit unchecked assumption: you cannot lock more than one segment
314 | // for a given uuid (without unlocking them in between). It will break
315 | // segcache
316 | func (sp *CephStorageProvider) LockSegment(uuid []byte) bprovider.Segment {
317 | rv := new(CephSegment)
318 | rv.sp = sp
319 | h, err := C.handle_create()
320 | if err != nil {
321 | log.Panic("CGO ERROR: %v", err)
322 | }
323 | rv.h = h
324 | rv.ptr = <-sp.alloc
325 | rv.uid = UUIDSliceToArr(uuid)
326 | rv.wcache = make([]byte, 0, WCACHE_SIZE)
327 | sp.segcachelock.Lock()
328 | cached_ptr, ok := sp.segaddrcache[rv.uid]
329 | if ok {
330 | delete(sp.segaddrcache, rv.uid)
331 | }
332 | sp.segcachelock.Unlock()
333 | //ok = false
334 | if ok {
335 | rv.base = cached_ptr
336 | rv.naddr = rv.base
337 | } else {
338 | rv.base = rv.ptr
339 | rv.naddr = rv.base
340 | }
341 | rv.wcache_base = rv.naddr
342 | //Although I don't know this for sure, I am concerned that when we pass the write array pointer to C
343 | //the Go GC may free it before C is done. I prevent this by pinning all the written arrays, which get
344 | //deref'd after the segment is unlocked
345 | rv.warrs = make([][]byte, 0, 64)
346 | return rv
347 | }
348 |
349 | func (sp *CephStorageProvider) rawObtainChunk(uuid []byte, address uint64) []byte {
350 | chunk := sp.rcache.cacheGet(address)
351 | if chunk == nil {
352 | chunk = sp.rcache.getBlank()
353 | rhidx := <-sp.rhidx
354 | rc, err := C.handle_read(sp.rh[rhidx], (*C.uint8_t)(unsafe.Pointer(&uuid[0])), C.uint64_t(address), (*C.char)(unsafe.Pointer(&chunk[0])), R_CHUNKSIZE)
355 | if err != nil {
356 | log.Panic("CGO ERROR: %v", err)
357 | }
358 | chunk = chunk[0:rc]
359 | sp.rhidx_ret <- rhidx
360 | sp.rcache.cachePut(address, chunk)
361 | }
362 | return chunk
363 | }
364 |
365 | func (sp *CephStorageProvider) obtainChunk(uuid []byte, address uint64) []byte {
366 | chunk := sp.rcache.cacheGet(address)
367 | if chunk != nil {
368 | return chunk
369 | }
370 | index := chunkreqindex{UUID: UUIDSliceToArr(uuid), Addr: address}
371 | rvc := make(chan []byte, 1)
372 | sp.chunklock.Lock()
373 | slc, ok := sp.chunkgate[index]
374 | if ok {
375 | sp.chunkgate[index] = append(slc, rvc)
376 | sp.chunklock.Unlock()
377 | } else {
378 | sp.chunkgate[index] = []chan []byte{rvc}
379 | sp.chunklock.Unlock()
380 | go func() {
381 | bslice := sp.rawObtainChunk(uuid, address)
382 | sp.chunklock.Lock()
383 | slc, ok := sp.chunkgate[index]
384 | if !ok {
385 | panic("inconsistency!!")
386 | }
387 | for _, chn := range slc {
388 | chn <- bslice
389 | }
390 | delete(sp.chunkgate, index)
391 | sp.chunklock.Unlock()
392 | }()
393 | }
394 | rv := <-rvc
395 | return rv
396 | }
397 |
398 | // Read the blob into the given buffer: direct read
399 | /*
400 | func (sp *CephStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte {
401 |
402 | //Get a read handle
403 | rhidx := <-sp.rhidx
404 | if len(buffer) < MAX_EXPECTED_OBJECT_SIZE {
405 | log.Panic("That doesn't seem safe")
406 | }
407 | rc, err := C.handle_read(sp.rh[rhidx], (*C.uint8_t)(unsafe.Pointer(&uuid[0])), C.uint64_t(address), (*C.char)(unsafe.Pointer(&buffer[0])), MAX_EXPECTED_OBJECT_SIZE)
408 | if err != nil {
409 | log.Panic("CGO ERROR: %v", err)
410 | }
411 | sp.rhidx_ret <- rhidx
412 | ln := int(buffer[0]) + (int(buffer[1]) << 8)
413 | if int(rc) < ln+2 {
414 | //TODO this can happen, it is better to just go back a few superblocks
415 | log.Panic("Short read")
416 | }
417 | return buffer[2 : ln+2]
418 | }*/
419 |
420 | // Read the blob into the given buffer
421 | func (sp *CephStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte {
422 | //Get the first chunk for this object:
423 | chunk1 := sp.obtainChunk(uuid, address&R_ADDRMASK)[address&R_OFFSETMASK:]
424 | var chunk2 []byte
425 | var ln int
426 |
427 | if len(chunk1) < 2 {
428 | //not even long enough for the prefix; it must be one byte in the first chunk, one in the second
429 | chunk2 = sp.obtainChunk(uuid, (address+R_CHUNKSIZE)&R_ADDRMASK)
430 | ln = int(chunk1[0]) + (int(chunk2[0]) << 8)
431 | chunk2 = chunk2[1:]
432 | chunk1 = chunk1[1:]
433 | } else {
434 | ln = int(chunk1[0]) + (int(chunk1[1]) << 8)
435 | chunk1 = chunk1[2:]
436 | }
437 |
438 | if (ln) > MAX_EXPECTED_OBJECT_SIZE {
439 | log.Panic("WTUF: ", ln)
440 | }
441 |
442 | copied := 0
443 | if len(chunk1) > 0 {
444 | //We need some bytes from chunk1
445 | end := ln
446 | if len(chunk1) < ln {
447 | end = len(chunk1)
448 | }
449 | copied = copy(buffer, chunk1[:end])
450 | }
451 | if copied < ln {
452 | //We need some bytes from chunk2
453 | if chunk2 == nil {
454 | chunk2 = sp.obtainChunk(uuid, (address+R_CHUNKSIZE)&R_ADDRMASK)
455 | }
456 | copy(buffer[copied:], chunk2[:ln-copied])
457 |
458 | }
459 | if ln < 2 {
460 | log.Panic("This is unexpected")
461 | }
462 | return buffer[:ln]
463 |
464 | }
465 |
--------------------------------------------------------------------------------
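CephSegment.Write and CephStorageProvider.Read above frame every block with a 2-byte little-endian length prefix and read it back through 1 MB (R_CHUNKSIZE) chunks, fetching a second chunk when the prefix or payload straddles a chunk boundary. The following standalone sketch reproduces just that framing logic against plain byte slices; it is illustrative only and all names in it are hypothetical, not part of the provider.

    package main

    import "fmt"

    const chunkSize = 1 << 20 // mirrors R_CHUNKSIZE above

    // frame appends a 2-byte little-endian length prefix followed by the data,
    // the same layout CephSegment.Write stages into its write cache.
    func frame(dst, data []byte) []byte {
    	dst = append(dst, byte(len(data)), byte(len(data)>>8))
    	return append(dst, data...)
    }

    // readFramed parses one framed blob starting at addr, fetching the next chunk
    // when either the prefix or the payload crosses a chunk boundary, the same way
    // CephStorageProvider.Read stitches chunk1 and chunk2 together.
    func readFramed(fetch func(chunkAddr uint64) []byte, addr uint64) []byte {
    	chunk1 := fetch(addr &^ (chunkSize - 1))[addr&(chunkSize-1):]
    	var chunk2 []byte
    	var ln int
    	if len(chunk1) < 2 {
    		chunk2 = fetch((addr + chunkSize) &^ (chunkSize - 1))
    		ln = int(chunk1[0]) + int(chunk2[0])<<8
    		chunk1, chunk2 = chunk1[1:], chunk2[1:]
    	} else {
    		ln = int(chunk1[0]) + int(chunk1[1])<<8
    		chunk1 = chunk1[2:]
    	}
    	out := make([]byte, 0, ln)
    	n := ln
    	if n > len(chunk1) {
    		n = len(chunk1)
    	}
    	out = append(out, chunk1[:n]...)
    	if len(out) < ln {
    		if chunk2 == nil {
    			chunk2 = fetch((addr + chunkSize) &^ (chunkSize - 1))
    		}
    		out = append(out, chunk2[:ln-len(out)]...)
    	}
    	return out
    }

    func main() {
    	// Two adjacent "chunks" backed by one slice; write a frame that straddles the boundary.
    	backing := make([]byte, 2*chunkSize)
    	payload := []byte("hello btrdb")
    	off := uint64(chunkSize - 5)
    	copy(backing[off:], frame(nil, payload))
    	fetch := func(a uint64) []byte { return backing[a : a+chunkSize] }
    	fmt.Printf("%q\n", readFramed(fetch, off)) // "hello btrdb"
    }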
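obtainChunk above also coalesces concurrent reads of the same chunk through the chunkgate map: the first caller for a (uuid, address) pair performs the RADOS read, while later callers park on a channel and receive the same slice. A compact standalone sketch of that pattern follows (hypothetical names, keyed on address only for brevity).

    package main

    import (
    	"fmt"
    	"sync"
    )

    // coalescer delivers one fetch result to every concurrent requester of the same key.
    type coalescer struct {
    	mu      sync.Mutex
    	waiters map[uint64][]chan []byte
    }

    func (c *coalescer) get(addr uint64, fetch func(uint64) []byte) []byte {
    	ch := make(chan []byte, 1)
    	c.mu.Lock()
    	if w, ok := c.waiters[addr]; ok {
    		// A fetch for this address is already in flight; just wait for its result.
    		c.waiters[addr] = append(w, ch)
    		c.mu.Unlock()
    	} else {
    		c.waiters[addr] = []chan []byte{ch}
    		c.mu.Unlock()
    		go func() {
    			b := fetch(addr)
    			c.mu.Lock()
    			for _, wc := range c.waiters[addr] {
    				wc <- b
    			}
    			delete(c.waiters, addr)
    			c.mu.Unlock()
    		}()
    	}
    	return <-ch
    }

    func main() {
    	c := &coalescer{waiters: map[uint64][]chan []byte{}}
    	chunk := c.get(42, func(uint64) []byte { return make([]byte, 8) })
    	fmt.Println(len(chunk))
    }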
/internal/bstore/blocktypes.go:
--------------------------------------------------------------------------------
1 | package bstore
2 |
3 | import (
4 | "math"
5 |
6 | "github.com/pborman/uuid"
7 | )
8 |
9 | type Superblock struct {
10 | uuid uuid.UUID
11 | gen uint64
12 | root uint64
13 | unlinked bool
14 | }
15 |
16 | func (s *Superblock) Gen() uint64 {
17 | return s.gen
18 | }
19 |
20 | func (s *Superblock) Root() uint64 {
21 | return s.root
22 | }
23 |
24 | func (s *Superblock) Uuid() uuid.UUID {
25 | return s.uuid
26 | }
27 |
28 | func (s *Superblock) Unlinked() bool {
29 | return s.unlinked
30 | }
31 |
32 | func NewSuperblock(id uuid.UUID) *Superblock {
33 | return &Superblock{
34 | uuid: id,
35 | gen: 1,
36 | root: 0,
37 | }
38 | }
39 |
40 | func (s *Superblock) Clone() *Superblock {
41 | return &Superblock{
42 | uuid: s.uuid,
43 | gen: s.gen,
44 | root: s.root,
45 | }
46 | }
47 |
48 | type BlockType uint64
49 |
50 | const (
51 | Vector BlockType = 1
52 | Core BlockType = 2
53 | Bad BlockType = 255
54 | )
55 |
56 | const FlagsMask uint8 = 3
57 |
58 | type Datablock interface {
59 | GetDatablockType() BlockType
60 | }
61 |
62 | // The leaf datablock type. The tags allow unit tests
63 | // to work out if clone / serdes are working properly
64 | // "metadata" is not copied when a node is cloned;
65 | // "implicit" is not serialised.
66 | type Vectorblock struct {
67 |
68 | //Metadata, not copied on clone
69 | Identifier uint64 "metadata,implicit"
70 | Generation uint64 "metadata,implicit"
71 |
72 | //Payload, copied on clone
73 | Len uint16
74 | PointWidth uint8 "implicit"
75 | StartTime int64 "implicit"
76 | Time [VSIZE]int64
77 | Value [VSIZE]float64
78 | }
79 |
80 | type Coreblock struct {
81 |
82 | //Metadata, not copied
83 | Identifier uint64 "metadata,implicit"
84 | Generation uint64 "metadata,implicit"
85 |
86 | //Payload, copied
87 | PointWidth uint8 "implicit"
88 | StartTime int64 "implicit"
89 | Addr [KFACTOR]uint64
90 | Count [KFACTOR]uint64
91 | Min [KFACTOR]float64
92 | Mean [KFACTOR]float64
93 | Max [KFACTOR]float64
94 | CGeneration [KFACTOR]uint64
95 | }
96 |
97 | func (*Vectorblock) GetDatablockType() BlockType {
98 | return Vector
99 | }
100 |
101 | func (*Coreblock) GetDatablockType() BlockType {
102 | return Core
103 | }
104 |
105 | //Copy a core block, only copying the payload, not the metadata
106 | func (src *Coreblock) CopyInto(dst *Coreblock) {
107 | dst.PointWidth = src.PointWidth
108 | dst.StartTime = src.StartTime
109 | dst.Addr = src.Addr
110 | //dst.Time = src.Time
111 | dst.Count = src.Count
112 | dst.Min = src.Min
113 | dst.Mean = src.Mean
114 | dst.Max = src.Max
115 | dst.CGeneration = src.CGeneration
116 | }
117 |
118 | func (src *Vectorblock) CopyInto(dst *Vectorblock) {
119 | dst.PointWidth = src.PointWidth
120 | dst.StartTime = src.StartTime
121 | dst.Len = src.Len
122 | dst.Time = src.Time
123 | dst.Value = src.Value
124 | }
125 |
126 | func DatablockGetBufferType(buf []byte) BlockType {
127 | switch BlockType(buf[0]) {
128 | case Vector:
129 | return Vector
130 | case Core:
131 | return Core
132 | }
133 | return Bad
134 | }
135 |
136 | // The current algorithm is as follows:
137 | // entry 0: absolute time and value
138 | // entry 1: delta time and value since 0
139 | // entry 2: delta since delta 1
140 | // entry 3: delta from average delta (1+2)
141 | // entry 4+: delta from average of deltas (n-1, n-2, n-3)
142 |
143 | func (v *Vectorblock) Serialize(dst []byte) []byte {
144 | idx := 3
145 | dst[0] = byte(Vector)
146 | dst[1] = byte(v.Len)
147 | dst[2] = byte(v.Len >> 8)
148 |
149 | if v.Len == 0 {
150 | return dst[:idx]
151 | }
152 | //First values are written in full
153 | e, m := decompose(v.Value[0])
154 | idx += writeUnsignedHuff(dst[idx:], m)
155 | idx += writeUnsignedHuff(dst[idx:], uint64(e))
156 |
157 | //So we are taking a gamble here: I think I will never have negative times. If I do,
158 | //this will use 9 bytes for every time. But I won't.
159 | t := v.Time[0]
160 | idx += writeUnsignedHuff(dst[idx:], uint64(t))
161 | if v.Len == 1 {
162 | return dst[:idx]
163 | }
164 |
165 | const delta_depth = 3
166 | hist_deltas_t := make([]int64, delta_depth)
167 | hist_deltas_e := make([]int64, delta_depth)
168 | hist_deltas_m := make([]int64, delta_depth)
169 | delta_idx := 0
170 | num_deltas := 0
171 |
172 | em1 := int64(e)
173 | mm1 := int64(m)
174 | tm1 := t
175 | for i := 1; i < int(v.Len); i++ {
176 | var deltas int
177 | if num_deltas > delta_depth {
178 | deltas = delta_depth
179 | } else {
180 | deltas = num_deltas
181 | }
182 | var e, m int64
183 | tmpe, tmpm := decompose(v.Value[i])
184 | e = int64(tmpe)
185 | m = int64(tmpm)
186 | t := v.Time[i]
187 |
188 | //Calculate the delta for this record
189 | dt := t - tm1
190 | de := e - em1
191 | dm := m - mm1
192 |
193 | //Calculate average deltas
194 | var dt_total int64 = 0
195 | var dm_total int64 = 0
196 | var de_total int64 = 0
197 | for d := 0; d < deltas; d++ {
198 | dt_total += hist_deltas_t[d]
199 | dm_total += hist_deltas_m[d]
200 | de_total += hist_deltas_e[d]
201 | }
202 | var adt, ade, adm int64 = 0, 0, 0
203 | if deltas != 0 {
204 | adt = dt_total / int64(deltas)
205 | ade = de_total / int64(deltas)
206 | adm = dm_total / int64(deltas)
207 | }
208 | //Calculate the delta delta
209 | ddt := dt - adt
210 | dde := de - ade
211 | ddm := dm - adm
212 |
213 | //Add in the delta for this record
214 | hist_deltas_t[delta_idx] = dt
215 | hist_deltas_e[delta_idx] = de
216 | hist_deltas_m[delta_idx] = dm
217 | delta_idx++
218 | if delta_idx == delta_depth {
219 | delta_idx = 0
220 | }
221 | num_deltas++
222 |
223 | //Encode dde nz and ddt nz into ddm
224 | ddm <<= 2
225 | if dde != 0 {
226 | ddm |= 2
227 | }
228 | if ddt != 0 {
229 | ddm |= 1
230 | }
231 |
232 | //Write it out
233 | idx += writeSignedHuff(dst[idx:], ddm)
234 | if dde != 0 {
235 | idx += writeSignedHuff(dst[idx:], dde)
236 | }
237 | if ddt != 0 {
238 | idx += writeSignedHuff(dst[idx:], ddt)
239 | }
240 |
241 | em1 = e
242 | tm1 = t
243 | mm1 = m
244 | }
245 | return dst[:idx]
246 | }
247 |
248 | func (v *Vectorblock) Deserialize(src []byte) {
249 | blocktype := src[0]
250 | if BlockType(blocktype) != Vector {
251 | lg.Panicf("This is not a vector block")
252 | }
253 |
254 | v.Len = uint16(src[1]) + (uint16(src[2]) << 8)
255 | length := int(v.Len)
256 | idx := 3
257 |
258 | m, l, _ := readUnsignedHuff(src[idx:])
259 | idx += l
260 | e, l, _ := readUnsignedHuff(src[idx:])
261 | idx += l
262 | t, l, _ := readUnsignedHuff(src[idx:])
263 | idx += l
264 | v.Time[0] = int64(t)
265 | v.Value[0] = recompose(uint16(e), uint64(m))
266 |
267 | //Keep delta history
268 | const delta_depth = 3
269 | hist_deltas_t := make([]int64, delta_depth)
270 | hist_deltas_e := make([]int64, delta_depth)
271 | hist_deltas_m := make([]int64, delta_depth)
272 | delta_idx := 0
273 | num_deltas := 0
274 |
275 | mm1 := int64(m)
276 | em1 := int64(e)
277 | tm1 := int64(t)
278 | for i := 1; i < length; i++ {
279 | //How many deltas do we have
280 | var deltas int
281 | if num_deltas > delta_depth {
282 | deltas = delta_depth
283 | } else {
284 | deltas = num_deltas
285 | }
286 |
287 | //Calculate average deltas
288 | var dt_total int64 = 0
289 | var dm_total int64 = 0
290 | var de_total int64 = 0
291 | for d := 0; d < deltas; d++ {
292 | dt_total += hist_deltas_t[d]
293 | dm_total += hist_deltas_m[d]
294 | de_total += hist_deltas_e[d]
295 | }
296 | var adt, ade, adm int64 = 0, 0, 0
297 | if deltas != 0 {
298 | adt = dt_total / int64(deltas)
299 | ade = de_total / int64(deltas)
300 | adm = dm_total / int64(deltas)
301 | }
302 | //Read the dd's
303 | ddm, l, _ := readSignedHuff(src[idx:])
304 | idx += l
305 | var dde, ddt int64 = 0, 0
306 | if ddm&2 != 0 {
307 | //log.Warning("re")
308 | dde, l, _ = readSignedHuff(src[idx:])
309 | idx += l
310 | }
311 | if ddm&1 != 0 {
312 | //log.Warning("rt")
313 | ddt, l, _ = readSignedHuff(src[idx:])
314 | idx += l
315 | }
316 | ddm >>= 2
317 | //Convert dd's to d's
318 | dm := ddm + adm
319 | dt := ddt + adt
320 | de := dde + ade
321 |
322 | //Save the deltas in the history
323 | hist_deltas_t[delta_idx] = dt
324 | hist_deltas_m[delta_idx] = dm
325 | hist_deltas_e[delta_idx] = de
326 | delta_idx++
327 | if delta_idx == delta_depth {
328 | delta_idx = 0
329 | }
330 | num_deltas++
331 |
332 | //Save values
333 | e := em1 + de
334 | m := mm1 + dm
335 | v.Time[i] = tm1 + dt
336 | v.Value[i] = recompose(uint16(e), uint64(m))
337 | em1 += de
338 | mm1 += dm
339 | tm1 += dt
340 | }
341 | }
342 |
343 | func (c *Coreblock) Serialize(dst []byte) []byte {
344 | /*
345 | Addr delta-delta / abszero
346 | Count delta +isnz(cgen)
347 | CGeneration delta-delta
348 | Mean delta-delta (mantissa contains isnz(e))
349 | Min delta-delta (mantissa contains isnz(e))
350 | Max delta-delta (mantissa contains isnz(e))
351 |
352 | TL;DR the code is the documentation MWAHAHAHA
353 | */
354 |
355 | idx := 1
356 | dst[0] = byte(Core)
357 |
358 | const delta_depth = 3
359 |
360 | deltadeltarizer := func(maxdepth int) func(value int64) int64 {
361 | hist_delta := make([]int64, maxdepth)
362 | var depth int = 0
363 | insidx := 0
364 | var last_value int64
365 | dd := func(value int64) int64 {
366 | var total_dt int64 = 0
367 | for i := 0; i < depth; i++ {
368 | total_dt += hist_delta[i]
369 | }
370 | var avg_dt int64 = 0
371 | if depth > 0 {
372 | avg_dt = total_dt / int64(depth)
373 | }
374 | curdelta := value - last_value
375 | last_value = value
376 | ddelta := curdelta - avg_dt
377 | hist_delta[insidx] = curdelta
378 | insidx = (insidx + 1) % maxdepth
379 | depth += 1
380 | if depth > maxdepth {
381 | depth = maxdepth
382 | }
383 | return ddelta
384 | }
385 | return dd
386 | }
387 | dd_addr := deltadeltarizer(delta_depth)
388 | dd_cgen := deltadeltarizer(delta_depth)
389 | dd_count := deltadeltarizer(delta_depth)
390 | dd_mean_m := deltadeltarizer(delta_depth)
391 | dd_mean_e := deltadeltarizer(delta_depth)
392 | dd_min_m := deltadeltarizer(delta_depth)
393 | dd_min_e := deltadeltarizer(delta_depth)
394 | dd_max_m := deltadeltarizer(delta_depth)
395 | dd_max_e := deltadeltarizer(delta_depth)
396 |
397 | //Look for bottomable idx
398 | bottomidx := -1
399 | for i := KFACTOR - 1; i >= 0; i-- {
400 | if c.Addr[i] == 0 && c.CGeneration[i] == 0 {
401 | bottomidx = i
402 | } else {
403 | break
404 | }
405 | }
406 | for i := 0; i < KFACTOR; i++ {
407 | if i == bottomidx {
408 | idx += writeFullZero(dst[idx:])
409 | break
410 | }
411 | if c.Addr[i] == 0 {
412 | idx += writeAbsZero(dst[idx:])
413 | idx += writeSignedHuff(dst[idx:], dd_cgen(int64(c.CGeneration[i])))
414 | } else {
415 | idx += writeSignedHuff(dst[idx:], dd_addr(int64(c.Addr[i])))
416 |
417 | min_e, min_m := decompose(c.Min[i])
418 | min_m_dd := dd_min_m(int64(min_m))
419 | min_e_dd := dd_min_e(int64(min_e))
420 | min_m_dd <<= 1
421 | if min_e_dd != 0 {
422 | min_m_dd |= 1
423 | }
424 |
425 | mean_e, mean_m := decompose(c.Mean[i])
426 | mean_m_dd := dd_mean_m(int64(mean_m))
427 | mean_e_dd := dd_mean_e(int64(mean_e))
428 | mean_m_dd <<= 1
429 | if mean_e_dd != 0 {
430 | mean_m_dd |= 1
431 | }
432 |
433 | max_e, max_m := decompose(c.Max[i])
434 | max_m_dd := dd_max_m(int64(max_m))
435 | max_e_dd := dd_max_e(int64(max_e))
436 | max_m_dd <<= 1
437 | if max_e_dd != 0 {
438 | max_m_dd |= 1
439 | }
440 |
441 | cgen_dd := dd_cgen(int64(c.CGeneration[i]))
442 |
443 | cnt := dd_count(int64(c.Count[i]))
444 | cnt <<= 1
445 | if cgen_dd != 0 {
446 | cnt |= 1
447 | }
448 | idx += writeSignedHuff(dst[idx:], cnt)
449 | if cgen_dd != 0 {
450 | idx += writeSignedHuff(dst[idx:], cgen_dd)
451 | }
452 | idx += writeSignedHuff(dst[idx:], min_m_dd)
453 | if min_e_dd != 0 {
454 | idx += writeSignedHuff(dst[idx:], min_e_dd)
455 | }
456 | idx += writeSignedHuff(dst[idx:], mean_m_dd)
457 | if mean_e_dd != 0 {
458 | idx += writeSignedHuff(dst[idx:], mean_e_dd)
459 | }
460 | idx += writeSignedHuff(dst[idx:], max_m_dd)
461 | if max_e_dd != 0 {
462 | idx += writeSignedHuff(dst[idx:], max_e_dd)
463 | }
464 | }
465 | //log.Warning("Finished SER %v, idx is %v", i, idx)
466 | }
467 | return dst[:idx]
468 | }
469 |
470 | func (c *Coreblock) Deserialize(src []byte) {
471 | //check 0 for id
472 | if src[0] != byte(Core) {
473 | lg.Panic("This is not a core block")
474 | }
475 | idx := 1
476 | dedeltadeltarizer := func(maxdepth int) func(dd int64) int64 {
477 | hist_delta := make([]int64, maxdepth)
478 | depth := 0
479 | insidx := 0
480 | var last_value int64 = 0
481 | decode := func(dd int64) int64 {
482 | var total_dt int64 = 0
483 | for i := 0; i < depth; i++ {
484 | total_dt += hist_delta[i]
485 | }
486 | var avg_dt int64 = 0
487 | if depth > 0 {
488 | avg_dt = total_dt / int64(depth)
489 | }
490 | curdelta := avg_dt + dd
491 | curvalue := last_value + curdelta
492 | last_value = curvalue
493 | hist_delta[insidx] = curdelta
494 | insidx = (insidx + 1) % maxdepth
495 | depth += 1
496 | if depth > maxdepth {
497 | depth = maxdepth
498 | }
499 | return last_value
500 | }
501 | return decode
502 | }
503 |
504 | const delta_depth = 3
505 | dd_addr := dedeltadeltarizer(delta_depth)
506 | dd_cgen := dedeltadeltarizer(delta_depth)
507 | dd_count := dedeltadeltarizer(delta_depth)
508 | dd_mean_m := dedeltadeltarizer(delta_depth)
509 | dd_mean_e := dedeltadeltarizer(delta_depth)
510 | dd_min_m := dedeltadeltarizer(delta_depth)
511 | dd_min_e := dedeltadeltarizer(delta_depth)
512 | dd_max_m := dedeltadeltarizer(delta_depth)
513 | dd_max_e := dedeltadeltarizer(delta_depth)
514 |
515 | i := 0
516 | for ; i < KFACTOR; i++ {
517 |
518 | //Get addr
519 | addr_dd, used, bottom := readSignedHuff(src[idx:])
520 | idx += used
521 | if bottom == ABSZERO {
522 | c.Addr[i] = 0
523 | c.Count[i] = 0
524 | //min/mean/max are undefined
525 | //Still have to decode cgen
526 | cgen_dd, used, _ := readSignedHuff(src[idx:])
527 | idx += used
528 | cgen := uint64(dd_cgen(cgen_dd))
529 | c.CGeneration[i] = cgen
530 | } else if bottom == FULLZERO {
531 | break
532 | } else {
533 | //Real value
534 | c.Addr[i] = uint64(dd_addr(addr_dd))
535 |
536 | cnt_dd, used, _ := readSignedHuff(src[idx:])
537 | idx += used
538 |
539 | var cgen_dd int64 = 0
540 | if cnt_dd&1 != 0 {
541 | cgen_dd, used, _ = readSignedHuff(src[idx:])
542 | idx += used
543 | }
544 | cnt_dd >>= 1
545 | c.CGeneration[i] = uint64(dd_cgen(cgen_dd))
546 | c.Count[i] = uint64(dd_count(cnt_dd))
547 |
548 | min_m_dd, used, _ := readSignedHuff(src[idx:])
549 | idx += used
550 | var min_e_dd int64
551 | if min_m_dd&1 != 0 {
552 | min_e_dd, used, _ = readSignedHuff(src[idx:])
553 | idx += used
554 | } else {
555 | min_e_dd = 0
556 | }
557 | min_m_dd >>= 1
558 | c.Min[i] = recompose(uint16(dd_min_e(min_e_dd)), uint64(dd_min_m(min_m_dd)))
559 |
560 | mean_m_dd, used, _ := readSignedHuff(src[idx:])
561 | idx += used
562 | var mean_e_dd int64
563 | if mean_m_dd&1 != 0 {
564 | mean_e_dd, used, _ = readSignedHuff(src[idx:])
565 | idx += used
566 | } else {
567 | mean_e_dd = 0
568 | }
569 | mean_m_dd >>= 1
570 | c.Mean[i] = recompose(uint16(dd_mean_e(mean_e_dd)), uint64(dd_mean_m(mean_m_dd)))
571 |
572 | max_m_dd, used, _ := readSignedHuff(src[idx:])
573 | idx += used
574 | var max_e_dd int64
575 | if max_m_dd&1 != 0 {
576 | max_e_dd, used, _ = readSignedHuff(src[idx:])
577 | idx += used
578 | } else {
579 | max_e_dd = 0
580 | }
581 | max_m_dd >>= 1
582 | c.Max[i] = recompose(uint16(dd_max_e(max_e_dd)), uint64(dd_max_m(max_m_dd)))
583 | }
584 | //log.Warning("Finishing deser idx %v, idx is %v",i, idx)
585 | }
586 |
587 | //Clear out from a FULLZERO
588 | for ; i < KFACTOR; i++ {
589 | c.Addr[i] = 0
590 | c.Count[i] = 0
591 | c.CGeneration[i] = 0
592 |
593 | }
594 | }
595 |
596 | //These functions allow us to read/write the packed numbers in the datablocks
597 | //These are huffman encoded in big endian
598 | // 0xxx xxxx 7 0x00
599 | // 10xx xxxx +1 14 0x80
600 | // 1100 xxxx +2 20 0xC0
601 | // 1101 xxxx +3 28 0xD0
602 | // 1110 xxxx +4 36 0xE0
603 | // 1111 00xx +5 42 0xF0
604 | // 1111 01xx +6 50 0xF4
605 | // 1111 10xx +7 58 0xF8
606 | // 1111 1100 +8 64 0xFC
607 | // 1111 1101 +0 ABSZERO (special symbol) 0xFD
608 | // 1111 1110 +0 FULLZERO (special symbol) 0xFE
609 | const VALUE = 0
610 | const ABSZERO = 1
611 | const FULLZERO = 2
612 |
613 | func writeUnsignedHuff(dst []byte, val uint64) int {
614 | //log.Warning("wuh called dstlen %v",len(dst))
615 | i := 0
616 | var do_rest func(n uint8)
617 | do_rest = func(n uint8) {
618 | if n == 0 {
619 | return
620 | }
621 | dst[i] = byte((val >> ((n - 1) * 8)) & 0xFF)
622 | i++
623 | do_rest(n - 1)
624 | }
625 | if val < (1 << 7) {
626 | dst[i] = byte(val)
627 | i++
628 | } else if val < (1 << 14) {
629 | dst[i] = byte(0x80 | val>>8)
630 | i++
631 | do_rest(1)
632 | } else if val < (1 << 20) {
633 | dst[i] = byte(0xC0 | val>>16)
634 | i++
635 | do_rest(2)
636 | } else if val < (1 << 28) {
637 | dst[i] = byte(0xD0 | val>>24)
638 | i++
639 | do_rest(3)
640 | } else if val < (1 << 36) {
641 | dst[i] = byte(0xE0 | val>>32)
642 | i++
643 | do_rest(4)
644 | } else if val < (1 << 42) {
645 | dst[i] = byte(0xF0 | val>>40)
646 | i++
647 | do_rest(5)
648 | } else if val < (1 << 50) {
649 | dst[i] = byte(0xF4 | val>>48)
650 | i++
651 | do_rest(6)
652 | } else if val < (1 << 58) {
653 | dst[i] = byte(0xF8 | val>>56)
654 | i++
655 | do_rest(7)
656 | } else {
657 | dst[i] = 0xFC
658 | i++
659 | do_rest(8)
660 | }
661 | return i
662 | }
663 | func writeAbsZero(dst []byte) int {
664 | dst[0] = 0xFD
665 | return 1
666 | }
667 | func writeFullZero(dst []byte) int {
668 | dst[0] = 0xFE
669 | return 1
670 | }
671 | func writeSignedHuff(dst []byte, val int64) int {
672 | if val < 0 {
673 | return writeUnsignedHuff(dst, (uint64(-val)<<1 | 1))
674 | } else {
675 | return writeUnsignedHuff(dst, uint64(val)<<1)
676 | }
677 | }
678 | func readUnsignedHuff(src []byte) (uint64, int, int) {
679 | var rv uint64
680 | i := 1
681 | var do_rest func(n uint8)
682 | do_rest = func(n uint8) {
683 | if n == 0 {
684 | return
685 | }
686 | rv <<= 8
687 | rv |= uint64(src[i])
688 | i++
689 | do_rest(n - 1)
690 | }
691 | if src[0] > 0xFE {
692 | lg.Panicf("This huffman symbol is reserved: %v", src[0])
693 | } else if src[0] == 0xFD {
694 | return 0, 1, ABSZERO
695 | } else if src[0] == 0xFE {
696 | return 0, 1, FULLZERO
697 | } else if src[0] == 0xFC {
698 | do_rest(8)
699 | } else if src[0] >= 0xF8 {
700 | rv = uint64(src[0] & 0x03)
701 | do_rest(7)
702 | } else if src[0] >= 0xF4 {
703 | rv = uint64(src[0] & 0x03)
704 | do_rest(6)
705 | } else if src[0] >= 0xF0 {
706 | rv = uint64(src[0] & 0x03)
707 | do_rest(5)
708 | } else if src[0] >= 0xE0 {
709 | rv = uint64(src[0] & 0x0F)
710 | do_rest(4)
711 | } else if src[0] >= 0xD0 {
712 | rv = uint64(src[0] & 0x0F)
713 | do_rest(3)
714 | } else if src[0] >= 0xC0 {
715 | rv = uint64(src[0] & 0x0F)
716 | do_rest(2)
717 | } else if src[0] >= 0x80 {
718 | rv = uint64(src[0] & 0x3F)
719 | do_rest(1)
720 | } else {
721 | rv = uint64(src[0] & 0x7F)
722 | }
723 | return rv, i, VALUE
724 | }
725 | func readSignedHuff(src []byte) (int64, int, int) {
726 | v, l, bv := readUnsignedHuff(src)
727 | if bv != VALUE {
728 | return 0, 1, bv
729 | }
730 | s := v & 1
731 | v >>= 1
732 | if s == 1 {
733 | return -int64(v), l, VALUE
734 | }
735 | return int64(v), l, VALUE
736 | }
737 |
738 | //This decomposes a float into a weird representation that was empirically determined to be
739 | //ideal for compression of Quasar streams.
740 | //First we split out the sign, exponent and mantissa from the float
741 | //Then we reverse the bytes in the mantissa (bits are better but slower)
742 | //Then we left shift it and stick the sign bit as the LSB
743 | //The result is the (unsigned) exponent and the mantissa-sortof-thingy
744 | func decompose(val float64) (e uint16, m uint64) {
745 | iv := math.Float64bits(val)
746 | s := iv >> 63
747 | exp := (iv >> 52) & 2047
748 | iv = iv & ((1 << 52) - 1)
749 | //Take the bottom 7 bytes and reverse them. Top byte is left zero
750 | // . . . . . .
751 | m = ((iv&0x00000000000000FF)<<(6*8) |
752 | (iv&0x000000000000FF00)<<(4*8) |
753 | (iv&0x0000000000FF0000)<<(2*8) |
754 | (iv & 0x00000000FF000000) |
755 | (iv&0x000000FF00000000)>>(2*8) |
756 | (iv&0x0000FF0000000000)>>(4*8) |
757 | (iv&0x00FF000000000000)>>(6*8))
758 | e = (uint16(exp) << 1) | uint16(s)
759 | return
760 | }
761 |
762 | func recompose(e uint16, m uint64) float64 {
763 | s := e & 1
764 | e >>= 1
765 | iv := ((m&0x00000000000000FF)<<(6*8) |
766 | (m&0x000000000000FF00)<<(4*8) |
767 | (m&0x0000000000FF0000)<<(2*8) |
768 | (m & 0x00000000FF000000) |
769 | (m&0x000000FF00000000)>>(2*8) |
770 | (m&0x0000FF0000000000)>>(4*8) |
771 | (m&0x00FF000000000000)>>(6*8))
772 | iv |= uint64(e) << 52
773 | iv |= uint64(s) << 63
774 | return math.Float64frombits(iv)
775 | }
776 |
--------------------------------------------------------------------------------
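The Serialize/Deserialize pair above compresses each field as a "delta from the average of the last three deltas" before variable-length encoding it. Below is a small, self-contained round-trip sketch of just that transform, with illustrative names that mirror the deltadeltarizer / dedeltadeltarizer closures.

    package main

    import "fmt"

    // encoder emits, for each value, the difference between its delta and the
    // average of the previous `depth` deltas (the deltadeltarizer above).
    func encoder(depth int) func(int64) int64 {
    	hist := make([]int64, depth)
    	n, ins := 0, 0
    	var last int64
    	return func(v int64) int64 {
    		used := n
    		if used > depth {
    			used = depth
    		}
    		var avg, sum int64
    		for i := 0; i < used; i++ {
    			sum += hist[i]
    		}
    		if used > 0 {
    			avg = sum / int64(used)
    		}
    		d := v - last
    		last = v
    		hist[ins] = d
    		ins = (ins + 1) % depth
    		n++
    		return d - avg
    	}
    }

    // decoder inverts encoder as long as it sees the same stream of delta-deltas
    // (the dedeltadeltarizer above).
    func decoder(depth int) func(int64) int64 {
    	hist := make([]int64, depth)
    	n, ins := 0, 0
    	var last int64
    	return func(dd int64) int64 {
    		used := n
    		if used > depth {
    			used = depth
    		}
    		var avg, sum int64
    		for i := 0; i < used; i++ {
    			sum += hist[i]
    		}
    		if used > 0 {
    			avg = sum / int64(used)
    		}
    		d := avg + dd
    		last += d
    		hist[ins] = d
    		ins = (ins + 1) % depth
    		n++
    		return last
    	}
    }

    func main() {
    	enc, dec := encoder(3), decoder(3)
    	for _, v := range []int64{1000, 1010, 1021, 1030, 1041, 1052} {
    		dd := enc(v)
    		fmt.Println(v, dd, dec(dd)) // the third column reproduces the first
    	}
    }

A smaller delta-delta means fewer bytes out of writeSignedHuff, which is why regularly spaced timestamps and slowly varying statistics compress well under this scheme.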