├── qci ├── __init__.py ├── manual_run.sh ├── test_loadgen.ipy ├── test_loadgen2.ipy ├── test_loadgen3.ipy ├── test_endpoint.ipy ├── test_readwrite1.ipy ├── test_readwrite2.ipy ├── test_changedrange.ipy ├── utils.ipy ├── runtests.ipy └── test_readstat1.ipy ├── .gitignore ├── internal ├── cephprovider │ ├── test │ ├── cephprovider.h │ ├── cephcache.go │ ├── cephprovider.c │ └── cephprovider.go ├── bstore │ ├── bstore.go │ ├── linker.go │ ├── blockcache.go │ ├── blocktypes_test.go │ ├── bstore_test.go │ ├── blockstore.go │ └── blocktypes.go ├── bprovider │ ├── bprovider.go │ └── bprovider_test.go └── fileprovider │ └── fileprovider.go ├── cpinterface ├── go.capnp ├── interface.capnp └── cpinterface.go ├── Makefile ├── .project ├── tools ├── addtarget └── scrub ├── btrdb.conf ├── quasar.conf ├── logconfig.xml ├── README.md ├── btrdbd ├── main.go └── config.go ├── qtree ├── operators.go ├── qtree_test.go ├── qtree_utils.go └── qtree2_test.go ├── quasar.go └── quasar_test.go /qci/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/* 2 | *.pyc 3 | pkg/* 4 | src/* 5 | *~ 6 | .idea 7 | quasar.iml 8 | *.log 9 | -------------------------------------------------------------------------------- /internal/cephprovider/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UlricQin/btrdb/master/internal/cephprovider/test -------------------------------------------------------------------------------- /cpinterface/go.capnp: -------------------------------------------------------------------------------- 1 | @0xd12a1c51fedd6c88; 2 | annotation package(file) :Text; 3 | annotation import(file) :Text; 4 | annotation doc(struct, field, enum) :Text; 5 | annotation tag(enumerant) : Text; 6 | annotation notag(enumerant) : Void; 7 | annotation customtype(field) : Text; 8 | $package("capn"); 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | bqserver: 3 | go build -o bin/qserver github.com/SoftwareDefinedBuildings/quasar/qserver 4 | 5 | cleanbins: 6 | rm -f bin/qserver bin/qtool 7 | 8 | bins: cleanbins bqserver 9 | 10 | cleandb: 11 | rm -f /srv/quasar/*.db 12 | rm -f /srv/quasartestdb/* 13 | mongo quasar2 --eval 'db.superblocks.remove({})' 14 | 15 | newdbs: cleandb bins 16 | ./bin/qserver -makedb 17 | -------------------------------------------------------------------------------- /qci/manual_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | rm -f profile.* 3 | rm -f log.* 4 | export goversion=go_64_1.4.2 5 | export GOROOT=/srv/$goversion 6 | export GO=$GOROOT/bin/go 7 | mkdir -p gopath 8 | export GOPATH=`pwd`/gopath 9 | export PATH=$PATH:$GOROOT/bin/ 10 | git pull 11 | $GO get -v -d ./... 
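# Build the server binary as ./exe; qci/runtests.ipy later invokes it as ./exe -makedb and ./exe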
12 | $GO build -a -v -o exe ./qserver 13 | export CEPHTYPE=filestore 14 | export TEST_TYPE=loadgen2 15 | ipython qci/runtests.ipy 16 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | quasar 4 | 5 | 6 | 7 | 8 | 9 | com.googlecode.goclipse.goBuilder 10 | 11 | 12 | 13 | 14 | 15 | com.googlecode.goclipse.core.goNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /tools/addtarget: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | if len(sys.argv) != 3: 3 | print "usage: addtarget " 4 | sys.exit(1) 5 | 6 | _client = pymongo.MongoClient() 7 | db = _client[sys.argv[1]][sys.argv[2]] 8 | 9 | outf = open("targets","a") 10 | print >>outf, "#addtargets %s:",sys.argv[3] 11 | for r in db.find({"Path":{"$regex":sys.argv[3]}}): 12 | print >>outf, "# %s : %s" % (r["Metadata"]["SourceName"], r["Path"]) 13 | print >>outf, r["uuid"] 14 | printf >>outf, "\n" 15 | 16 | outf.close() -------------------------------------------------------------------------------- /internal/bstore/bstore.go: -------------------------------------------------------------------------------- 1 | package bstore 2 | 3 | import ( 4 | "github.com/op/go-logging" 5 | ) 6 | 7 | var lg *logging.Logger 8 | 9 | func init() { 10 | lg = logging.MustGetLogger("log") 11 | } 12 | 13 | //Note to self, if you bump VSIZE such that the max blob goes past 2^16, make sure to adapt 14 | //providers 15 | const ( 16 | VSIZE = 1024 17 | KFACTOR = 64 18 | VBSIZE = 2 + 9*VSIZE + 9*VSIZE + 2*VSIZE //Worst case with huffman 19 | CBSIZE = 1 + KFACTOR*9*6 20 | DBSIZE = VBSIZE 21 | PWFACTOR = uint8(6) //1<<6 == 64 22 | RELOCATION_BASE = 0xFF00000000000000 23 | ) 24 | -------------------------------------------------------------------------------- /internal/cephprovider/cephprovider.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | typedef struct 5 | { 6 | rados_ioctx_t ctx; 7 | rados_completion_t *comps; 8 | int comp_len; 9 | int comp_cap; 10 | } cephprovider_handle_t; 11 | 12 | typedef cephprovider_handle_t* phandle_t; 13 | 14 | void initialize_provider(const char* conffile, const char* pool); 15 | phandle_t handle_create(); 16 | void handle_write(phandle_t seg, uint8_t *uuid, uint64_t address, const char* data, int len, int trunc); 17 | uint64_t handle_obtainrange(cephprovider_handle_t *h); 18 | void handle_init_allocator(cephprovider_handle_t *h); 19 | int handle_read(phandle_t seg, uint8_t *uuid, uint64_t address, char* dest, int len); 20 | void handle_close(phandle_t seg); 21 | -------------------------------------------------------------------------------- /qci/test_loadgen.ipy: -------------------------------------------------------------------------------- 1 | def start_loadgen(): 2 | stdout=open("log.lg.stdout","w") 3 | uuids = [str(uuid.uuid4()) for x in xrange(20)] 4 | rc = subprocess.call(["./loadgen", "-i"]+uuids, stdout=stdout, stderr=subprocess.STDOUT) 5 | print "INSERT RV:",rc 6 | if rc != 0: 7 | sys.exit(rc) 8 | sys.stdout.flush() 9 | rc = subprocess.call(["./loadgen", "-v"]+uuids, stdout=stdout, stderr=subprocess.STDOUT) 10 | print "VERIFY RV:",rc 11 | if rc != 0: 12 | sys.exit(rc) 13 | 14 | 15 | p2 = Process(target=start_loadgen) 16 | p2.start() 17 | p2.join() 18 | if p2.exitcode != 0: 19 | print "EXITCODE FROM LOADGEN:", p2.exitcode 20 
| os.kill(p.pid, 9) 21 | sys.exit(p2.exitcode) 22 | else: 23 | !rm FAILURE 24 | -------------------------------------------------------------------------------- /btrdb.conf: -------------------------------------------------------------------------------- 1 | # This is the configuration file for QUASAR version 2 2 | # without this file, it will not start. It should be 3 | # located either in the directory from which quasar is 4 | # started, or in /etc/quasar/quasar.conf 5 | 6 | [storage] 7 | # Either file-based or Ceph-based storage can be used 8 | provider=file 9 | filepath=/srv/quasar/ 10 | 11 | #provider=ceph 12 | #cephconf=/etc/ceph/ceph.conf 13 | #cephpool=data 14 | 15 | [http] 16 | enabled=true 17 | port=9000 18 | address=0.0.0.0 19 | 20 | [capnp] 21 | enabled=true 22 | port=4410 23 | address=0.0.0.0 24 | 25 | [mongo] 26 | server=localhost 27 | collection=quasar 28 | 29 | [cache] 30 | # Configure the RADOS and block caches. If you have a choice, rather 31 | # spend memory on the block cache. 32 | 33 | # This is measured in blocks, which are at most ~16K 34 | # blockcache=4000000 #64 GB 35 | blockcache=2000000 #32 GB 36 | # blockcache=1000000 #16 GB 37 | # blockcache=500000 #8 GB 38 | # blockcache=250000 #4 GB 39 | # blockcache=62500 #1 GB 40 | 41 | radosreadcache=2048 #in MB 42 | radoswritecache=256 #in MB 43 | 44 | [coalescence] 45 | earlytrip=16384 #readings 46 | interval=5000 #ms 47 | -------------------------------------------------------------------------------- /quasar.conf: -------------------------------------------------------------------------------- 1 | # This is the configuration file for QUASAR version 2 2 | # without this file, it will not start. It should be 3 | # located either in the directory from which quasar is 4 | # started, or in /etc/quasar/quasar.conf 5 | 6 | [storage] 7 | # Either file-based or Ceph-based storage can be used 8 | provider=file 9 | filepath=/srv/quasar/ 10 | 11 | #provider=ceph 12 | #cephconf=/etc/ceph/ceph.conf 13 | #cephpool=data 14 | 15 | [http] 16 | enabled=true 17 | port=9000 18 | address=0.0.0.0 19 | 20 | [capnp] 21 | enabled=true 22 | port=4410 23 | address=0.0.0.0 24 | 25 | [mongo] 26 | server=localhost 27 | collection=quasar 28 | 29 | [cache] 30 | # Configure the RADOS and block caches. If you have a choice, rather 31 | # spend memory on the block cache. 
32 | 33 | # This is measured in blocks, which are at most ~16K 34 | # blockcache=4000000 #64 GB 35 | blockcache=2000000 #32 GB 36 | # blockcache=1000000 #16 GB 37 | # blockcache=500000 #8 GB 38 | # blockcache=250000 #4 GB 39 | # blockcache=62500 #1 GB 40 | 41 | radosreadcache=2048 #in MB 42 | radoswritecache=256 #in MB 43 | 44 | [coalescence] 45 | earlytrip=16384 #readings 46 | interval=5000 #ms 47 | -------------------------------------------------------------------------------- /qci/test_loadgen2.ipy: -------------------------------------------------------------------------------- 1 | import random 2 | import uuid 3 | import subprocess 4 | import sys 5 | 6 | num_streams = 1 7 | def start_loadgen(): 8 | global num_streams 9 | cf = open("loadConfig.ini", "w") 10 | random_seed = random.randint(0,10000) 11 | print "USING RANDOM SEED ", random_seed 12 | cf.write("""TOTAL_RECORDS=24000000 13 | TCP_CONNECTIONS={0} 14 | POINTS_PER_MESSAGE=5000 15 | NANOS_BETWEEN_POINTS=9000000 16 | MAX_TIME_RANDOM_OFFSET = 8999999 17 | FIRST_TIME=1420582220083869629 18 | DB_ADDR=localhost:4410 19 | NUM_STREAMS={0} 20 | RAND_SEED={1} 21 | """.format(num_streams, random_seed) 22 | ) 23 | 24 | for i in xrange(num_streams): 25 | cf.write("UUID%d=%s\n" % (i+1, uuid.uuid4())) 26 | cf.close() 27 | stdout=open("log.lg.1.stdout","w") 28 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT) 29 | print "INSERT RV:",rc 30 | if rc != 0: 31 | sys.exit(rc) 32 | sys.stdout.flush() 33 | term_quasar() 34 | time.sleep(2) 35 | proc_profiles("ins") 36 | start_quasar() 37 | time.sleep(10) 38 | stdout2=open("log.lg.2.stdout","w") 39 | rc = subprocess.call(["./loadgen", "-v"], stdout=stdout2, stderr=subprocess.STDOUT) 40 | print "VERIFY RV:",rc 41 | if rc != 0: 42 | sys.exit(rc) 43 | !rm FAILURE 44 | 45 | start_loadgen() 46 | -------------------------------------------------------------------------------- /qci/test_loadgen3.ipy: -------------------------------------------------------------------------------- 1 | import random 2 | import uuid 3 | import subprocess 4 | import sys 5 | import time 6 | num_streams = 1 7 | def start_loadgen(): 8 | global num_streams 9 | cf = open("loadConfig.ini", "w") 10 | random_seed = random.randint(0,10000) 11 | print "USING RANDOM SEED ", random_seed 12 | cf.write("""TOTAL_RECORDS=24000000 13 | TCP_CONNECTIONS={0} 14 | POINTS_PER_MESSAGE=5000 15 | NANOS_BETWEEN_POINTS=9000000 16 | MAX_TIME_RANDOM_OFFSET = 8999999 17 | FIRST_TIME=1420582220083869629 18 | DB_ADDR=localhost:4410 19 | NUM_STREAMS={0} 20 | RAND_SEED={1} 21 | """.format(num_streams, random_seed) 22 | ) 23 | 24 | for i in xrange(num_streams): 25 | cf.write("UUID%d=%s\n" % (i+1, uuid.uuid4())) 26 | cf.close() 27 | stdout=open("log.lg.stdout1","w") 28 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT) 29 | print "INSERT RV:",rc 30 | if rc != 0: 31 | sys.exit(rc) 32 | sys.stdout.flush() 33 | term_quasar() 34 | time.sleep(2) 35 | proc_profiles("inst") 36 | start_quasar() 37 | time.sleep(4) 38 | stdout2=open("log.lg.stdout2","w") 39 | rc = subprocess.call(["./loadgen", "-d"], stdout=stdout2, stderr=subprocess.STDOUT) 40 | print "DELETE RV:",rc 41 | if rc != 0: 42 | sys.exit(rc) 43 | !rm FAILURE 44 | 45 | 46 | start_loadgen() 47 | -------------------------------------------------------------------------------- /qci/test_endpoint.ipy: -------------------------------------------------------------------------------- 1 | import random 2 | import uuid 3 | import subprocess 4 | import sys 5 
| import time 6 | import json 7 | def start_loadgen(): 8 | global num_streams 9 | cf = open("loadConfig.ini", "w") 10 | cf.write("""TOTAL_RECORDS=120000000 11 | TCP_CONNECTIONS=1 12 | POINTS_PER_MESSAGE=5000 13 | NANOS_BETWEEN_POINTS=9000000 14 | MAX_TIME_RANDOM_OFFSET = 8999999 15 | FIRST_TIME=1420582220083869629 16 | DB_ADDR=localhost:4410 17 | NUM_STREAMS=1 18 | UUID1=9f67541c-95ee-11e4-a7ac-0026b6df9cf2 19 | RAND_SEED=15 20 | """) 21 | cf.close() 22 | stdout=open("log.lg.stdout1","w") 23 | rc = subprocess.call(["./loadgen", "-i"], stdout=stdout, stderr=subprocess.STDOUT) 24 | print "INSERT RV:",rc 25 | if rc != 0: 26 | sys.exit(rc) 27 | sys.stdout.flush() 28 | term_quasar() 29 | time.sleep(2) 30 | proc_profiles("inst") 31 | start_quasar() 32 | time.sleep(4) 33 | 34 | # Check whether we have extra points 35 | dstr = !curl -s "http://localhost:9000/data/uuid/9f67541c-95ee-11e4-a7ac-0026b6df9cf2?starttime=1421395993269633024&endtime=1421455504336486400&unitoftime=ns&pw=37" 36 | data = json.loads(dstr[0]) 37 | lastpoint = data[0]["XReadings"][-1] 38 | lasttime = (lastpoint[0] * 1000000) + lastpoint[1] 39 | if lasttime != (1421455504336486400 - (2 ** 37)): 40 | print "Extra or missing points detected at end of statistical query" 41 | print "last time:", lasttime 42 | sys.exit(1) 43 | !rm FAILURE 44 | 45 | 46 | start_loadgen() 47 | -------------------------------------------------------------------------------- /logconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | stdout 4 | console 5 | 6 | DEBUG 7 | 8 | 9 | file 10 | file 11 | FINEST 12 | quasar.log 13 | [%D %T] [%L] (%S) %M 14 | false 15 | 20M 16 | 0K 17 | true 18 | 19 | 20 | file 21 | file 22 | WARNING 23 | quasar.serious.log 24 | [%D %T] [%L] (%S) %M 25 | false 26 | 20M 27 | 0K 28 | true 29 | 30 | 31 | -------------------------------------------------------------------------------- /qci/test_readwrite1.ipy: -------------------------------------------------------------------------------- 1 | 2 | import qdf 3 | import qdf.quasar 4 | import sys 5 | import random 6 | import uuid 7 | import time 8 | from twisted.internet import defer, protocol, reactor 9 | print "entered test readwrite1" 10 | EXIT_CODE = None 11 | def setexit(code): 12 | global EXIT_CODE 13 | EXIT_CODE = code 14 | reactor.stop() 15 | 16 | @defer.inlineCallbacks 17 | def testbody(db): 18 | print "connected" 19 | TOTALPOINTS = 1000000 20 | PERINSERT = 1000 21 | INTERVAL = int(1E9/120.) 
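# i.e. ~8,333,333 ns between points, a nominal 120 Hz stream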
22 | UID = str(uuid.uuid4()) 23 | randomdata = [(x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)] 24 | idx = 0 25 | print "random data generated" 26 | for i in xrange(TOTALPOINTS/PERINSERT): 27 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT]) 28 | idx += PERINSERT 29 | time.sleep(20) 30 | readdata = [] 31 | idx = 0 32 | print "reading data" 33 | for i in xrange(TOTALPOINTS/PERINSERT): 34 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT) 35 | (version, values) = rv 36 | readdata += [(v.time, v.value) for v in values] 37 | print "len readdata:",len(readdata) 38 | print "len insert:",len(randomdata) 39 | for i in xrange(len(randomdata)): 40 | if randomdata[i][0] != readdata[i][0]: 41 | print "time mismatch index",i 42 | break 43 | if randomdata[i][1] != readdata[i][1]: 44 | print "value mismatch index",i 45 | break 46 | else: 47 | print "lists match" 48 | setexit(0) 49 | return 50 | setexit(1) 51 | return 52 | 53 | def onFail(param): 54 | print "Encountered error: ", param 55 | setexit(2) 56 | 57 | def entrypoint(): 58 | print "in entrypoint" 59 | try: 60 | q = qdf.quasar.connectToArchiver("localhost", 4410) 61 | q.addCallback(testbody) 62 | q.addErrback(onFail) 63 | except Exception as e: 64 | print "ex: ",e 65 | setexit(1) 66 | 67 | reactor.callWhenRunning(entrypoint) 68 | reactor.run() 69 | if EXIT_CODE == None: 70 | EXIT_CODE = 42 71 | if EXIT_CODE != 0: 72 | sys.exit(EXIT_CODE) 73 | else: 74 | !rm FAILURE 75 | -------------------------------------------------------------------------------- /tools/scrub: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pymongo 3 | import uuid 4 | import rados 5 | import sys 6 | import time 7 | 8 | if len(sys.argv) != 4: 9 | print "usage: scrub " 10 | sys.exit(1) 11 | 12 | _client = pymongo.MongoClient() 13 | db = _client[sys.argv[2]].superblocks 14 | 15 | 16 | uuids = [] 17 | #Get target uuids 18 | with open(sys.argv[3],"r") as uf: 19 | for l in uf.readlines(): 20 | l = l.strip() 21 | if l.startswith("#") or len(l) == 0: 22 | continue 23 | uuids.append(uuid.UUID(l)) 24 | 25 | #Get all metadata uuids 26 | known_uuids = [uuid.UUID(x) for x in db.distinct("uuid")] 27 | print "There are %d known uuids" % len(known_uuids) 28 | 29 | #Get all object names for these uuids 30 | cluster = rados.Rados(conffile="/etc/ceph/ceph.conf") 31 | print "Will attempt to connect to: " + str(cluster.conf_get('mon initial members')) 32 | 33 | cluster.connect() 34 | time.sleep(1) 35 | for i in xrange(10): 36 | try: 37 | cluster.require_state("connected") 38 | break 39 | except rados.RadosStateError as e: 40 | print e 41 | print "Not connected yet" 42 | time.sleep(1) 43 | 44 | ioctx = cluster.open_ioctx(sys.argv[1]) 45 | obj_iter = ioctx.list_objects() 46 | 47 | rogue_uuids = set() 48 | toremove = [] 49 | total = 0 50 | for obj in obj_iter: 51 | if obj.key == "allocator": 52 | continue 53 | total += 1 54 | uid = uuid.UUID(obj.key[:32]) 55 | if uid not in known_uuids: 56 | rogue_uuids.add(uid) 57 | if uid in uuids: 58 | toremove.append(obj.key) 59 | if total != 0: 60 | print "A total of %d objects matched (%.2f%%)" % (len(toremove), (float(len(toremove))/total)*100) 61 | else: 62 | print "No objects" 63 | print "There are %d rogue uuids" % len(rogue_uuids) 64 | 65 | print "If you wish to continue and delete the quasar objects, type 'yes i really do' exactly" 66 | inp = raw_input(">") 67 | if inp != "yes i really do": 68 | print "Aborting" 69 | 
sys.exit(1) 70 | 71 | for key in toremove: 72 | print "Removing: ",key 73 | ioctx.remove_object(key) 74 | 75 | print "If you wish to continue and delete the metadata, type 'yes I really do' exactly" 76 | inp = raw_input(">") 77 | if inp != "yes I really do": 78 | print "Aborting" 79 | sys.exit(1) 80 | 81 | for u in uuids: 82 | print "Removing: ", str(u) 83 | rv = db.remove({"uuid":str(u)}) 84 | print "OK, %d generations nuked" % rv["n"] 85 | 86 | print "Success" 87 | -------------------------------------------------------------------------------- /internal/bprovider/bprovider.go: -------------------------------------------------------------------------------- 1 | package bprovider 2 | 3 | //A blob provider implements a simple interface for storing blobs 4 | //An address base gets locked in the form of a segment, and then an arbitrary number of 5 | //blobs are written sequentially from that base, with each write call returning the address 6 | //of the base of the next write. At the end, the segment is unlocked. 7 | //For reading, the blob provider needs to work out its own framing, as it gets given 8 | //a start address and must magically return the blob corresponding to that address 9 | //The addresses have no special form*, other than being uint64s. It is up to the provider 10 | //to encode whatever metadata it requires inside that uint64 11 | 12 | //*I lied, addresses must not have the top byte as FF, those are reserved for relocation addresses 13 | 14 | //In case it is not obvious, the challenge a bprovider faces is being able to hand out an address 15 | //and support an arbitrary sized blob being written to that address. At the moment the max size of 16 | //a blob can be determined by max(CBSIZE, VBSIZE) which is under 32k, but may be as little as 1k 17 | //for well compressed blocks. 18 | 19 | import ( 20 | "errors" 21 | ) 22 | 23 | var ErrNoSpace = errors.New("No more space") 24 | var ErrInvalidArgument = errors.New("Invalid argument") 25 | var ErrExists = errors.New("File exists") 26 | 27 | type Segment interface { 28 | //Returns the address of the first free word in the segment when it was locked 29 | BaseAddress() uint64 30 | 31 | //Unlocks the segment for the StorageProvider to give to other consumers 32 | //Implies a flush 33 | Unlock() 34 | 35 | //Writes a slice to the segment, returns immediately 36 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument 37 | //It is up to the implementer to work out how to report no space immediately 38 | //The uint64 is the address to be used for the next write 39 | Write(uuid []byte, address uint64, data []byte) (uint64, error) 40 | 41 | //Block until all writes are complete. Note this does not imply a flush of the underlying files. 
42 | Flush() 43 | } 44 | type StorageProvider interface { 45 | 46 | //Called at startup of a normal run 47 | Initialize(opts map[string]string) 48 | 49 | //Called to create the database for the first time 50 | //Note that initialize is not called before this function call 51 | //and you can assume the program will exit shortly after this 52 | //function call 53 | CreateDatabase(opts map[string]string) error 54 | 55 | // Lock a segment, or block until a segment can be locked 56 | // Returns a Segment struct 57 | LockSegment(uuid []byte) Segment 58 | 59 | // Read the blob into the given buffer 60 | Read(uuid []byte, address uint64, buffer []byte) []byte 61 | } 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BTrDB 2 | ===== 3 | 4 | The Berkeley TRee DataBase is a high performance time series 5 | database designed to support high density data storage applications. 6 | This project used to be called QUASAR, but we have changed the name 7 | partly to match publications, and partly as a flag day. The capnp interface 8 | in BTrDB is designed to better support large queries and clusters and is not 9 | backwards compatible with the quasar interface. 10 | 11 | ### Dependencies 12 | 13 | BTrDB uses a MongoDB collection to store metadata. Also, if installed in High Availability 14 | mode, it requires a ceph pool. Note that even if not using ceph, librados needs to be 15 | installed. 16 | 17 | ### Installation 18 | 19 | To run an archiver, make sure that you have Go >= 1.4 installed and then 20 | run the following: 21 | 22 | ``` 23 | apt-get install librados-dev 24 | go get github.com/SoftwareDefinedBuildings/btrdb/btrdbd 25 | ``` 26 | 27 | This will install the tools into your 28 | $GOPATH/bin directory. If you have this directory on your $PATH then you do 29 | not need to do anything further. Otherwise you will need to add the binaries 30 | to your $PATH variable manually. 31 | 32 | Note that in order to run the btrdb server, you will need to copy btrdb.conf 33 | from the github repository to /etc/btrdb/btrdb.conf (or the directory that 34 | you are in). 35 | 36 | An alternative to 'go get'ing to your GOPATH is to clone the repository then do: 37 | 38 | ``` 39 | apt-get install librados-dev 40 | go get -d ./... && go install ./btrdbd 41 | ``` 42 | 43 | This will also put the btrdbd binary in your $GOPATH/bin. 44 | 45 | ### Configuration 46 | 47 | Sensible defaults (for a production deployment) are already found in btrdb.conf. Some things you may need 48 | to adjust: 49 | - The MongoDB server and collection name 50 | - The block cache size (defaults to 32GB). Note that quasar uses more than this, this is just 51 | a primary contributor to the RAM footprint. 52 | - The file storage path or ceph details 53 | 54 | Once your configuration is set up, you can set up the files, and database indices with 55 | 56 | ``` 57 | btrdbd -makedb 58 | ``` 59 | 60 | Which should print out: 61 | ``` 62 | Configuration OK! 63 | Creating a new database 64 | Done 65 | ``` 66 | 67 | You can now run a server with: 68 | ``` 69 | btrdbd 70 | ``` 71 | 72 | ### Using the database 73 | 74 | Note that we are presently working on release engineering, and hope to release the first (public) version in August 2016. If you are using it now, bear in mind it is still in development. 
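As a rough illustration of what a query looks like (taken from the CI test qci/test_endpoint.ipy, so treat it as a sketch rather than a stable API reference), statistical values can be fetched from the HTTP interface with a plain GET; substitute your own stream UUID, time range and point width (`pw`, an aggregation window of 2^pw nanoseconds):

```
curl -s "http://localhost:9000/data/uuid/9f67541c-95ee-11e4-a7ac-0026b6df9cf2?starttime=1421395993269633024&endtime=1421455504336486400&unitoftime=ns&pw=37"
```

The response is JSON; the test parses the `XReadings` array out of it to check the last returned window.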
75 | 76 | To communicate with the database, there are [go bindings](https://github.com/SoftwareDefinedBuildings/btrdb-go) and [python bindings](https://github.com/SoftwareDefinedBuildings/btrdb-python). The go bindings are faster and more maintained. 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /internal/bstore/linker.go: -------------------------------------------------------------------------------- 1 | package bstore 2 | 3 | import ( 4 | "log" 5 | "sort" 6 | "sync" 7 | 8 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider" 9 | ) 10 | 11 | var ser_buf_pool = sync.Pool{ 12 | New: func() interface{} { 13 | return make([]byte, DBSIZE) 14 | }, 15 | } 16 | 17 | type pCBArr []*Coreblock 18 | 19 | func (dca pCBArr) Len() int { 20 | return len(dca) 21 | } 22 | 23 | func (dca pCBArr) Swap(i, j int) { 24 | dca[i], dca[j] = dca[j], dca[i] 25 | } 26 | 27 | func (dca pCBArr) Less(i, j int) bool { 28 | return dca[i].PointWidth < dca[j].PointWidth 29 | } 30 | 31 | func LinkAndStore(uuid []byte, bs *BlockStore, bp bprovider.StorageProvider, vblocks []*Vectorblock, cblocks []*Coreblock) map[uint64]uint64 { 32 | loaned_sercbufs := make([][]byte, len(cblocks)) 33 | loaned_servbufs := make([][]byte, len(vblocks)) 34 | 35 | //First sort the vblock array (time before lock costs less) 36 | sort.Sort(pCBArr(cblocks)) 37 | 38 | //Then lets lock a segment 39 | seg := bp.LockSegment(uuid) 40 | 41 | backpatch := make(map[uint64]uint64, len(cblocks)+len(vblocks)+1) 42 | backpatch[0] = 0 //Null address is still null 43 | 44 | ptr := seg.BaseAddress() 45 | 46 | //First step is to write all the vector blocks, order is not important 47 | for i := 0; i < len(vblocks); i++ { 48 | vb := vblocks[i] 49 | 50 | //Store relocation for cb backpatch 51 | backpatch[vb.Identifier] = ptr 52 | 53 | //Update the block. VB should now look as if it were read from disk 54 | vb.Identifier = ptr 55 | //So we can cache it 56 | bs.cachePut(ptr, vb) 57 | 58 | //Now write it 59 | serbuf := ser_buf_pool.Get().([]byte) 60 | cutdown := vb.Serialize(serbuf) 61 | loaned_servbufs[i] = serbuf 62 | nptr, err := seg.Write(uuid, ptr, cutdown) 63 | if err != nil { 64 | log.Panicf("Got error on segment write: %v", err) 65 | } 66 | ptr = nptr 67 | } 68 | 69 | //Now we need to write the coreblocks out 70 | for i := 0; i < len(cblocks); i++ { 71 | cb := cblocks[i] 72 | 73 | //Relocate and backpatch 74 | for k := 0; k < KFACTOR; k++ { 75 | if cb.Addr[k] < RELOCATION_BASE { 76 | continue 77 | } 78 | nval, ok := backpatch[cb.Addr[k]] 79 | if !ok { 80 | log.Panicf("Failed to backpatch! 
(trying to find addr 0x%016x)", cb.Addr[k]) 81 | } 82 | cb.Addr[k] = nval 83 | } 84 | backpatch[cb.Identifier] = ptr 85 | cb.Identifier = ptr 86 | bs.cachePut(ptr, cb) 87 | 88 | serbuf := ser_buf_pool.Get().([]byte) 89 | cutdown := cb.Serialize(serbuf) 90 | loaned_sercbufs[i] = serbuf 91 | nptr, err := seg.Write(uuid, ptr, cutdown) 92 | if err != nil { 93 | log.Panicf("Got error on segment write: %v", err) 94 | } 95 | ptr = nptr 96 | } 97 | seg.Unlock() 98 | //Return buffers to pool 99 | for _, v := range loaned_sercbufs { 100 | ser_buf_pool.Put(v) 101 | } 102 | for _, v := range loaned_servbufs { 103 | ser_buf_pool.Put(v) 104 | } 105 | return backpatch 106 | } 107 | -------------------------------------------------------------------------------- /internal/bstore/blockcache.go: -------------------------------------------------------------------------------- 1 | package bstore 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type CacheItem struct { 8 | val Datablock 9 | vaddr uint64 10 | newer *CacheItem 11 | older *CacheItem 12 | } 13 | 14 | func (bs *BlockStore) initCache(size uint64) { 15 | bs.cachemax = size 16 | bs.cachemap = make(map[uint64]*CacheItem, size) 17 | go func() { 18 | for { 19 | lg.Info("Cachestats: %d misses, %d hits, %.2f %%", 20 | bs.cachemiss, bs.cachehit, (float64(bs.cachehit*100) / float64(bs.cachemiss+bs.cachehit))) 21 | time.Sleep(5 * time.Second) 22 | } 23 | }() 24 | } 25 | 26 | //This function must be called with the mutex held 27 | func (bs *BlockStore) cachePromote(i *CacheItem) { 28 | if bs.cachenew == i { 29 | //Already at front 30 | return 31 | } 32 | if i.newer != nil { 33 | i.newer.older = i.older 34 | } 35 | if i.older != nil { 36 | i.older.newer = i.newer 37 | } 38 | if bs.cacheold == i && i.newer != nil { 39 | //This was the tail of a list longer than 1 40 | bs.cacheold = i.newer 41 | } else if bs.cacheold == nil { 42 | //This was/is the only item in the list 43 | bs.cacheold = i 44 | } 45 | 46 | i.newer = nil 47 | i.older = bs.cachenew 48 | if bs.cachenew != nil { 49 | bs.cachenew.newer = i 50 | } 51 | bs.cachenew = i 52 | } 53 | func (bs *BlockStore) cachePut(vaddr uint64, item Datablock) { 54 | if bs.cachemax == 0 { 55 | return 56 | } 57 | bs.cachemtx.Lock() 58 | i, ok := bs.cachemap[vaddr] 59 | if ok { 60 | bs.cachePromote(i) 61 | } else { 62 | i = &CacheItem{ 63 | val: item, 64 | vaddr: vaddr, 65 | } 66 | bs.cachemap[vaddr] = i 67 | bs.cachePromote(i) 68 | bs.cachelen++ 69 | bs.cacheCheckCap() 70 | } 71 | bs.cachemtx.Unlock() 72 | } 73 | 74 | func (bs *BlockStore) cacheGet(vaddr uint64) Datablock { 75 | if bs.cachemax == 0 { 76 | bs.cachemiss++ 77 | return nil 78 | } 79 | bs.cachemtx.Lock() 80 | rv, ok := bs.cachemap[vaddr] 81 | if ok { 82 | bs.cachePromote(rv) 83 | } 84 | bs.cachemtx.Unlock() 85 | if ok { 86 | bs.cachehit++ 87 | return rv.val 88 | } else { 89 | bs.cachemiss++ 90 | return nil 91 | } 92 | } 93 | 94 | //debug function 95 | func (bs *BlockStore) walkCache() { 96 | fw := 0 97 | bw := 0 98 | it := bs.cachenew 99 | for { 100 | if it == nil { 101 | break 102 | } 103 | fw++ 104 | if it.older == nil { 105 | lg.Info("fw walked to end, compare %p/%p", it, bs.cacheold) 106 | } 107 | it = it.older 108 | } 109 | it = bs.cacheold 110 | for { 111 | if it == nil { 112 | break 113 | } 114 | bw++ 115 | if it.newer == nil { 116 | lg.Info("bw walked to end, compare %p/%p", it, bs.cachenew) 117 | } 118 | it = it.newer 119 | } 120 | lg.Info("Walked cache fw=%v, bw=%v, map=%v", fw, bw, len(bs.cachemap)) 121 | } 122 | 123 | //This must be called with the mutex 
held 124 | func (bs *BlockStore) cacheCheckCap() { 125 | for bs.cachelen > bs.cachemax { 126 | i := bs.cacheold 127 | delete(bs.cachemap, i.vaddr) 128 | if i.newer != nil { 129 | i.newer.older = nil 130 | } 131 | bs.cacheold = i.newer 132 | bs.cachelen-- 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /qci/test_readwrite2.ipy: -------------------------------------------------------------------------------- 1 | 2 | import qdf 3 | import qdf.quasar 4 | import sys 5 | import random 6 | import uuid 7 | import time 8 | from twisted.internet import defer, protocol, reactor 9 | print "entered test readwrite1" 10 | EXIT_CODE = None 11 | def setexit(code): 12 | global EXIT_CODE 13 | EXIT_CODE = code 14 | reactor.stop() 15 | 16 | @defer.inlineCallbacks 17 | def testbody(db): 18 | print "connected" 19 | TOTALPOINTS = 1000000 20 | PERINSERT = 1000 21 | INTERVAL = int(1E9/120.) 22 | UID = str(uuid.uuid4()) 23 | randomdata = [(x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)] 24 | idx = 0 25 | print "random data generated" 26 | for i in xrange(TOTALPOINTS/PERINSERT): 27 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT]) 28 | idx += PERINSERT 29 | time.sleep(20) 30 | readdata = [] 31 | idx = 0 32 | print "reading data" 33 | for i in xrange(TOTALPOINTS/PERINSERT): 34 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT) 35 | (version, values) = rv 36 | readdata += [(v.time, v.value) for v in values] 37 | print "len readdata:",len(readdata) 38 | print "len insert:",len(randomdata) 39 | for i in xrange(len(randomdata)): 40 | if randomdata[i][0] != readdata[i][0]: 41 | print "time mismatch index",i 42 | break 43 | if randomdata[i][1] != readdata[i][1]: 44 | print "value mismatch index",i 45 | break 46 | else: 47 | print "lists match" 48 | #delete middle 1/3 of data 49 | st = randomdata[len(randomdata)/3][0] 50 | et = randomdata[2*len(randomdata)/3][0] 51 | (status, rv) = yield db.deleteRange(UID, st, et) 52 | #also delete it from our data 53 | ndat = randomdata[0:len(randomdata)/3] #exlusive 54 | ndat += randomdata[2*len(randomdata)/3:] #inclusive 55 | 56 | readdata = [] 57 | print "reading data AFTER DELETE" 58 | for i in xrange(TOTALPOINTS/PERINSERT): 59 | (status, rv) = yield db.queryStandardValues(UID, i*INTERVAL*PERINSERT, (i+1)*INTERVAL*PERINSERT) 60 | (version, values) = rv 61 | readdata += [(v.time, v.value) for v in values] 62 | print "len readdata:",len(readdata) 63 | print "len insert:",len(ndat) 64 | odataskip = randomdata[666664:666668] 65 | print "odataskip:",odataskip 66 | for i in xrange(len(ndat)): 67 | if ndat[i][0] != readdata[i][0]: 68 | print "time mismatch index",i 69 | break 70 | if ndat[i][1] != readdata[i][1]: 71 | print "value mismatch index",i 72 | print "received",readdata[i][1] 73 | print "expected",ndat[i][1] 74 | print "nearby expected", ndat[i-2:i+2] 75 | print "nearby received", readdata[i-2:i+2] 76 | print "nearby ODAT", randomdata[i-2:i+2] 77 | break 78 | else: 79 | print "lists match" 80 | setexit(0) 81 | return 82 | 83 | setexit(1) 84 | return 85 | 86 | def onFail(param): 87 | print "Encountered error: ", param 88 | setexit(2) 89 | 90 | def entrypoint(): 91 | print "in entrypoint" 92 | try: 93 | q = qdf.quasar.connectToArchiver("localhost", 4410) 94 | q.addCallback(testbody) 95 | q.addErrback(onFail) 96 | except Exception as e: 97 | print "ex: ",e 98 | setexit(1) 99 | 100 | reactor.callWhenRunning(entrypoint) 101 | reactor.run() 102 | if EXIT_CODE == None: 
103 | EXIT_CODE = 42 104 | if EXIT_CODE != 0: 105 | sys.exit(EXIT_CODE) 106 | else: 107 | !rm FAILURE 108 | -------------------------------------------------------------------------------- /qci/test_changedrange.ipy: -------------------------------------------------------------------------------- 1 | 2 | import qdf 3 | import qdf.quasar 4 | import sys 5 | import random 6 | import uuid 7 | import time 8 | from twisted.internet import defer, protocol, reactor 9 | print "entered test changedrange" 10 | EXIT_CODE = None 11 | def setexit(code): 12 | global EXIT_CODE 13 | EXIT_CODE = code 14 | reactor.stop() 15 | 16 | @defer.inlineCallbacks 17 | def testbody(db): 18 | print "connected" 19 | TOTALPOINTS = 1000000 20 | PERINSERT = 1000 21 | INTERVAL = int(1E9/120.) 22 | UID = str(uuid.uuid4()) 23 | OFFSET = random.randrange(100,1000000000000) 24 | randomdata = [(OFFSET + x*INTERVAL, random.random()) for x in xrange(TOTALPOINTS)] 25 | e_t = randomdata[-1][0] 26 | s_t = OFFSET 27 | print "SET: ", randomdata[0], randomdata[-1] 28 | print "e_t:", e_t 29 | print "s_t:", s_t 30 | idx = 0 31 | print "random data generated" 32 | for i in xrange(TOTALPOINTS/PERINSERT): 33 | yield db.insertValues(UID, randomdata[idx:idx+PERINSERT]) 34 | idx += PERINSERT 35 | 36 | #immediate query 37 | srep = [] 38 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55) 39 | print "status: ", status 40 | (version, values) = rv 41 | for v in values: 42 | srep.append([v.time, v.min, v.mean, v.max, v.count]) 43 | print "preflush:", srep 44 | print "version:", version 45 | #preflush_count = srep[0][4] 46 | preflush_count = 0 47 | print "flushing" 48 | yield db.flush(UID) 49 | 50 | srep = [] 51 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55) 52 | print "status: ", status 53 | (version, values) = rv 54 | for v in values: 55 | srep.append([v.time, v.min, v.mean, v.max, v.count]) 56 | 57 | #postflush_count = srep[0][4] 58 | print "postflush:", srep 59 | print "version:", version 60 | 61 | print "flushing2" 62 | yield db.flush(UID) 63 | 64 | srep = [] 65 | (status, rv) = yield db.queryStatisticalValues(UID, 0, (1<<55), 55) 66 | print "status2: ", status 67 | (version, values) = rv 68 | for v in values: 69 | srep.append([v.time, v.min, v.mean, v.max, v.count]) 70 | 71 | #postflush_count = srep[0][4] 72 | print "postflush2:", srep 73 | print "version2:", version 74 | 75 | #print "prepost counts: ",preflush_count, postflush_count 76 | 77 | def expected_cr(st, et, res): 78 | return st & ~((1<>>> USING %v AS SEED <<<<<", sd) 12 | rand.Seed(sd) 13 | } 14 | 15 | func Test_DeCompose(t *testing.T) { 16 | for i := 0; i < 16; i++ { 17 | x := rand.Float64() 18 | packed_m, packed_e := decompose(x) 19 | //log.Warning("x= %v m=%v e=%v",x, packed_m, packed_e) 20 | rv := recompose(packed_m, packed_e) 21 | if rv != x { 22 | t.Errorf("Number did not convert: +v", x) 23 | } 24 | } 25 | for i := 0; i < 10000000; i++ { 26 | x := rand.Float64() 27 | packed_m, packed_e := decompose(x) 28 | rv := recompose(packed_m, packed_e) 29 | if rv != x { 30 | t.Errorf("Number did not convert: +v", x) 31 | } 32 | } 33 | } 34 | 35 | func Test_2DeCompose(t *testing.T) { 36 | log.Warning("testing") 37 | for i := 0; i < 16; i++ { 38 | x := float64(i * 100000.0) 39 | packed_m, packed_e := decompose(x) 40 | rv := recompose(packed_m, packed_e) 41 | if rv != x { 42 | t.Errorf("Number did not convert: exp %v got %v", x, rv) 43 | } 44 | } 45 | } 46 | 47 | func Test_CB1(t *testing.T) { 48 | c := new(Coreblock) 49 | for i := 0; i < KFACTOR; i++ { 
50 | c.Addr[i] = uint64(i + 1) 51 | } 52 | sarr := make([]byte, CBSIZE) 53 | donearr := c.Serialize(sarr) 54 | cn := new(Coreblock) 55 | cn.Deserialize(donearr) 56 | if !CompareNoTags(*c, *cn, []string{"implicit"}) { 57 | t.Error("Core block SERDES faled") 58 | } 59 | } 60 | 61 | func Test_Pack1(t *testing.T) { 62 | tst := func(x uint64) int { 63 | b := make([]byte, 9) 64 | ln := writeUnsignedHuff(b, x) 65 | for i := ln; i < 9; i++ { 66 | if b[i] != 0 { 67 | t.Errorf("Unexpected non-null byte") 68 | } 69 | } 70 | xr, _, _ := readUnsignedHuff(b) 71 | if xr != x { 72 | t.Errorf("Number did not match:", x, xr) 73 | } 74 | return ln 75 | } 76 | //First test around the boundaries 77 | var order uint64 78 | for order = 0; order < 64; order++ { 79 | for offset := -4; offset < 4; offset++ { 80 | x := uint64((1 << order) + offset) 81 | tst(x) 82 | } 83 | } 84 | 85 | //Now test that the huff boundaries have the write number of chars 86 | bcheck := []struct { 87 | n uint64 88 | exp int 89 | }{ 90 | {(1 << 7) - 1, 1}, 91 | {(1 << 7), 2}, 92 | {(1 << 14) - 1, 2}, 93 | {(1 << 14), 3}, 94 | {(1 << 20) - 1, 3}, 95 | {(1 << 20), 4}, 96 | {(1 << 28) - 1, 4}, 97 | {(1 << 28), 5}, 98 | {(1 << 36) - 1, 5}, 99 | {(1 << 36), 6}, 100 | {(1 << 42) - 1, 6}, 101 | {(1 << 42), 7}, 102 | {(1 << 50) - 1, 7}, 103 | {(1 << 50), 8}, 104 | {(1 << 58) - 1, 8}, 105 | {(1 << 58), 9}, 106 | {0xFFFFFFFFFFFFFFFF, 9}, 107 | } 108 | for _, ob := range bcheck { 109 | l := tst(ob.n) 110 | if l != ob.exp { 111 | t.Errorf("Did not get expected number of bytes out test=", ob, l) 112 | } 113 | } 114 | 115 | //Check the big number 116 | tst(0xFFFFFFFFFFFFFFFF) 117 | 118 | //Check the small number 119 | tst(0) 120 | 121 | //Check random numbers 122 | for i := 0; i < 100000; i++ { 123 | x := uint64(rand.Int63()) 124 | tst(x) 125 | } 126 | } 127 | 128 | func Test_Pack2(t *testing.T) { 129 | //Unsigned numbers are probably covered ok, lets try a few signed numbers 130 | //Check random numbers 131 | tst := func(x int64) int { 132 | b := make([]byte, 9) 133 | ln := writeSignedHuff(b, x) 134 | for i := ln; i < 9; i++ { 135 | if b[i] != 0 { 136 | t.Errorf("Unexpected non-null byte") 137 | } 138 | } 139 | xr, _, _ := readSignedHuff(b) 140 | if xr != x { 141 | t.Errorf("Number did not match:", x, xr) 142 | } 143 | return ln 144 | } 145 | for i := 0; i < 10000000; i++ { 146 | x := rand.Int63() 147 | tst(x) 148 | } 149 | tst(-1) 150 | tst(-0x7FFFFFFFFFFFFFFF) 151 | tst(0x7FFFFFFFFFFFFFFF) 152 | } 153 | -------------------------------------------------------------------------------- /qci/utils.ipy: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def getid(): 4 | return int(time.time() - 1423015475) 5 | 6 | def build_loadgen(totalrecords, numstreams, pointspermessage): 7 | !go get github.com/SoftwareDefinedBuildings/quasarloadgenerator 8 | !git clone https://github.com/SoftwareDefinedBuildings/quasarloadgenerator 9 | !cd quasarloadgenerator && git checkout delete-data 10 | !cd quasarloadgenerator && go get -d ... 11 | !cd quasarloadgenerator && go build -o ../loadgen . 
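# Typical flow (see qci/runtests.ipy): build_loadgen() fetches and builds the load generator
# as ./loadgen, mkceph_*() prepares a ceph pool (or none for the file-store case), mkconf()
# writes quasar.conf, and the tests then drive the freshly built ./exe server.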
12 | 13 | def mkconf(cephpool, collection, filepath="/srv/quasar/"): 14 | if cephpool is not None: 15 | conf=""" 16 | [storage] 17 | provider=ceph 18 | cephconf=/etc/ceph/ceph.conf 19 | cephpool={cephpool} 20 | """.format(cephpool=cephpool) 21 | else: 22 | conf=""" 23 | [storage] 24 | provider=file 25 | filepath={filepath} 26 | """.format(filepath=filepath) 27 | conf = conf + """ 28 | [http] 29 | enabled=true 30 | port=9000 31 | address=0.0.0.0 32 | 33 | [capnp] 34 | enabled=true 35 | port=4410 36 | address=0.0.0.0 37 | 38 | [mongo] 39 | server=localhost 40 | collection={collection} 41 | 42 | [debug] 43 | heapprofile=true 44 | cpuprofile=true 45 | 46 | [cache] 47 | # Configure the RADOS and block caches. If you have a choice, rather 48 | # spend memory on the block cache. 49 | 50 | # This is measured in blocks, which are at most ~16K 51 | blockcache=62500 #1 GB 52 | 53 | # Choose a RADOS cache roughly equal to (num concurrent reads) * (object size) 54 | # the transaction size is at most 16 MB, but is usually around 1.6MB. The 55 | # objects can vary in size, so the cache can be capped either in quantity or 56 | # in total size (or both) 57 | radoscachecount=2048 #in objects 58 | radoscachesize=256 #in MB 59 | 60 | [coalescence] 61 | earlytrip=16384 #readings 62 | interval=5000 #ms 63 | """.format(collection=collection) 64 | with open("quasar.conf","w") as f: 65 | f.write(conf) 66 | 67 | def wait_for_stable_ceph(): 68 | x = !ceph -s 69 | while any(("creating" in y) or ("peering" in y) or ("unclean" in y) for y in x): 70 | print "Waiting for creation:" 71 | print x 72 | time.sleep(5) 73 | x = !ceph -s 74 | 75 | def mkceph_local(cephpool): 76 | !ceph osd pool create $cephpool 4096 4096 replicated local 2 77 | time.sleep(5) 78 | wait_for_stable_ceph() 79 | 80 | def mkceph_remote(cephpool): 81 | !ceph osd pool create $cephpool 4096 4096 replicated remote 2 82 | time.sleep(5) 83 | wait_for_stable_ceph() 84 | 85 | def mkceph_tier(cephpool): 86 | cache = cephpool+"-cache" 87 | !ceph osd pool create $cephpool 4096 4096 replicated remote 2 88 | time.sleep(5) 89 | !ceph osd pool create $cache 4096 4096 replicated local 2 90 | time.sleep(5) 91 | wait_for_stable_ceph() 92 | !ceph osd tier add $cephpool $cache 93 | !ceph osd tier cache-mode $cache writeback 94 | !ceph osd tier set-overlay $cephpool $cache 95 | !ceph osd pool set $cache hit_set_type bloom 96 | !ceph osd pool set $cache hit_set_period 7200 97 | !ceph osd pool set $cache cache_min_flush_age 120 98 | wait_for_stable_ceph() 99 | 100 | def mkceph_primary(cephpool): 101 | !ceph osd pool create $cephpool 4096 4096 replicated primary 2 102 | time.sleep(5) 103 | wait_for_stable_ceph() 104 | 105 | def delceph_pool(cephpool): 106 | !ceph osd pool delete $cephpool $cephpool --yes-i-really-really-mean-it 107 | 108 | def delceph_tier(cephpool): 109 | cache = cephpool+"-cache" 110 | !ceph osd tier cache-mode $cache forward 111 | !rados -p $cache cache-flush-evict-all > log.evict 112 | !ceph osd tier remove-overlay $cephpool 113 | !ceph osd tier remove $cephpool $cache 114 | delceph_pool(cache) 115 | delceph_pool(cephpool) 116 | 117 | #get QDF pulled 118 | !git clone https://github.com/SoftwareDefinedBuildings/QDF.git 119 | !mv QDF/qdf . 
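# qdf supplies the Twisted-based client bindings (qdf.quasar.connectToArchiver) imported by
# test_readwrite1.ipy, test_readwrite2.ipy and test_changedrange.ipy.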
120 | -------------------------------------------------------------------------------- /btrdbd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "runtime" 9 | "runtime/pprof" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/SoftwareDefinedBuildings/btrdb" 14 | "github.com/SoftwareDefinedBuildings/btrdb/cpinterface" 15 | "github.com/SoftwareDefinedBuildings/btrdb/httpinterface" 16 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore" 17 | "github.com/op/go-logging" 18 | ) 19 | 20 | var log *logging.Logger 21 | 22 | func init() { 23 | logging.SetFormatter(logging.MustStringFormatter("%{color}%{shortfile} ▶%{color:reset} %{message}")) 24 | log = logging.MustGetLogger("log") 25 | 26 | } 27 | 28 | var createDB = flag.Bool("makedb", false, "create a new database") 29 | 30 | func main() { 31 | loadConfig() 32 | flag.Parse() 33 | 34 | go func() { 35 | for { 36 | time.Sleep(10 * time.Second) 37 | fmt.Println("Num goroutines: ", runtime.NumGoroutine()) 38 | } 39 | }() 40 | if Configuration.Debug.Cpuprofile { 41 | f, err := os.Create("profile.cpu") 42 | if err != nil { 43 | log.Panicf("Error creating CPU profile: %v", err) 44 | } 45 | f2, err := os.Create("profile.block") 46 | if err != nil { 47 | log.Panicf("Error creating Block profile: %v", err) 48 | } 49 | pprof.StartCPUProfile(f) 50 | runtime.SetBlockProfileRate(1) 51 | defer runtime.SetBlockProfileRate(0) 52 | defer pprof.Lookup("block").WriteTo(f2, 1) 53 | defer pprof.StopCPUProfile() 54 | } 55 | 56 | if *createDB { 57 | fmt.Printf("Creating a new database\n") 58 | bstore.CreateDatabase(Params) 59 | fmt.Printf("Done\n") 60 | os.Exit(0) 61 | } 62 | nCPU := runtime.NumCPU() 63 | runtime.GOMAXPROCS(nCPU) 64 | cfg := btrdb.QuasarConfig{ 65 | DatablockCacheSize: uint64(Configuration.Cache.BlockCache), 66 | TransactionCoalesceEnable: true, 67 | TransactionCoalesceInterval: uint64(*Configuration.Coalescence.Interval), 68 | TransactionCoalesceEarlyTrip: uint64(*Configuration.Coalescence.Earlytrip), 69 | Params: Params, 70 | } 71 | q, err := btrdb.NewQuasar(&cfg) 72 | if err != nil { 73 | log.Panicf("error: ", err) 74 | } 75 | 76 | if Configuration.Http.Enabled { 77 | go httpinterface.QuasarServeHTTP(q, *Configuration.Http.Address+":"+strconv.FormatInt(int64(*Configuration.Http.Port), 10)) 78 | } 79 | if Configuration.Capnp.Enabled { 80 | go cpinterface.ServeCPNP(q, "tcp", *Configuration.Capnp.Address+":"+strconv.FormatInt(int64(*Configuration.Capnp.Port), 10)) 81 | } 82 | 83 | if Configuration.Debug.Heapprofile { 84 | go func() { 85 | idx := 0 86 | for { 87 | f, err := os.Create(fmt.Sprintf("profile.heap.%05d", idx)) 88 | if err != nil { 89 | log.Panicf("Could not create memory profile %v", err) 90 | } 91 | idx = idx + 1 92 | pprof.WriteHeapProfile(f) 93 | f.Close() 94 | time.Sleep(30 * time.Second) 95 | } 96 | }() 97 | } 98 | 99 | sigchan := make(chan os.Signal, 1) 100 | signal.Notify(sigchan, os.Interrupt) 101 | 102 | for { 103 | time.Sleep(5 * time.Second) 104 | log.Info("Still alive") 105 | 106 | select { 107 | case _ = <-sigchan: 108 | log.Warning("Received Ctrl-C, waiting for graceful shutdown") 109 | time.Sleep(4 * time.Second) //Allow http some time 110 | log.Warning("Checking for pending inserts") 111 | for { 112 | if q.IsPending() { 113 | log.Warning("Pending inserts... waiting... 
") 114 | time.Sleep(2 * time.Second) 115 | } else { 116 | log.Warning("No pending inserts") 117 | break 118 | } 119 | } 120 | if Configuration.Debug.Heapprofile { 121 | log.Warning("writing heap profile") 122 | f, err := os.Create("profile.heap.FIN") 123 | if err != nil { 124 | log.Panicf("Could not create memory profile %v", err) 125 | } 126 | pprof.WriteHeapProfile(f) 127 | f.Close() 128 | 129 | } 130 | return //end the program 131 | default: 132 | 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /internal/cephprovider/cephcache.go: -------------------------------------------------------------------------------- 1 | package cephprovider 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | //"runtime" 7 | ) 8 | 9 | //We are caching 1MB blocks for read, so the address should have the bottom 20 bits clear 10 | const R_ADDRMASK = ^((uint64(1) << 20) - 1) 11 | const R_OFFSETMASK = (uint64(1) << 20) - 1 12 | 13 | type CephCache struct { 14 | cachemap map[uint64]*CacheItem 15 | cachemiss uint64 16 | cachehit uint64 17 | cacheold *CacheItem 18 | cachenew *CacheItem 19 | cachemtx sync.Mutex 20 | cachelen uint64 21 | cachemax uint64 22 | cacheinv uint64 23 | pool *sync.Pool 24 | } 25 | type CacheItem struct { 26 | val []byte 27 | addr uint64 28 | newer *CacheItem 29 | older *CacheItem 30 | } 31 | 32 | func (cc *CephCache) initCache(size uint64) { 33 | cc.cachemax = size 34 | cc.cachemap = make(map[uint64]*CacheItem, size) 35 | cc.pool = &sync.Pool{ 36 | New: func() interface{} { 37 | return make([]byte, R_CHUNKSIZE) 38 | }, 39 | } 40 | 41 | go func() { 42 | for { 43 | log.Info("Ceph BlockCache: %d invs %d misses, %d hits, %.2f %%", 44 | cc.cacheinv, cc.cachemiss, cc.cachehit, (float64(cc.cachehit*100) / float64(cc.cachemiss+cc.cachehit))) 45 | time.Sleep(5 * time.Second) 46 | } 47 | }() 48 | } 49 | 50 | //This function must be called with the mutex held 51 | func (cc *CephCache) cachePromote(i *CacheItem) { 52 | if cc.cachenew == i { 53 | //Already at front 54 | return 55 | } 56 | if i.newer != nil { 57 | i.newer.older = i.older 58 | } 59 | if i.older != nil { 60 | i.older.newer = i.newer 61 | } 62 | if cc.cacheold == i && i.newer != nil { 63 | //This was the tail of a list longer than 1 64 | cc.cacheold = i.newer 65 | } else if cc.cacheold == nil { 66 | //This was/is the only item in the list 67 | cc.cacheold = i 68 | } 69 | 70 | i.newer = nil 71 | i.older = cc.cachenew 72 | if cc.cachenew != nil { 73 | cc.cachenew.newer = i 74 | } 75 | cc.cachenew = i 76 | } 77 | 78 | func (cc *CephCache) cachePut(addr uint64, item []byte) { 79 | if cc.cachemax == 0 { 80 | return 81 | } 82 | cc.cachemtx.Lock() 83 | i, ok := cc.cachemap[addr] 84 | if ok { 85 | cc.cachePromote(i) 86 | } else { 87 | i = &CacheItem{ 88 | val: item, 89 | addr: addr, 90 | } 91 | cc.cachemap[addr] = i 92 | cc.cachePromote(i) 93 | cc.cachelen++ 94 | cc.cacheCheckCap() 95 | } 96 | cc.cachemtx.Unlock() 97 | } 98 | 99 | func (cc *CephCache) getBlank() []byte { 100 | rv := cc.pool.Get().([]byte) 101 | rv = rv[0:R_CHUNKSIZE] 102 | 103 | return rv 104 | } 105 | 106 | func (cc *CephCache) cacheGet(addr uint64) []byte { 107 | if cc.cachemax == 0 { 108 | cc.cachemiss++ 109 | return nil 110 | } 111 | cc.cachemtx.Lock() 112 | rv, ok := cc.cachemap[addr] 113 | if ok { 114 | cc.cachePromote(rv) 115 | } 116 | cc.cachemtx.Unlock() 117 | if ok { 118 | cc.cachehit++ 119 | return rv.val 120 | } else { 121 | cc.cachemiss++ 122 | return nil 123 | } 124 | } 125 | 126 | //This is rare and only happens if 
the block cache is too small 127 | func (cc *CephCache) cacheInvalidate(addr uint64) { 128 | if cc.cachemax == 0 { 129 | return 130 | } 131 | cc.cachemtx.Lock() 132 | i, ok := cc.cachemap[addr] 133 | if ok { 134 | if i.newer != nil { 135 | i.newer.older = i.older 136 | } 137 | if i.older != nil { 138 | i.older.newer = i.newer 139 | } 140 | if cc.cacheold == i { 141 | //This was the tail of a list longer than 1 142 | cc.cacheold = i.newer 143 | } 144 | if cc.cachenew == i { 145 | cc.cachenew = i.older 146 | } 147 | cc.cachelen-- 148 | cc.cacheinv++ 149 | delete(cc.cachemap, addr) 150 | } 151 | cc.cachemtx.Unlock() 152 | } 153 | 154 | //This must be called with the mutex held 155 | func (cc *CephCache) cacheCheckCap() { 156 | for cc.cachelen > cc.cachemax { 157 | i := cc.cacheold 158 | 159 | delete(cc.cachemap, i.addr) 160 | if i.newer != nil { 161 | i.newer.older = nil 162 | } 163 | cc.cacheold = i.newer 164 | cc.cachelen-- 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /qci/runtests.ipy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ipython 2 | from multiprocessing import Process, Pipe 3 | import os 4 | import time 5 | import sys 6 | import subprocess 7 | import uuid 8 | import pymongo 9 | %run qci/utils.ipy 10 | runid = getid() 11 | print "RUN ID IS", runid 12 | build_loadgen(10000000,20,1000) 13 | cephpool = "q"+str(runid) 14 | collection = "q"+str(runid) 15 | filestore = "q"+str(runid) 16 | if "CEPHTYPE" not in os.environ or os.environ["CEPHTYPE"] == "local": 17 | mkceph_local(cephpool) 18 | elif os.environ["CEPHTYPE"] == "remote": 19 | mkceph_remote(cephpool) 20 | elif os.environ["CEPHTYPE"] == "primary": 21 | mkceph_primary(cephpool) 22 | elif os.environ["CEPHTYPE"] == "tier": 23 | mkceph_tier(cephpool) 24 | elif os.environ["CEPHTYPE"] == "filestore": 25 | cephpool = None 26 | 27 | mkconf(cephpool, collection, filestore) 28 | 29 | #Create database 30 | rc = subprocess.call(["./exe","-makedb"]) 31 | print "rc0", rc 32 | if rc != 0: 33 | sys.exit(rc) 34 | 35 | #start quasar 36 | def start_q_(): 37 | stdout=open("log.q.stdout.%d" % (int(time.time())),"w") 38 | rc = subprocess.call(["./exe"],stdout=stdout, stderr=subprocess.STDOUT) 39 | print "rc1", rc 40 | if rc != 0: 41 | sys.exit(rc) 42 | 43 | 44 | def start_quasar(): 45 | global p 46 | p = Process(target=start_q_) 47 | p.start() 48 | 49 | def term_quasar(): 50 | #send sigint 51 | #os.kill(p.pid, 2) 52 | !pkill --signal 2 exe 53 | 54 | time.sleep(120) 55 | #os.kill(p.pid, 9) 56 | !pkill --signal 9 exe 57 | 58 | def kill_quasar(): 59 | !pkill --signal 9 exe 60 | 61 | def proc_profiles(pfx): 62 | !go tool pprof -text -cum exe profile.cpu > log.profile.cpu.cum 63 | !go tool pprof -text exe profile.cpu > log.profile.cpu 64 | hps = !ls profile.heap.* 65 | for hp in hps: 66 | num = hp.split(".")[-1] 67 | !go tool pprof -text exe $hp > temp 68 | tot = !cat temp | head -n 1 | cut -d ' ' -f 3 69 | tot = tot[0] 70 | fname = "log.heap."+pfx+"."+num+"___"+tot 71 | !mv temp $fname 72 | !rm -f profile.heap.* 73 | 74 | start_quasar() 75 | #wait a bit 76 | time.sleep(10) 77 | 78 | if not p.is_alive(): 79 | print "quasar died:", p.exitcode 80 | sys.exit(1) 81 | 82 | !rm FAILURE 83 | !touch FAILURE 84 | if "TEST_TYPE" not in os.environ or os.environ["TEST_TYPE"] == "loadgen": 85 | %run qci/test_loadgen.ipy 86 | elif os.environ["TEST_TYPE"] == "readwrite1": 87 | print "running reqdwrite1" 88 | %run qci/test_readwrite1.ipy 89 | print "run 
complete" 90 | elif os.environ["TEST_TYPE"] == "readwrite2": 91 | print "running reqdwrite2" 92 | %run qci/test_readwrite2.ipy 93 | print "run complete" 94 | elif os.environ["TEST_TYPE"] == "readstat1": 95 | print "running readstat1" 96 | %run qci/test_readstat1.ipy 97 | print "run complete" 98 | elif os.environ["TEST_TYPE"] == "loadgen2": 99 | print "running loadgen2" 100 | %run qci/test_loadgen2.ipy 101 | print "run complete" 102 | elif os.environ["TEST_TYPE"] == "loadgen3": 103 | print "running loadgen3" 104 | %run qci/test_loadgen3.ipy 105 | print "run complete" 106 | elif os.environ["TEST_TYPE"] == "endpoint": 107 | print "running endpoint" 108 | %run qci/test_endpoint.ipy 109 | print "run complete" 110 | elif os.environ["TEST_TYPE"] == "changedrange": 111 | print "running changedrange" 112 | %run qci/test_changedrange.ipy 113 | print "run complete" 114 | 115 | failed = !cat FAILURE; echo $? 116 | failed = (failed[-1] == "0") 117 | 118 | if not p.is_alive(): 119 | print "quasar died:", p.exitcode 120 | sys.exit(1) 121 | 122 | if not failed: 123 | print "WRITING SUCCESS FILE" 124 | with open("success","w") as f: 125 | f.write("OK\n") 126 | 127 | term_quasar() 128 | 129 | proc_profiles("end") 130 | 131 | if os.environ["CEPHTYPE"] == "tier": 132 | delceph_tier(cephpool) 133 | elif os.environ["CEPHTYPE"] == "filestore": 134 | !rm -r {filestore} 135 | cl = pymongo.MongoClient() 136 | cl.drop_database(collection) 137 | cl.disconnect() 138 | else: 139 | delceph_pool(cephpool) 140 | 141 | print "done" 142 | 143 | if failed: 144 | sys.exit(1) 145 | 146 | -------------------------------------------------------------------------------- /qtree/operators.go: -------------------------------------------------------------------------------- 1 | package qtree 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore" 7 | ) 8 | 9 | func (n *QTreeNode) OpCountMean() (uint64, float64) { 10 | total := 0.0 11 | cnt := uint64(0) 12 | if n.isLeaf { 13 | for i := 0; i < int(n.vector_block.Len); i++ { 14 | total += n.vector_block.Value[i] 15 | } 16 | return uint64(n.vector_block.Len), total / float64(n.vector_block.Len) 17 | } else { 18 | for i := 0; i < bstore.KFACTOR; i++ { 19 | if n.core_block.Count[i] == 0 { 20 | continue 21 | } 22 | cnt += n.core_block.Count[i] 23 | total += n.core_block.Mean[i] * float64(n.core_block.Count[i]) 24 | } 25 | return cnt, total / float64(cnt) 26 | } 27 | } 28 | 29 | func (n *QTreeNode) OpMin() float64 { 30 | if n.isLeaf { 31 | min := n.vector_block.Value[0] 32 | for i := 0; i < int(n.vector_block.Len); i++ { 33 | if n.vector_block.Value[i] < min { 34 | min = n.vector_block.Value[i] 35 | } 36 | } 37 | return min 38 | } else { 39 | min := float64(0) 40 | minset := false 41 | for i := 0; i < len(n.core_block.Min); i++ { 42 | if n.core_block.Count[i] == 0 { 43 | continue 44 | } 45 | if !minset || n.core_block.Min[i] < min { 46 | min = n.core_block.Min[i] 47 | minset = true 48 | } 49 | } 50 | return min 51 | } 52 | } 53 | 54 | func (n *QTreeNode) OpMax() float64 { 55 | if n.isLeaf { 56 | max := n.vector_block.Value[0] 57 | for i := 0; i < int(n.vector_block.Len); i++ { 58 | if n.vector_block.Value[i] > max { 59 | max = n.vector_block.Value[i] 60 | } 61 | } 62 | return max 63 | } else { 64 | max := float64(0) 65 | maxset := false 66 | for i := 0; i < len(n.core_block.Max); i++ { 67 | if n.core_block.Count[i] == 0 { 68 | continue 69 | } 70 | if !maxset || n.core_block.Max[i] > max { 71 | max = n.core_block.Max[i] 72 | maxset = true 73 | } 74 | } 
75 | return max 76 | } 77 | } 78 | 79 | /* 80 | 81 | ok so here is the problem. If we call opreduce on a core node, then we can only deliver 82 | pointwidths GREATER than our pointwidth and less than pointwidth + 6 right? 83 | but as a leaf we can potentially deliver pointwidths down to 0... 84 | */ 85 | func (n *QTreeNode) OpReduce(pointwidth uint8, index uint64) (uint64, float64, float64, float64) { 86 | if !n.isLeaf && pointwidth < n.PointWidth() { 87 | log.Panic("Bad pointwidth for core. See code comment") 88 | } 89 | if pointwidth > n.PointWidth()+PWFACTOR { 90 | log.Panic("Can't guarantee this PW") 91 | } 92 | maxpw := n.PointWidth() + PWFACTOR 93 | pwdelta := pointwidth - n.PointWidth() 94 | width := int64(1) << pointwidth 95 | maxidx := 1 << (maxpw - pointwidth) 96 | if maxidx <= 0 || index >= uint64(maxidx) { 97 | log.Critical("node is %s", n.TreePath()) 98 | log.Panic("bad index", maxidx, index) 99 | } 100 | sum := 0.0 101 | min := math.NaN() 102 | max := math.NaN() 103 | minset := false 104 | maxset := false 105 | count := uint64(0) 106 | if n.isLeaf { 107 | st := n.StartTime() + int64(index)*width 108 | et := st + width 109 | if n.vector_block.Len != 0 { 110 | for i := 0; i < int(n.vector_block.Len); i++ { 111 | if n.vector_block.Time[i] < st { 112 | continue 113 | } 114 | if n.vector_block.Time[i] >= et { 115 | break 116 | } 117 | v := n.vector_block.Value[i] 118 | sum += v 119 | if !minset || v < min { 120 | minset = true 121 | min = v 122 | } 123 | if !maxset || v > max { 124 | maxset = true 125 | max = v 126 | } 127 | count++ 128 | } 129 | } 130 | return count, min, sum / float64(count), max 131 | } else { 132 | s := index << pwdelta 133 | e := (index + 1) << pwdelta 134 | for i := s; i < e; i++ { 135 | if n.core_block.Count[i] == 0 { 136 | continue 137 | } 138 | count += n.core_block.Count[i] 139 | sum += n.core_block.Mean[i] * float64(n.core_block.Count[i]) 140 | if !minset || n.core_block.Min[i] < min { 141 | minset = true 142 | min = n.core_block.Min[i] 143 | } 144 | if !maxset || n.core_block.Max[i] > max { 145 | maxset = true 146 | max = n.core_block.Max[i] 147 | } 148 | } 149 | mean := sum / float64(count) 150 | return count, min, mean, max 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /internal/bprovider/bprovider_test.go: -------------------------------------------------------------------------------- 1 | package bprovider_test 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider" 10 | "github.com/SoftwareDefinedBuildings/btrdb/internal/cephprovider" 11 | "github.com/SoftwareDefinedBuildings/btrdb/internal/fileprovider" 12 | "github.com/op/go-logging" 13 | ) 14 | 15 | var log *logging.Logger 16 | 17 | func init() { 18 | log = logging.MustGetLogger("log") 19 | } 20 | 21 | func makeFileProvider() *fileprovider.FileStorageProvider { 22 | params := map[string]string{ 23 | "dbpath": "/srv/quasartestdb/", 24 | } 25 | fp := new(fileprovider.FileStorageProvider) 26 | err := fp.CreateDatabase(params) 27 | if err != nil { 28 | log.Panicf("Error on create %v", err) 29 | } 30 | fp.Initialize(params) 31 | return fp 32 | } 33 | 34 | func makeCephProvider() *cephprovider.CephStorageProvider { 35 | params := map[string]string{} 36 | cp := new(cephprovider.CephStorageProvider) 37 | /*err := cp.CreateDatabase(params) 38 | if err != nil { 39 | log.Panicf("Error on create %v",err) 40 | }*/ 41 | cp.Initialize(params) 42 | return cp 43 | } 44 | 
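// The two helpers above construct concrete providers for the generic tests
// below (x_RW1 and x_RWFuzz), which exercise the bprovider.Segment contract:
// LockSegment hands out an exclusive segment, Write returns the address at
// which the *next* write should happen, Unlock implies a flush, and Read
// returns a slice of the supplied buffer trimmed to the stored length.
//
// An illustrative sketch of that flow (hypothetical code, `payload` is a
// placeholder and not part of this file):
//
//	seg := sp.LockSegment()
//	addr := seg.BaseAddress()
//	next, err := seg.Write(addr, payload) // check err; use next for the following write
//	seg.Unlock()                          // Unlock implies a flush
//	echo := sp.Read(addr, make([]byte, 30000))
//	// len(echo) should equal len(payload) if the round trip worked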
45 | func TestCephInitDB(t *testing.T) { 46 | params := map[string]string{} 47 | cp := new(cephprovider.CephStorageProvider) 48 | err := cp.CreateDatabase(params) 49 | if err != nil { 50 | log.Panicf("Error on create %v", err) 51 | } 52 | } 53 | 54 | func x_RW1(t *testing.T, sp bprovider.StorageProvider) { 55 | seg := sp.LockSegment() 56 | addr := seg.BaseAddress() 57 | data := make([]byte, 1024) 58 | for i := 0; i < 1024; i++ { 59 | data[i] = byte(i) 60 | } 61 | _, err := seg.Write(addr, data) 62 | if err != nil { 63 | t.Fatalf("Got error on write: %v", err) 64 | } 65 | seg.Unlock() 66 | 67 | //Read back 68 | rdata := make([]byte, 30000) 69 | rslice := sp.Read(addr, rdata) 70 | if len(rslice) != len(data) { 71 | t.Fatalf("Got wrong slice len back") 72 | } 73 | for i := 0; i < 1024; i++ { 74 | if rslice[i] != data[i] { 75 | t.Fatalf("Index %v differed got %v, expected %v", i, rslice[i], data[i]) 76 | } 77 | } 78 | } 79 | 80 | func x_RWFuzz(t *testing.T, sp bprovider.StorageProvider) { 81 | wg := sync.WaitGroup{} 82 | const par = 2096 83 | const seglimlim = 50 84 | const arrszlim = 20482 85 | const maxseeds = 1 86 | for si := 1; si <= maxseeds; si++ { 87 | log.Warning("Trying seed %v", si) 88 | rand.Seed(int64(si)) 89 | wg.Add(par) 90 | for li := 0; li < par; li++ { 91 | lic := li 92 | go func() { 93 | 94 | seg := sp.LockSegment() 95 | addr := seg.BaseAddress() 96 | log.Warning("Segment %v base addr 0x%016x", lic, addr) 97 | seglimit := 1 //rand.Int() % seglimlim 98 | stored_data := make([][]byte, seglimit) 99 | stored_addrs := make([]uint64, seglimit) 100 | for k := 0; k < seglimit; k++ { 101 | arrsize := rand.Int() % arrszlim 102 | data := make([]byte, arrsize) 103 | for i := 0; i < arrsize; i++ { 104 | data[i] = byte(rand.Int()) 105 | } 106 | stored_data[k] = data 107 | naddr, err := seg.Write(addr, data) 108 | if err != nil { 109 | log.Error("ea %v", err) 110 | t.Errorf("Got error on write: %v", err) 111 | return 112 | } 113 | stored_addrs[k] = addr 114 | addr = naddr 115 | } 116 | seg.Unlock() 117 | sleeptime := time.Duration(rand.Int() % 2000) 118 | time.Sleep(sleeptime * time.Millisecond) 119 | //Read back 120 | for k := 0; k < seglimit; k++ { 121 | rdata := make([]byte, 33000) 122 | rslice := sp.Read(stored_addrs[k], rdata) 123 | if len(rslice) != len(stored_data[k]) { 124 | log.Error("eb") 125 | t.Errorf("Got wrong slice len back") 126 | return 127 | } 128 | for j := 0; j < len(stored_data[k]); j++ { 129 | if rslice[j] != stored_data[k][j] { 130 | log.Error("ec") 131 | t.Errorf("Index %v differed got %v, expected %v", j, rslice[j], stored_data[k][j]) 132 | } 133 | } 134 | } 135 | wg.Done() 136 | }() 137 | } 138 | wg.Wait() 139 | } 140 | } 141 | 142 | func Test_FP_RW1(t *testing.T) { 143 | fp := makeFileProvider() 144 | x_RW1(t, fp) 145 | } 146 | 147 | func Test_FP_FUZZ(t *testing.T) { 148 | fp := makeFileProvider() 149 | x_RWFuzz(t, fp) 150 | } 151 | 152 | func Test_CP_RW1(t *testing.T) { 153 | cp := makeCephProvider() 154 | x_RW1(t, cp) 155 | } 156 | 157 | func Test_CP_FUZZ(t *testing.T) { 158 | cp := makeCephProvider() 159 | x_RWFuzz(t, cp) 160 | } 161 | -------------------------------------------------------------------------------- /btrdbd/config.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | 8 | gcfg "gopkg.in/gcfg.v1" 9 | ) 10 | 11 | type Config struct { 12 | Http struct { 13 | Port *int 14 | Address *string 15 | Enabled bool 16 | } 17 | Capnp struct { 18 | Port *int 
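		// Pointer-typed fields (Port, Address, and the others below) let
		// loadConfig distinguish "not present in btrdb.conf" (nil) from an
		// explicit zero value; the mandatory ones are nil-checked before the
		// daemon starts.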
19 | Address *string 20 | Enabled bool 21 | } 22 | Mongo struct { 23 | Server *string 24 | Collection *string 25 | } 26 | Storage struct { 27 | Provider string 28 | Filepath *string 29 | Cephconf *string 30 | Cephpool *string 31 | } 32 | Cache struct { 33 | BlockCache int 34 | RadosWriteCache *int 35 | RadosReadCache *int 36 | } 37 | Debug struct { 38 | Cpuprofile bool 39 | Heapprofile bool 40 | } 41 | Coalescence struct { 42 | Earlytrip *int 43 | Interval *int 44 | } 45 | } 46 | 47 | var Configuration Config 48 | var Params map[string]string 49 | 50 | func loadConfig() { 51 | found := false 52 | err := gcfg.ReadFileInto(&Configuration, "./btrdb.conf") 53 | if err != nil { 54 | fmt.Printf("Could not load configuration file './btrdb.conf':\n%v\n", err) 55 | } else { 56 | found = true 57 | } 58 | 59 | if !found { 60 | err := gcfg.ReadFileInto(&Configuration, "/etc/btrdb/btrdb.conf") 61 | if err != nil { 62 | fmt.Printf("Could not load configuration file '/etc/btrdb/btrdb.conf':\n%v\n", err) 63 | } else { 64 | found = true 65 | } 66 | } 67 | 68 | if !found { 69 | fmt.Printf("Aborting: no configuration found!\n") 70 | os.Exit(1) 71 | } 72 | 73 | if Configuration.Mongo.Server == nil || *Configuration.Mongo.Server == "" { 74 | fmt.Printf("Aborting: configuration missing MongoDB server address\n") 75 | os.Exit(1) 76 | } 77 | if Configuration.Mongo.Collection == nil || *Configuration.Mongo.Collection == "" { 78 | fmt.Printf("Aborting: configuration missing MongoDB collection\n") 79 | os.Exit(1) 80 | } 81 | 82 | if Configuration.Storage.Provider == "file" { 83 | if Configuration.Storage.Filepath == nil { 84 | fmt.Printf("Aborting: using Files for storage, but no filepath specified\n") 85 | os.Exit(1) 86 | } 87 | } else if Configuration.Storage.Provider == "ceph" { 88 | if Configuration.Storage.Cephconf == nil { 89 | fmt.Printf("Aborting: using Ceph for storage, but no cephconf specified\n") 90 | os.Exit(1) 91 | } 92 | if Configuration.Storage.Cephpool == nil { 93 | fmt.Printf("Aborting: using Ceph for storage, but no cephpool specified\n") 94 | os.Exit(1) 95 | } 96 | } else { 97 | fmt.Printf("Aborting: unknown storage provider specified\n") 98 | os.Exit(1) 99 | } 100 | 101 | if Configuration.Cache.RadosWriteCache == nil { 102 | z := 0 103 | Configuration.Cache.RadosWriteCache = &z 104 | } 105 | if Configuration.Cache.RadosReadCache == nil { 106 | z := 0 107 | Configuration.Cache.RadosReadCache = &z 108 | } 109 | 110 | if Configuration.Http.Enabled && Configuration.Http.Port == nil { 111 | fmt.Printf("Aborting: http server enabled, but no port specified\n") 112 | os.Exit(1) 113 | } 114 | 115 | if Configuration.Http.Enabled && Configuration.Http.Address == nil { 116 | fmt.Printf("Aborting: http server enabled, but no address specified\n") 117 | os.Exit(1) 118 | } 119 | 120 | if Configuration.Capnp.Enabled && Configuration.Capnp.Port == nil { 121 | fmt.Printf("Aborting: capn proto server enabled, but no port specified\n") 122 | os.Exit(1) 123 | } 124 | 125 | if Configuration.Capnp.Enabled && Configuration.Capnp.Address == nil { 126 | fmt.Printf("Aborting: capn proto server enabled, but no address specified\n") 127 | os.Exit(1) 128 | } 129 | 130 | if Configuration.Coalescence.Earlytrip == nil { 131 | fmt.Printf("Aborting: transaction coalescence early trip object count not set\n") 132 | os.Exit(1) 133 | } 134 | 135 | if Configuration.Coalescence.Interval == nil { 136 | fmt.Printf("Aborting: transaction coalescence commit interval not set\n") 137 | os.Exit(1) 138 | } 139 | 140 | Params = 
map[string]string{ 141 | "mongoserver": *Configuration.Mongo.Server, 142 | "provider": Configuration.Storage.Provider, 143 | "cachesize": strconv.FormatInt(int64(Configuration.Cache.BlockCache), 10), 144 | "collection": *Configuration.Mongo.Collection, 145 | } 146 | if Configuration.Storage.Provider == "ceph" { 147 | Params["cephconf"] = *Configuration.Storage.Cephconf 148 | Params["cephpool"] = *Configuration.Storage.Cephpool 149 | Params["cephrcache"] = strconv.FormatInt(int64(*Configuration.Cache.RadosReadCache), 10) 150 | Params["cephwcache"] = strconv.FormatInt(int64(*Configuration.Cache.RadosWriteCache), 10) 151 | } 152 | if Configuration.Storage.Provider == "file" { 153 | Params["dbpath"] = *Configuration.Storage.Filepath 154 | } 155 | 156 | fmt.Printf("Configuration OK!\n") 157 | } 158 | -------------------------------------------------------------------------------- /qci/test_readstat1.ipy: -------------------------------------------------------------------------------- 1 | 2 | import qdf 3 | import qdf.quasar 4 | import sys 5 | import random 6 | import uuid 7 | import time 8 | import numpy as np 9 | from twisted.internet import defer, protocol, reactor 10 | print "entered test readwrite1" 11 | EXIT_CODE = None 12 | def setexit(code): 13 | global EXIT_CODE 14 | EXIT_CODE = code 15 | reactor.stop() 16 | 17 | def statify(data, pw, starttime, endtime): 18 | rv = {} 19 | mask = ~((1< t { 107 | t = nt 108 | } 109 | } 110 | return rv 111 | } 112 | 113 | func MakeWTree() (*QTree, uuid.UUID) { 114 | id := uuid.NewRandom() 115 | mBS() 116 | tr, err := NewWriteQTree(_bs, id) 117 | if err != nil { 118 | log.Panic(err) 119 | } 120 | return tr, id 121 | } 122 | func CompareData(lhs []Record, rhs []Record) { 123 | if len(lhs) != len(rhs) { 124 | log.Panic("lhs != rhs len") 125 | } 126 | for i, v := range lhs { 127 | if rhs[i] != v { 128 | log.Panic("data differs") 129 | } 130 | } 131 | } 132 | func TestTreeSWriteLarge(t *testing.T) { 133 | mBS() 134 | testuuid := uuid.NewRandom() 135 | tr, err := NewWriteQTree(_bs, testuuid) 136 | log.Printf("Generated tree %v", testuuid.String()) 137 | if err != nil { 138 | t.Error(err) 139 | } 140 | log.Printf("Generating dummy records") 141 | records := GenData(0, 40*DAY, HOUR, 2*MINUTE, func(t int64) float64 { 142 | return float64(t) 143 | }) 144 | log.Printf("We generated %v records", len(records)) 145 | 146 | tr.InsertValues(records) 147 | tr.Commit() 148 | 149 | tr, err = NewReadQTree(_bs, testuuid, bstore.LatestGeneration) 150 | if err != nil { 151 | log.Panic(err) 152 | } 153 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE) 154 | if err != nil { 155 | log.Panic(err) 156 | } 157 | log.Printf("We read %v records", len(rrec)) 158 | if len(rrec) != len(records) { 159 | t.FailNow() 160 | } 161 | for i := 0; i < len(rrec); i++ { 162 | if records[i].Time != rrec[i].Time || 163 | records[i].Val != rrec[i].Val { 164 | t.FailNow() 165 | } 166 | //log.Printf("[%5d] w=%v r=%v d=%v", i, records[i].Time, rrec[i].Time, 167 | // int64(records[i].Time- rrec[i].Time)) 168 | } 169 | 170 | } 171 | 172 | func BenchmarkMultiSWrite(b *testing.B) { 173 | mBS() 174 | testuuid := uuid.NewRandom() 175 | log.Printf("MultiSWrite is using %v", testuuid.String()) 176 | log.Printf("Generating dummy records") 177 | records := GenData(0, 1*DAY, SECOND, 100*MILLISECOND, func(t int64) float64 { 178 | return float64(t) 179 | }) 180 | log.Printf("We generated %v records, randomizing a copy", len(records)) 181 | rec_copy_orig := make([]Record, len(records)) 182 | perm := 
rand.Perm(len(records)) 183 | for i, v := range perm { 184 | rec_copy_orig[v] = records[i] 185 | } 186 | b.ResetTimer() 187 | for iter := 0; iter < b.N; iter++ { 188 | rec_copy := make([]Record, len(rec_copy_orig)) 189 | copy(rec_copy, rec_copy_orig) 190 | iperstage := 4000 191 | idx := 0 192 | for { 193 | tr, err := NewWriteQTree(_bs, testuuid) 194 | if err != nil { 195 | b.Error(err) 196 | } 197 | end := idx + iperstage 198 | if end > len(rec_copy) { 199 | end = len(rec_copy) 200 | } 201 | tr.InsertValues(rec_copy[idx:end]) 202 | tr.Commit() 203 | idx = end 204 | if idx == len(rec_copy) { 205 | break 206 | } 207 | } 208 | /* 209 | //Read back the records 210 | tr, err := NewReadQTree(_bs, testuuid, bstore.LatestGeneration) 211 | if err != nil { 212 | log.Panic(err) 213 | } 214 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE) 215 | if err != nil { 216 | log.Panic(err) 217 | } 218 | */ 219 | } 220 | } 221 | func TestTreeMultiSWrite(t *testing.T) { 222 | mBS() 223 | testuuid := uuid.NewRandom() 224 | log.Printf("MultiSWrite is going into %v", testuuid.String()) 225 | log.Printf("Generating dummy records") 226 | records := GenData(0, 1*HOUR, 1*MINUTE, 2*SECOND, func(t int64) float64 { 227 | return float64(t) 228 | }) 229 | log.Printf("We generated %v records, randomizing a copy", len(records)) 230 | rec_copy := make([]Record, len(records)) 231 | perm := rand.Perm(len(records)) 232 | for i, v := range perm { 233 | rec_copy[v] = records[i] 234 | } 235 | iperstage := 30 236 | idx := 0 237 | for { 238 | tr, err := NewWriteQTree(_bs, testuuid) 239 | if err != nil { 240 | t.Error(err) 241 | } 242 | end := idx + iperstage 243 | if end > len(rec_copy) { 244 | end = len(rec_copy) 245 | } 246 | tr.InsertValues(rec_copy[idx:end]) 247 | tr.root.PrintCounts(2) 248 | tr.Commit() 249 | idx = end 250 | if idx == len(rec_copy) { 251 | break 252 | } 253 | } 254 | 255 | //Read back the records 256 | tr, err := NewReadQTree(_bs, testuuid, bstore.LatestGeneration) 257 | if err != nil { 258 | log.Panic(err) 259 | } 260 | rrec, err := tr.ReadStandardValuesBlock(0, 40*DAY+2*MINUTE) 261 | if err != nil { 262 | log.Panic(err) 263 | } 264 | //Verify we have the same number (for now) 265 | log.Printf("wrote %v, read %v", len(records), len(rrec)) 266 | tr.root.PrintCounts(0) 267 | if len(records) != len(rrec) { 268 | t.FailNow() 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /cpinterface/interface.capnp: -------------------------------------------------------------------------------- 1 | using Go = import "go.capnp"; 2 | $Go.package("cpinterface"); 3 | $Go.import("github.com/SoftwareDefinedBuildings/btrdb/cpinterface"); 4 | 5 | @0x85360901bcc4bed2; 6 | 7 | ### 8 | # Request type, each request gives back exactly one response 9 | ### 10 | struct Request { 11 | # This will be added to the response, so that requests can be mapped 12 | # to responses as they can come back out of order. 13 | echoTag @0 : UInt64; 14 | union { 15 | void @1 : Void; 16 | queryStandardValues @2 : CmdQueryStandardValues; 17 | queryStatisticalValues @3 : CmdQueryStatisticalValues; 18 | queryWindowValues @9 : CmdQueryWindowValues; 19 | queryVersion @4 : CmdQueryVersion; 20 | queryNearestValue @5 : CmdQueryNearestValue; 21 | queryChangedRanges @6 : CmdQueryChangedRanges; 22 | insertValues @7 : CmdInsertValues; 23 | deleteValues @8 : CmdDeleteValues; 24 | } 25 | } 26 | 27 | # The basic record type. Times are measured in nanoseconds 28 | # since the Epoch. 
At the time of writing, BTrDB is only 29 | # capable of storing dates from approx 1935 to 2078... 30 | struct Record { 31 | time @0 : Int64; 32 | value @1 : Float64; 33 | } 34 | 35 | # Query pre-aggregated statistical records from the database. 36 | # these are particularly useful for plotting applications 37 | # and locating where data is. 38 | struct StatisticalRecord { 39 | time @0 : Int64; 40 | count @1 : UInt64; 41 | min @2 : Float64; 42 | mean @3 : Float64; 43 | max @4 : Float64; 44 | } 45 | 46 | # Query from startTime (inclusive) to endTime (exclusive) in 47 | # nanoseconds. 48 | # If you want consistent values over a series of 49 | # reads, or you wish to view a stream as it was in the past 50 | # then you can specify a nonzero version. Repeating a query 51 | # with the same version is guaranteed to return the same results 52 | # irrespective of any deletes or adds that take place. 53 | # returns many RecordLists 54 | struct CmdQueryStandardValues { 55 | uuid @0 : Data; 56 | version @1 : UInt64; 57 | startTime @2 : Int64; 58 | endTime @3 : Int64; 59 | } 60 | 61 | 62 | # Query from startTime (inclusive) to endTime (exclusive) in 63 | # nanoseconds. Note that both of those times will be rounded 64 | # down if they have set bits in the bottom pointWidth bits. 65 | # pointWidth is the log of the number of records to aggregate 66 | # per result. A PW of 30 therefore means (1<<30) ns per record 67 | # which is about a second. 68 | # If you want consistent values over a series of 69 | # reads, or you wish to view a stream as it was in the past 70 | # then you can specify a nonzero version 71 | # returns many StatisticalRecordLists 72 | struct CmdQueryStatisticalValues { 73 | uuid @0 : Data; 74 | version @1 : UInt64; 75 | startTime @2 : Int64; 76 | endTime @3 : Int64; 77 | pointWidth @4 : UInt8; 78 | } 79 | 80 | # Query from startTime (inclusive) to endTime (exclusive) in 81 | # nanoseconds. Aggregate windows with an end time less than or equal 82 | # to endTime will be returned. Windows start from exactly startTime and 83 | # increase by Width. Leap seconds etc are your problem. The depth 84 | # (currently unimplemented) represents the minimum PW to descend to 85 | # while computing windows. 86 | # If you want consistent values over a series of 87 | # reads, or you wish to view a stream as it was in the past 88 | # then you can specify a nonzero version 89 | # returns many StatisticalRecordLists 90 | struct CmdQueryWindowValues { 91 | uuid @0 : Data; 92 | version @1 : UInt64; 93 | startTime @2 : Int64; 94 | endTime @3 : Int64; 95 | width @4 : UInt64; 96 | depth @5 : UInt8; 97 | } 98 | 99 | # For every UUID given, return the current version and last 100 | # modified time of the stream. 101 | # returns VersionList 102 | struct CmdQueryVersion { 103 | uuids @0 : List(Data); 104 | } 105 | 106 | # Query the next (or previous if backward=true) value in the 107 | # stream, starting from time. 108 | # returns a RecordList 109 | struct CmdQueryNearestValue { 110 | uuid @0 : Data; 111 | version @1 : UInt64; 112 | time @2 : Int64; 113 | backward @3 : Bool; 114 | } 115 | 116 | # For the given UUID, return all the time ranges that have 117 | # changed between the given generations. toGeneration is 118 | # not included. Note that depending on how full the stream is, 119 | # the returned result may be rounded off. A sparsely populated 120 | # stream returns less accurate results than a densely populated 121 | # one. 
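# fromGeneration and toGeneration name the two stream versions being diffed;
# generation 0 is reserved and 1 effectively means "before the first write"
# (see QueryChangedRanges in quasar.go). resolution controls how far down the
# tree the search descends: higher values answer faster but return coarser
# ranges.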
122 | # returns many RangeLists 123 | struct CmdQueryChangedRanges { 124 | uuid @0 : Data; 125 | fromGeneration @1 : UInt64; 126 | toGeneration @2 : UInt64; 127 | unused @3 : UInt64; 128 | resolution @4 : UInt8; 129 | } 130 | 131 | # Insert values. If sync is true, the database will flush the 132 | # results to disk before returning success. Please PLEASE don't 133 | # use that without seriously considering if you need it, as it 134 | # disables transaction coalescence and reduces performance 135 | # by several orders of magnitude. 136 | # returns Void 137 | struct CmdInsertValues { 138 | uuid @0 : Data; 139 | values @1 : List(Record); 140 | sync @2 : Bool; 141 | } 142 | 143 | # Delete the values between the given times. 144 | # returns Void 145 | struct CmdDeleteValues { 146 | uuid @0 : Data; 147 | startTime @1 : Int64; 148 | endTime @2 : Int64; 149 | } 150 | 151 | ### 152 | # Response type 153 | ### 154 | struct Response { 155 | echoTag @0 : UInt64; 156 | statusCode @1 : StatusCode; 157 | final @2 : Bool; 158 | union { 159 | void @3 : Void; 160 | records @4 : Records; 161 | statisticalRecords @5 : StatisticalRecords; 162 | versionList @6 : Versions; 163 | changedRngList @7 : Ranges; 164 | } 165 | } 166 | 167 | # Contains all the error codes that are emitted by Quasar 168 | enum StatusCode { 169 | ok @0; 170 | 171 | # Returned (ATM) for almost everything 172 | internalError @1; 173 | 174 | # Returned for a bad UUID or a bad version 175 | noSuchStreamOrVersion @2; 176 | 177 | # Returned for a bad parameter, like time range 178 | invalidParameter @3; 179 | 180 | # Returned from nearest value when it doesn't exist 181 | noSuchPoint @4; 182 | } 183 | 184 | # Contains a list of records, and the version of the stream 185 | # used to satisfy the request. 186 | struct Records { 187 | version @0 : UInt64; 188 | values @1 : List(Record); 189 | } 190 | 191 | # Contains a list of statistical records and the version of 192 | # the stream used to satisfy the request. 
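# The count/min/mean/max fields correspond to the per-child aggregates
# (Count, Min, Mean, Max) that core blocks store internally; see OpReduce in
# qtree/operators.go.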
193 | struct StatisticalRecords { 194 | version @0 : UInt64; 195 | values @1 : List(StatisticalRecord); 196 | } 197 | 198 | # Contains the latest version numbers for the requested 199 | # streams 200 | struct Versions { 201 | uuids @0 : List(Data); 202 | versions @1 : List(UInt64); 203 | } 204 | 205 | # Represents a range of time that has been changed 206 | struct ChangedRange { 207 | startTime @0 : Int64; 208 | endTime @1 : Int64; 209 | } 210 | 211 | # Response to the QueryChangedRanges 212 | struct Ranges { 213 | version @0 : UInt64; 214 | values @1 : List(ChangedRange); 215 | } 216 | -------------------------------------------------------------------------------- /internal/cephprovider/cephprovider.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "cephprovider.h" 8 | #include 9 | #include 10 | 11 | #define ADDR_LOCK_SIZE 0x1000000000 12 | #define COMP_CAP_STEP 64 13 | #define OID_SIZE 43 //32 for uuid, 10 for id, 1 for nul 14 | 15 | rados_t cluster; 16 | char* pool; 17 | 18 | const char nibbles [] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; 19 | 20 | void make_object_id(uint8_t *uuid, uint64_t address, char* dest) 21 | { 22 | int i; 23 | int dp; 24 | for (i=0;i<16;i++) 25 | { 26 | int nibble; 27 | dest[i*2] = nibbles[uuid[i]>>4]; 28 | dest[i*2+1] = nibbles[uuid[i]&0xF]; 29 | } 30 | for (i=0;i<10;i++) 31 | { 32 | dest[32+i] = nibbles[address >> (4*(9-i)) & 0xF]; 33 | } 34 | dest[OID_SIZE-1] = 0; 35 | } 36 | 37 | void initialize_provider(const char* conffile, const char* cephpool) 38 | { 39 | int err; 40 | err = rados_create(&cluster, NULL); 41 | if (err < 0) 42 | { 43 | fprintf(stderr, "could not create RADOS cluster handle\n"); 44 | errno = -err; 45 | return; 46 | } 47 | 48 | err = rados_conf_read_file(cluster, conffile); 49 | if (err < 0) 50 | { 51 | fprintf(stderr, "could not create load ceph conf\n"); 52 | errno = -err; 53 | return; 54 | } 55 | 56 | err = rados_connect(cluster); 57 | if (err < 0) 58 | { 59 | fprintf(stderr, "could not create connect to cluster\n"); 60 | errno = -err; 61 | return; 62 | } 63 | 64 | pool = (char*) malloc(strlen(cephpool)+1); 65 | strcpy(pool, cephpool); 66 | 67 | errno = 0; 68 | } 69 | 70 | cephprovider_handle_t* handle_create() 71 | { 72 | int err; 73 | cephprovider_handle_t *rv = (cephprovider_handle_t*) malloc(sizeof(cephprovider_handle_t)); 74 | rv->comps = (rados_completion_t*) malloc(sizeof(rados_completion_t) *COMP_CAP_STEP); 75 | rv->comp_cap = COMP_CAP_STEP; 76 | rv->comp_len = 0; 77 | 78 | err = rados_ioctx_create(cluster, pool, &rv->ctx); 79 | if (err < 0) 80 | { 81 | fprintf(stderr, "could not create io context\n"); 82 | errno = -err; 83 | rados_ioctx_destroy(rv->ctx); 84 | free(rv); 85 | return NULL; 86 | } 87 | errno = 0; 88 | return rv; 89 | } 90 | 91 | void handle_write(cephprovider_handle_t *h, uint8_t *uuid, uint64_t address, const char *data, int len, int trunc) 92 | { 93 | //The ceph provider uses 24 bits of address per object, and the top 40 bits as an object ID 94 | int offset = address & 0xFFFFFF; 95 | uint64_t id = address >> 24; 96 | int err; 97 | char oid [OID_SIZE]; 98 | make_object_id(uuid, id, &oid[0]); 99 | if (trunc) 100 | { 101 | err = rados_trunc(h->ctx, oid, len + offset); 102 | if (err < 0) 103 | { 104 | fprintf(stderr, "could not trunc\n"); 105 | errno = -err; 106 | return; 107 | } 108 | } 109 | //Check we have a completion we can use 110 | if (h->comp_len == h->comp_cap) 111 | { 112 | 
h->comp_cap += COMP_CAP_STEP; 113 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t))); 114 | if (!h->comps) 115 | { 116 | return; 117 | } 118 | } 119 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len])); 120 | if (err < 0) 121 | { 122 | fprintf(stderr, "could not create completion\n"); 123 | errno = -err; 124 | return; 125 | } 126 | err = rados_aio_write(h->ctx, oid, h->comps[h->comp_len], data, len, offset); 127 | if (err < 0) 128 | { 129 | fprintf(stderr, "could not aio write\n"); 130 | errno = -err; 131 | return; 132 | } 133 | h->comp_len++; 134 | errno = 0; 135 | } 136 | 137 | int handle_read(cephprovider_handle_t *h, uint8_t *uuid, uint64_t address, char* dest, int len) 138 | { 139 | //The ceph provider uses 24 bits of address per object, and the top 40 bits as an object ID 140 | int offset = address & 0xFFFFFF; 141 | uint64_t id = address >> 24; 142 | int rv; 143 | char oid [OID_SIZE]; 144 | make_object_id(uuid, id, &oid[0]); 145 | rv = rados_read(h->ctx, oid, dest, len, offset); 146 | if (rv < 0) 147 | { 148 | fprintf(stderr, "could not read %s\n", oid); 149 | errno = -rv; 150 | return -1; 151 | } 152 | errno = 0; 153 | return rv; 154 | } 155 | 156 | void handle_init_allocator(cephprovider_handle_t *h) 157 | { 158 | int err; 159 | struct timeval dur; 160 | dur.tv_sec = 5; 161 | dur.tv_usec = 0; 162 | uint64_t addr; 163 | if (h->comp_len == h->comp_cap) 164 | { 165 | h->comp_cap += COMP_CAP_STEP; 166 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t))); 167 | if (!h->comps) 168 | { 169 | errno = -err; 170 | return; 171 | } 172 | } 173 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len])); 174 | if (err < 0) 175 | { 176 | fprintf(stderr, "could not create completion\n"); 177 | errno = -err; 178 | return; 179 | } 180 | 181 | err = rados_lock_exclusive(h->ctx, "allocator", "alloc_lock", "main", "alloc", &dur, 0); 182 | if (err < 0) { 183 | fprintf(stderr, "could not lock allocator\n"); 184 | errno = -err; 185 | return; 186 | } 187 | addr = 0x1000000; //Not zero!! 
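	/* The starting value is nonzero (see the "Not zero!!" note) presumably
	 * because a zero block address means "no block" elsewhere in the store:
	 * an empty superblock root and unused core-block child slots are both
	 * encoded as address 0, so the allocator must never hand out zero. */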
188 | err = rados_aio_write_full(h->ctx, "allocator", h->comps[h->comp_len], (char *) &addr, 8); 189 | if (err < 0) { 190 | fprintf(stderr, "could not write allocator\n"); 191 | errno = -err; 192 | return; 193 | } 194 | rados_aio_wait_for_safe(h->comps[h->comp_len]); 195 | err = rados_unlock(h->ctx, "allocator", "alloc_lock", "main"); 196 | if (err < 0) { 197 | fprintf(stderr, "could not unlock allocator\n"); 198 | errno = -err; 199 | return; 200 | } 201 | rados_aio_release(h->comps[h->comp_len]); 202 | errno = 0; 203 | } 204 | 205 | //Returns the address of the start of a range that can be 206 | //used 207 | uint64_t handle_obtainrange(cephprovider_handle_t *h) 208 | { 209 | int err; 210 | int rv; 211 | int then; 212 | struct timeval dur; 213 | dur.tv_sec = 60; 214 | dur.tv_usec = 0; 215 | uint64_t addr; 216 | if (h->comp_len == h->comp_cap) 217 | { 218 | h->comp_cap += COMP_CAP_STEP; 219 | h->comps = realloc(h->comps, (h->comp_cap * sizeof(rados_completion_t))); 220 | if (!h->comps) 221 | { 222 | errno = -err; 223 | return 0; 224 | } 225 | } 226 | err = rados_aio_create_completion(NULL, NULL, NULL, &(h->comps[h->comp_len])); 227 | if (err < 0) 228 | { 229 | fprintf(stderr, "could not create completion\n"); 230 | errno = -err; 231 | return 0; 232 | } 233 | then = (int) time(); 234 | while((int)time() - then < 60) 235 | { 236 | err = rados_lock_exclusive(h->ctx, "allocator", "alloc_lock", "main", "alloc", &dur, 0); 237 | if (err == 0) { 238 | break; 239 | } 240 | } 241 | if (err < 0) { 242 | fprintf(stderr, "could not lock allocator\n"); 243 | errno = -err; 244 | return 0; 245 | } 246 | rv = rados_read(h->ctx, "allocator", (char *) &addr, 8, 0); 247 | if (rv < 0 || rv != 8) { 248 | fprintf(stderr, "could not read allocator\n"); 249 | errno = -err; 250 | return 0; 251 | } 252 | printf("read allocation 0x%016" PRIx64 "\n",addr); 253 | addr += ADDR_LOCK_SIZE; 254 | printf("writing allocation 0x%016" PRIx64 "\n",addr); 255 | err = rados_aio_write_full(h->ctx, "allocator", h->comps[h->comp_len], (char *) &addr, 8); 256 | if (err < 0) { 257 | fprintf(stderr, "could not write allocator\n"); 258 | errno = -err; 259 | return 0; 260 | } 261 | rados_aio_wait_for_safe(h->comps[h->comp_len]); 262 | err = rados_unlock(h->ctx, "allocator", "alloc_lock", "main"); 263 | if (err < 0) { 264 | fprintf(stderr, "could not unlock allocator\n"); 265 | errno = -err; 266 | return 0; 267 | } 268 | rados_aio_release(h->comps[h->comp_len]); 269 | errno = 0; 270 | printf("Returning %016" PRIx64 "\n", addr - ADDR_LOCK_SIZE); 271 | return addr - ADDR_LOCK_SIZE; 272 | } 273 | 274 | void handle_close(cephprovider_handle_t *h) 275 | { 276 | int i; 277 | for (i=0; i < h->comp_len; i++) 278 | { 279 | rados_aio_wait_for_complete(h->comps[i]); 280 | rados_aio_release(h->comps[i]); 281 | } 282 | free(h->comps); 283 | rados_ioctx_destroy(h->ctx); 284 | free(h); 285 | 286 | errno = 0; 287 | } 288 | -------------------------------------------------------------------------------- /internal/fileprovider/fileprovider.go: -------------------------------------------------------------------------------- 1 | package fileprovider 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "sync" 8 | 9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider" 10 | "github.com/op/go-logging" 11 | ) 12 | 13 | var log *logging.Logger 14 | 15 | func init() { 16 | log = logging.MustGetLogger("log") 17 | } 18 | 19 | const NUMFILES = 256 20 | 21 | type writeparams struct { 22 | Address uint64 23 | Data []byte 24 | } 25 | 26 | type FileProviderSegment 
struct { 27 | sp *FileStorageProvider 28 | fidx int 29 | f *os.File 30 | base int64 31 | ptr int64 32 | wchan chan writeparams 33 | wg sync.WaitGroup 34 | } 35 | 36 | type FileStorageProvider struct { 37 | fidx chan int 38 | retfidx chan int 39 | dbf []*os.File 40 | dbrf []*os.File 41 | dbrf_mtx []sync.Mutex 42 | favail []bool 43 | } 44 | 45 | func (seg *FileProviderSegment) writer() { 46 | 47 | for args := range seg.wchan { 48 | off := int64(args.Address & ((1 << 50) - 1)) 49 | lenarr := make([]byte, 2) 50 | lenarr[0] = byte(len(args.Data)) 51 | lenarr[1] = byte(len(args.Data) >> 8) 52 | _, err := seg.f.WriteAt(lenarr, off) 53 | if err != nil { 54 | log.Panic("File writing error %v", err) 55 | } 56 | _, err = seg.f.WriteAt(args.Data, off+2) 57 | if err != nil { 58 | log.Panic("File writing error %v", err) 59 | } 60 | } 61 | seg.wg.Done() 62 | } 63 | func (seg *FileProviderSegment) init() { 64 | seg.wchan = make(chan writeparams, 16) 65 | seg.wg.Add(1) 66 | go seg.writer() 67 | } 68 | 69 | //Returns the address of the first free word in the segment when it was locked 70 | func (seg *FileProviderSegment) BaseAddress() uint64 { 71 | //This seems arbitrary, why not go with the top 8 bits? The reason is this: 72 | //a) this still leaves 1PB per file 73 | //b) The huffman encoding can do 58 bits in 8 bytes, but anything more is 9 74 | //c) if we later decide to more than 256 files, we can 75 | return (uint64(seg.fidx) << 50) + uint64(seg.base) 76 | } 77 | 78 | //Unlocks the segment for the StorageProvider to give to other consumers 79 | //Implies a flush 80 | func (seg *FileProviderSegment) Unlock() { 81 | seg.Flush() 82 | seg.sp.retfidx <- seg.fidx 83 | } 84 | 85 | //Writes a slice to the segment, returns immediately 86 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument 87 | //It is up to the implementer to work out how to report no space immediately 88 | //The uint64 rv is the address to be used for the next write 89 | func (seg *FileProviderSegment) Write(uuid []byte, address uint64, data []byte) (uint64, error) { 90 | //TODO remove 91 | if seg.ptr != int64(address&((1<<50)-1)) { 92 | log.Panic("Pointer does not match address %x vs %x", seg.ptr, int64(address&((1<<50)-1))) 93 | } 94 | wp := writeparams{Address: address, Data: data} 95 | seg.wchan <- wp 96 | seg.ptr = int64(address&((1<<50)-1)) + int64(len(data)) + 2 97 | return uint64(seg.ptr) + (uint64(seg.fidx) << 50), nil 98 | } 99 | 100 | //Block until all writes are complete, not 101 | func (seg *FileProviderSegment) Flush() { 102 | close(seg.wchan) 103 | seg.wg.Wait() 104 | } 105 | 106 | //Provide file indices into fidx, does not return 107 | func (sp *FileStorageProvider) provideFiles() { 108 | for { 109 | //Read all returned files 110 | ldretfi: 111 | for { 112 | select { 113 | case fi := <-sp.retfidx: 114 | sp.favail[fi] = true 115 | default: 116 | break ldretfi 117 | } 118 | } 119 | 120 | //Greedily select file 121 | minidx := -1 122 | var minv int64 = 0 123 | for i := 0; i < NUMFILES; i++ { 124 | if !sp.favail[i] { 125 | continue 126 | } 127 | off, err := sp.dbf[i].Seek(0, os.SEEK_CUR) 128 | if err != nil { 129 | log.Panic(err) 130 | } 131 | if minidx == -1 || off < minv { 132 | minidx = i 133 | minv = off 134 | } 135 | } 136 | 137 | //Return it, or do blocking read if not found 138 | if minidx != -1 { 139 | sp.favail[minidx] = false 140 | sp.fidx <- minidx 141 | } else { 142 | //Do a blocking read on retfidx to avoid fast spin on nonblocking 143 | fi := <-sp.retfidx 144 | sp.favail[fi] = true 145 | } 146 | 
147 | } 148 | } 149 | 150 | //Called at startup 151 | func (sp *FileStorageProvider) Initialize(opts map[string]string) { 152 | //Initialize file indices thingy 153 | sp.fidx = make(chan int) 154 | sp.retfidx = make(chan int, NUMFILES+1) 155 | sp.dbf = make([]*os.File, NUMFILES) 156 | sp.dbrf = make([]*os.File, NUMFILES) 157 | sp.dbrf_mtx = make([]sync.Mutex, NUMFILES) 158 | sp.favail = make([]bool, NUMFILES) 159 | for i := 0; i < NUMFILES; i++ { 160 | //Open file 161 | dbpath, ok := opts["dbpath"] 162 | if !ok { 163 | log.Panic("Expected dbpath") 164 | } 165 | fname := fmt.Sprintf("%s/blockstore.%02x.db", dbpath, i) 166 | //write file descriptor 167 | { 168 | f, err := os.OpenFile(fname, os.O_RDWR, 0666) 169 | if err != nil && os.IsNotExist(err) { 170 | log.Critical("Aborting: seems database does not exist. Have you run `btrdbd -makedb`?") 171 | os.Exit(1) 172 | } 173 | if err != nil { 174 | log.Panicf("Problem with blockstore DB: ", err) 175 | } 176 | sp.dbf[i] = f 177 | } 178 | //Read file descriptor 179 | { 180 | f, err := os.OpenFile(fname, os.O_RDONLY, 0666) 181 | if err != nil { 182 | log.Panicf("Problem with blockstore DB: ", err) 183 | } 184 | sp.dbrf[i] = f 185 | } 186 | sp.favail[i] = true 187 | } 188 | go sp.provideFiles() 189 | 190 | } 191 | 192 | // Lock a segment, or block until a segment can be locked 193 | // Returns a Segment struct 194 | func (sp *FileStorageProvider) LockSegment(uuid []byte) bprovider.Segment { 195 | //Grab a file index 196 | fidx := <-sp.fidx 197 | f := sp.dbf[fidx] 198 | l, err := f.Seek(0, os.SEEK_END) 199 | if err != nil { 200 | log.Panicf("Error on lock segment: %v", err) 201 | } 202 | 203 | //Construct segment 204 | seg := &FileProviderSegment{sp: sp, fidx: fidx, f: sp.dbf[fidx], base: l, ptr: l} 205 | seg.init() 206 | 207 | return seg 208 | } 209 | 210 | //This is the size of a maximal size cblock + header 211 | const FIRSTREAD = 3459 212 | 213 | func (sp *FileStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte { 214 | fidx := address >> 50 215 | off := int64(address & ((1 << 50) - 1)) 216 | if fidx > NUMFILES { 217 | log.Panic("Encoded file idx too large") 218 | } 219 | sp.dbrf_mtx[fidx].Lock() 220 | nread, err := sp.dbrf[fidx].ReadAt(buffer[:FIRSTREAD], off) 221 | if err != nil && err != io.EOF { 222 | log.Panic("Non EOF read error: %v", err) 223 | } 224 | if nread < 2 { 225 | log.Panic("Unexpected (very) short read") 226 | } 227 | //Now we read the blob size 228 | bsize := int(buffer[0]) + (int(buffer[1]) << 8) 229 | if bsize > nread-2 { 230 | _, err := sp.dbrf[fidx].ReadAt(buffer[nread:bsize+2], off+int64(nread)) 231 | if err != nil { 232 | log.Panic("Read error: %v", err) 233 | } 234 | } 235 | sp.dbrf_mtx[fidx].Unlock() 236 | return buffer[2 : bsize+2] 237 | } 238 | 239 | //Called to create the database for the first time 240 | func (sp *FileStorageProvider) CreateDatabase(opts map[string]string) error { 241 | for i := 0; i < NUMFILES; i++ { 242 | //Open file 243 | dbpath, ok := opts["dbpath"] 244 | if !ok { 245 | log.Panicf("Expected dbpath") 246 | } 247 | fname := fmt.Sprintf("%s/blockstore.%02x.db", dbpath, i) 248 | //write file descriptor 249 | { 250 | f, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666) 251 | if err != nil && !os.IsExist(err) { 252 | log.Panicf("Problem with blockstore DB: ", err) 253 | } else if os.IsExist(err) { 254 | return bprovider.ErrExists 255 | } 256 | //Add a file tag 257 | //An exercise left for the reader: if you remove this, everything breaks :-) 258 | //Hint: what is the 
physical address of the first byte of file zero? 259 | _, err = f.Write([]byte("QUASARDB")) 260 | if err != nil { 261 | log.Panicf("Could not write to blockstore:", err) 262 | } 263 | 264 | err = f.Close() 265 | if err != nil { 266 | log.Panicf("Error on close %v", err) 267 | } 268 | } 269 | } 270 | return nil 271 | } 272 | -------------------------------------------------------------------------------- /quasar.go: -------------------------------------------------------------------------------- 1 | package btrdb 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pborman/uuid" 9 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore" 10 | "github.com/SoftwareDefinedBuildings/btrdb/qtree" 11 | "github.com/op/go-logging" 12 | ) 13 | 14 | var log *logging.Logger 15 | 16 | func init() { 17 | log = logging.MustGetLogger("log") 18 | } 19 | 20 | type openTree struct { 21 | store []qtree.Record 22 | id uuid.UUID 23 | sigEC chan bool 24 | } 25 | 26 | const MinimumTime = -(16 << 56) 27 | const MaximumTime = (48 << 56) 28 | const LatestGeneration = bstore.LatestGeneration 29 | 30 | type Quasar struct { 31 | cfg QuasarConfig 32 | bs *bstore.BlockStore 33 | 34 | //Transaction coalescence 35 | globlock sync.Mutex 36 | treelocks map[[16]byte]*sync.Mutex 37 | openTrees map[[16]byte]*openTree 38 | } 39 | 40 | func newOpenTree(id uuid.UUID) *openTree { 41 | return &openTree{ 42 | id: id, 43 | } 44 | } 45 | 46 | type QuasarConfig struct { 47 | //Measured in the number of datablocks 48 | //So 1000 is 8 MB cache 49 | DatablockCacheSize uint64 50 | 51 | //This enables the grouping of value inserts 52 | //with a commit every Interval millis 53 | //If the number of stored values exceeds 54 | //EarlyTrip 55 | TransactionCoalesceEnable bool 56 | TransactionCoalesceInterval uint64 57 | TransactionCoalesceEarlyTrip uint64 58 | 59 | Params map[string]string 60 | } 61 | 62 | // Return true if there are uncommited results to be written to disk 63 | // Should only be used during shutdown as it hogs the glock 64 | func (q *Quasar) IsPending() bool { 65 | isPend := false 66 | q.globlock.Lock() 67 | for uuid, ot := range q.openTrees { 68 | q.treelocks[uuid].Lock() 69 | if len(ot.store) != 0 { 70 | isPend = true 71 | q.treelocks[uuid].Unlock() 72 | break 73 | } 74 | q.treelocks[uuid].Unlock() 75 | } 76 | q.globlock.Unlock() 77 | return isPend 78 | } 79 | 80 | func NewQuasar(cfg *QuasarConfig) (*Quasar, error) { 81 | bs, err := bstore.NewBlockStore(cfg.Params) 82 | if err != nil { 83 | return nil, err 84 | } 85 | rv := &Quasar{ 86 | cfg: *cfg, 87 | bs: bs, 88 | openTrees: make(map[[16]byte]*openTree, 128), 89 | treelocks: make(map[[16]byte]*sync.Mutex, 128), 90 | } 91 | return rv, nil 92 | } 93 | 94 | func (q *Quasar) getTree(id uuid.UUID) (*openTree, *sync.Mutex) { 95 | mk := bstore.UUIDToMapKey(id) 96 | q.globlock.Lock() 97 | ot, ok := q.openTrees[mk] 98 | if !ok { 99 | ot := newOpenTree(id) 100 | mtx := &sync.Mutex{} 101 | q.openTrees[mk] = ot 102 | q.treelocks[mk] = mtx 103 | q.globlock.Unlock() 104 | return ot, mtx 105 | } 106 | mtx, ok := q.treelocks[mk] 107 | if !ok { 108 | log.Panicf("This should not happen") 109 | } 110 | q.globlock.Unlock() 111 | return ot, mtx 112 | } 113 | 114 | func (t *openTree) commit(q *Quasar) { 115 | if len(t.store) == 0 { 116 | //This might happen with a race in the timeout commit 117 | fmt.Println("no store in commit") 118 | return 119 | } 120 | tr, err := qtree.NewWriteQTree(q.bs, t.id) 121 | if err != nil { 122 | log.Panic(err) 123 | } 124 | if err := 
tr.InsertValues(t.store); err != nil { 125 | log.Error("BAD INSERT: ", err) 126 | } 127 | tr.Commit() 128 | t.store = nil 129 | } 130 | func (q *Quasar) InsertValues(id uuid.UUID, r []qtree.Record) { 131 | defer func() { 132 | if r := recover(); r != nil { 133 | log.Error("BAD INSERT: ", r) 134 | } 135 | }() 136 | tr, mtx := q.getTree(id) 137 | mtx.Lock() 138 | if tr == nil { 139 | log.Panicf("This should not happen") 140 | } 141 | if tr.store == nil { 142 | //Empty store 143 | tr.store = make([]qtree.Record, 0, len(r)*2) 144 | tr.sigEC = make(chan bool, 1) 145 | //Also spawn the coalesce timeout goroutine 146 | go func(abrt chan bool) { 147 | tmt := time.After(time.Duration(q.cfg.TransactionCoalesceInterval) * time.Millisecond) 148 | select { 149 | case <-tmt: 150 | //do coalesce 151 | mtx.Lock() 152 | //In case we early tripped between waiting for lock and getting it, commit will return ok 153 | //log.Debug("Coalesce timeout %v", id.String()) 154 | tr.commit(q) 155 | mtx.Unlock() 156 | case <-abrt: 157 | return 158 | } 159 | }(tr.sigEC) 160 | } 161 | tr.store = append(tr.store, r...) 162 | if uint64(len(tr.store)) >= q.cfg.TransactionCoalesceEarlyTrip { 163 | tr.sigEC <- true 164 | log.Debug("Coalesce early trip %v", id.String()) 165 | tr.commit(q) 166 | } 167 | mtx.Unlock() 168 | } 169 | func (q *Quasar) Flush(id uuid.UUID) error { 170 | tr, mtx := q.getTree(id) 171 | mtx.Lock() 172 | if len(tr.store) != 0 { 173 | tr.sigEC <- true 174 | tr.commit(q) 175 | fmt.Printf("Commit done %+v\n", id) 176 | } else { 177 | fmt.Printf("no store\n") 178 | } 179 | mtx.Unlock() 180 | return nil 181 | } 182 | 183 | //These functions are the API. TODO add all the bounds checking on PW, and sanity on start/end 184 | func (q *Quasar) QueryValues(id uuid.UUID, start int64, end int64, gen uint64) ([]qtree.Record, uint64, error) { 185 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 186 | if err != nil { 187 | return nil, 0, err 188 | } 189 | rv, err := tr.ReadStandardValuesBlock(start, end) 190 | return rv, tr.Generation(), err 191 | } 192 | 193 | func (q *Quasar) QueryValuesStream(id uuid.UUID, start int64, end int64, gen uint64) (chan qtree.Record, chan error, uint64) { 194 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 195 | if err != nil { 196 | return nil, nil, 0 197 | } 198 | recordc := make(chan qtree.Record) 199 | errc := make(chan error) 200 | go tr.ReadStandardValuesCI(recordc, errc, start, end) 201 | return recordc, errc, tr.Generation() 202 | } 203 | 204 | func (q *Quasar) QueryStatisticalValues(id uuid.UUID, start int64, end int64, 205 | gen uint64, pointwidth uint8) ([]qtree.StatRecord, uint64, error) { 206 | //fmt.Printf("QSV0 s=%v e=%v pw=%v\n", start, end, pointwidth) 207 | start &^= ((1 << pointwidth) - 1) 208 | end &^= ((1 << pointwidth) - 1) 209 | end -= 1 210 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 211 | if err != nil { 212 | return nil, 0, err 213 | } 214 | rv, err := tr.QueryStatisticalValuesBlock(start, end, pointwidth) 215 | if err != nil { 216 | return nil, 0, err 217 | } 218 | return rv, tr.Generation(), nil 219 | } 220 | func (q *Quasar) QueryStatisticalValuesStream(id uuid.UUID, start int64, end int64, 221 | gen uint64, pointwidth uint8) (chan qtree.StatRecord, chan error, uint64) { 222 | fmt.Printf("QSV1 s=%v e=%v pw=%v\n", start, end, pointwidth) 223 | start &^= ((1 << pointwidth) - 1) 224 | end &^= ((1 << pointwidth) - 1) 225 | end -= 1 226 | rvv := make(chan qtree.StatRecord, 1024) 227 | rve := make(chan error) 228 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 229 | if 
err != nil { 230 | return nil, nil, 0 231 | } 232 | go tr.QueryStatisticalValues(rvv, rve, start, end, pointwidth) 233 | return rvv, rve, tr.Generation() 234 | } 235 | 236 | func (q *Quasar) QueryWindow(id uuid.UUID, start int64, end int64, 237 | gen uint64, width uint64, depth uint8) (chan qtree.StatRecord, uint64) { 238 | rvv := make(chan qtree.StatRecord, 1024) 239 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 240 | if err != nil { 241 | return nil, 0 242 | } 243 | go tr.QueryWindow(start, end, width, depth, rvv) 244 | return rvv, tr.Generation() 245 | } 246 | 247 | func (q *Quasar) QueryGeneration(id uuid.UUID) (uint64, error) { 248 | sb := q.bs.LoadSuperblock(id, bstore.LatestGeneration) 249 | if sb == nil { 250 | return 0, qtree.ErrNoSuchStream 251 | } 252 | return sb.Gen(), nil 253 | } 254 | 255 | func (q *Quasar) QueryNearestValue(id uuid.UUID, time int64, backwards bool, gen uint64) (qtree.Record, uint64, error) { 256 | tr, err := qtree.NewReadQTree(q.bs, id, gen) 257 | if err != nil { 258 | return qtree.Record{}, 0, err 259 | } 260 | rv, err := tr.FindNearestValue(time, backwards) 261 | return rv, tr.Generation(), err 262 | } 263 | 264 | type ChangedRange struct { 265 | Start int64 266 | End int64 267 | } 268 | 269 | //Resolution is how far down the tree to go when working out which blocks have changed. Higher resolutions are faster 270 | //but will give you back coarser results. 271 | func (q *Quasar) QueryChangedRanges(id uuid.UUID, startgen uint64, endgen uint64, resolution uint8) ([]ChangedRange, uint64, error) { 272 | //0 is a reserved generation, so is 1, which means "before first" 273 | if startgen == 0 { 274 | startgen = 1 275 | } 276 | tr, err := qtree.NewReadQTree(q.bs, id, endgen) 277 | if err != nil { 278 | log.Debug("Error on QCR open tree") 279 | return nil, 0, err 280 | } 281 | rv := make([]ChangedRange, 0, 1024) 282 | rch := tr.FindChangedSince(startgen, resolution) 283 | var lr *ChangedRange = nil 284 | for { 285 | 286 | select { 287 | case cr, ok := <-rch: 288 | if !ok { 289 | //This is the end. 290 | //Do we have an unsaved LR? 
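				//lr holds the most recently coalesced range (adjacent ranges are
				//merged further down when cr.Start == lr.End), so it still needs
				//to be flushed into rv before returning.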
291 | if lr != nil { 292 | rv = append(rv, *lr) 293 | } 294 | return rv, tr.Generation(), nil 295 | } 296 | if !cr.Valid { 297 | log.Panicf("Didn't think this could happen") 298 | } 299 | //Coalesce 300 | if lr != nil && cr.Start == lr.End { 301 | lr.End = cr.End 302 | } else { 303 | if lr != nil { 304 | rv = append(rv, *lr) 305 | } 306 | lr = &ChangedRange{Start: cr.Start, End: cr.End} 307 | } 308 | } 309 | } 310 | return rv, tr.Generation(), nil 311 | } 312 | 313 | func (q *Quasar) DeleteRange(id uuid.UUID, start int64, end int64) error { 314 | tr, mtx := q.getTree(id) 315 | mtx.Lock() 316 | if len(tr.store) != 0 { 317 | tr.sigEC <- true 318 | tr.commit(q) 319 | } 320 | wtr, err := qtree.NewWriteQTree(q.bs, id) 321 | if err != nil { 322 | log.Panic(err) 323 | } 324 | err = wtr.DeleteRange(start, end) 325 | if err != nil { 326 | log.Panic(err) 327 | } 328 | wtr.Commit() 329 | mtx.Unlock() 330 | return nil 331 | } 332 | -------------------------------------------------------------------------------- /internal/bstore/bstore_test.go: -------------------------------------------------------------------------------- 1 | package bstore 2 | 3 | import ( 4 | "github.com/pborman/uuid" 5 | "math/rand" 6 | "reflect" 7 | "strings" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | func mUint64() uint64 { 13 | return uint64(rand.Uint32()) 14 | //return (uint64(rand.Uint32()) << 32) + uint64(rand.Uint32()) 15 | } 16 | func mInt64() int64 { 17 | return int64(mUint64()) 18 | } 19 | func mFloat64() float64 { 20 | return rand.Float64() 21 | } 22 | 23 | /** 24 | * Randomly populate the fields of a struct 25 | */ 26 | func FillBlock(rv interface{}) { 27 | rand.Seed(time.Now().UnixNano()) 28 | t := reflect.ValueOf(rv) 29 | for i := 0; i < t.Elem().NumField(); i++ { 30 | fld := t.Elem().Field(i) 31 | switch fld.Type().Kind() { 32 | case reflect.Array: 33 | for k := 0; k < fld.Len(); k++ { 34 | if fld.Type().Elem().Kind() == reflect.Float64 { 35 | fld.Index(k).SetFloat(mFloat64()) 36 | } else if fld.Type().Elem().Kind() == reflect.Uint64 { 37 | fld.Index(k).SetUint(mUint64()) 38 | } else if fld.Type().Elem().Kind() == reflect.Int64 { 39 | fld.Index(k).SetInt(mInt64()) 40 | } else if fld.Type().Elem().Kind() == reflect.Uint8 { 41 | fld.Index(k).SetUint(mUint64()) 42 | } else { 43 | log.Panic("Unhandled element type: %v", fld.Type().Elem().Kind()) 44 | } 45 | } 46 | case reflect.Uint64: 47 | fld.SetUint(mUint64()) 48 | case reflect.Uint8: 49 | fld.SetUint(mUint64() & 0xFF) 50 | case reflect.Uint16: 51 | fld.SetUint(mUint64() & 0xFFFF) 52 | case reflect.Int64: 53 | fld.SetInt(mInt64()) 54 | case reflect.Int: 55 | fld.SetInt(mInt64()) 56 | default: 57 | log.Panicf("Unrecognized type: %+v", fld.Type().Kind()) 58 | } 59 | } 60 | } 61 | 62 | func MakeAllocatedCoreblock() *Coreblock { 63 | mBS() 64 | db, err := _gen.AllocateCoreblock() 65 | if err != nil { 66 | log.Panic(err) 67 | } 68 | addr := db.Identifier 69 | FillBlock(db) 70 | db.Identifier = addr 71 | return db 72 | } 73 | 74 | func MakeAllocatedVBlock() *Vectorblock { 75 | mBS() 76 | v, err := _gen.AllocateVectorblock() 77 | if err != nil { 78 | log.Panic(err) 79 | } 80 | addr := v.Identifier 81 | FillBlock(v) 82 | v.Len = VSIZE 83 | v.Identifier = addr 84 | return v 85 | } 86 | 87 | func MakeCoreblock() *Coreblock { 88 | db := new(Coreblock) 89 | FillBlock(db) 90 | for i := 0; i < KFACTOR; i++ { 91 | //These have special meaning, so don't test it here 92 | if db.Addr[i] == 0 { 93 | db.Addr[i] = 1 94 | } 95 | } 96 | return db 97 | } 98 | 99 | func MakeVBlock() 
*Vectorblock { 100 | v := new(Vectorblock) 101 | FillBlock(v) 102 | v.Len = VSIZE 103 | return v 104 | } 105 | 106 | /** 107 | * This should work with any object that uses the struct tags to 108 | * mean fields that don't need to match after SERDES 109 | */ 110 | func CompareNoTags(lhs interface{}, rhs interface{}, tags []string) bool { 111 | chk := make(map[string]bool) 112 | for _, s := range tags { 113 | chk[s] = true 114 | } 115 | vlhs := reflect.ValueOf(lhs) 116 | vrhs := reflect.ValueOf(rhs) 117 | if vlhs.Type() != vrhs.Type() { 118 | log.Fatalf("Types differ %v %v", vlhs.Type(), vrhs.Type()) 119 | return false 120 | } 121 | for k := 0; k < vlhs.NumField(); k++ { 122 | tagstring := string(reflect.TypeOf(lhs).Field(k).Tag) 123 | tags := strings.Split(tagstring, ",") 124 | doskip := false 125 | for _, k := range tags { 126 | if chk[k] { 127 | doskip = true 128 | } 129 | } 130 | if doskip { 131 | continue 132 | } 133 | if !reflect.DeepEqual(vlhs.Field(k).Interface(), vrhs.Field(k).Interface()) { 134 | log.Fatalf("Field differs: %v, %v != %v", reflect.TypeOf(lhs).Field(k).Name, 135 | vlhs.Field(k).Interface(), vrhs.Field(k).Interface()) 136 | return false 137 | } 138 | } 139 | return true 140 | } 141 | 142 | var _bs *BlockStore = nil 143 | var _gen *Generation = nil 144 | 145 | func mBS() { 146 | testuuid := uuid.NewRandom() 147 | params := map[string]string{ 148 | "dbpath": "/srv/quasartestdb/", 149 | "mongoserver": "localhost", 150 | "cachesize": "0", 151 | } 152 | nbs, err := NewBlockStore(params) 153 | if err != nil { 154 | log.Panic(err) 155 | } 156 | if _bs == nil { 157 | _bs = nbs 158 | _gen = _bs.ObtainGeneration(testuuid) 159 | } 160 | } 161 | 162 | func TestCoreBlockSERDES(t *testing.T) { 163 | db := MakeCoreblock() 164 | buf := make([]byte, CBSIZE) 165 | db.Serialize(buf) 166 | out := new(Coreblock) 167 | out.Deserialize(buf) 168 | if !CompareNoTags(*db, *out, []string{"implicit"}) { 169 | t.Error("Core block SERDES faled") 170 | } 171 | } 172 | 173 | func TestCoreBlockSERDESAbsFullZero(t *testing.T) { 174 | db := MakeCoreblock() 175 | db.Addr[10] = 0 176 | db.Min[10] = 0 177 | db.Mean[10] = 0 178 | db.Max[10] = 0 179 | db.Count[10] = 0 180 | 181 | db.Addr[11] = 0 182 | db.Min[11] = 0 183 | db.Mean[11] = 0 184 | db.Max[11] = 0 185 | db.Count[11] = 0 186 | db.CGeneration[11] = 0 187 | 188 | db.Addr[54] = 0 189 | db.Min[54] = 0 190 | db.Mean[54] = 0 191 | db.Max[54] = 0 192 | db.Count[54] = 0 193 | 194 | for i := 55; i < KFACTOR; i++ { 195 | db.Addr[i] = 0 196 | db.Min[i] = 0 197 | db.Mean[i] = 0 198 | db.Max[i] = 0 199 | db.Count[i] = 0 200 | db.CGeneration[i] = 0 201 | } 202 | 203 | buf := make([]byte, CBSIZE) 204 | db.Serialize(buf) 205 | out := new(Coreblock) 206 | out.Deserialize(buf) 207 | 208 | if !CompareNoTags(*db, *out, []string{"implicit"}) { 209 | t.Error("Core block SERDES faled") 210 | } 211 | } 212 | 213 | func TestCoreBlockBadDES(t *testing.T) { 214 | db := MakeCoreblock() 215 | buf := make([]byte, CBSIZE) 216 | db.Serialize(buf) 217 | out := new(Coreblock) 218 | out.Deserialize(buf) 219 | if out.GetDatablockType() != Core { 220 | t.FailNow() 221 | } 222 | defer func() { 223 | if r := recover(); r == nil { 224 | //We expected a failure 225 | t.FailNow() 226 | } 227 | }() 228 | vb := new(Vectorblock) 229 | vb.Deserialize(buf) 230 | t.FailNow() 231 | } 232 | func TestVectorBlockBadDES(t *testing.T) { 233 | v := MakeVBlock() 234 | buf := make([]byte, VBSIZE) 235 | v.Serialize(buf) 236 | out := new(Vectorblock) 237 | out.Deserialize(buf) 238 | if 
out.GetDatablockType() != Vector { 239 | t.Fatal("Wrong id on block") 240 | } 241 | defer func() { 242 | if r := recover(); r == nil { 243 | //We expected a failure 244 | t.Fatal("Did not throw exception") 245 | } 246 | }() 247 | cb := new(Coreblock) 248 | cb.Deserialize(buf) 249 | t.FailNow() 250 | } 251 | func TestBufferType(t *testing.T) { 252 | v := MakeVBlock() 253 | buf := make([]byte, VBSIZE) 254 | v.Serialize(buf) 255 | if DatablockGetBufferType(buf) != Vector { 256 | t.Fatal("Expected Vector") 257 | } 258 | c := MakeCoreblock() 259 | buf2 := make([]byte, CBSIZE) 260 | c.Serialize(buf2) 261 | if DatablockGetBufferType(buf2) != Core { 262 | t.Fatal("Expected Core") 263 | } 264 | buf3 := make([]byte, 2) 265 | buf3[0] = byte(5) 266 | if DatablockGetBufferType(buf3) != Bad { 267 | t.Fatal("Expected Bad") 268 | } 269 | } 270 | func TestVBlockSERDES(t *testing.T) { 271 | v := MakeVBlock() 272 | buf := make([]byte, VBSIZE) 273 | v.Serialize(buf) 274 | out := new(Vectorblock) 275 | out.Deserialize(buf) 276 | if !CompareNoTags(*v, *out, []string{"implicit"}) { 277 | t.Error("Vector block SERDES failed") 278 | } 279 | } 280 | 281 | func TestVBlockManSERDES(t *testing.T) { 282 | v := new(Vectorblock) 283 | for i := 0; i < 6; i++ { 284 | v.Time[i] = int64(i * 100000) 285 | v.Value[i] = float64(i * 100000.0) 286 | } 287 | v.Len = 6 288 | buf := make([]byte, VBSIZE) 289 | v.Serialize(buf) 290 | out := new(Vectorblock) 291 | out.Deserialize(buf) 292 | for i := 0; i < 6; i++ { 293 | if v.Value[i] != out.Value[i] { 294 | t.Error("Fail") 295 | } 296 | } 297 | } 298 | 299 | func TestCBlockE2ESERDES(t *testing.T) { 300 | db := MakeAllocatedCoreblock() 301 | for i := 0; i < KFACTOR; i++ { 302 | vb, err := _gen.AllocateVectorblock() 303 | if err != nil { 304 | t.Errorf("Could not allocate VB %v", err) 305 | } 306 | reloc_addr := vb.Identifier 307 | FillBlock(vb) 308 | vb.Len = VSIZE 309 | vb.Identifier = reloc_addr 310 | db.Addr[i] = vb.Identifier 311 | } 312 | cpy := *db 313 | amap, err := _gen.Commit() 314 | if err != nil { 315 | t.Error(err) 316 | } 317 | _bs = nil 318 | _gen = nil 319 | log.Info("reloc address was 0x%016x", cpy.Identifier) 320 | log.Info("cnt0 was %v", cpy.Count[0]) 321 | actual_addr, ok := amap[cpy.Identifier] 322 | if !ok { 323 | t.Errorf("relocation address 0x%016x did not exist in address map", cpy.Identifier) 324 | } 325 | mBS() 326 | out := _bs.ReadDatablock(actual_addr, cpy.Generation, cpy.PointWidth, cpy.StartTime) 327 | cpy.Identifier = actual_addr 328 | for i := 0; i < KFACTOR; i++ { 329 | cpy.Addr[i] = amap[cpy.Addr[i]] 330 | } 331 | if !CompareNoTags(cpy, *(out.(*Coreblock)), []string{}) { 332 | t.Error("E2E C SERDES failed") 333 | } 334 | } 335 | 336 | func TestVBlockE2ESERDES(t *testing.T) { 337 | db := MakeAllocatedVBlock() 338 | cpy := *db 339 | amap, err := _gen.Commit() 340 | if err != nil { 341 | t.Error(err) 342 | } 343 | _bs = nil 344 | _gen = nil 345 | log.Info("reloc address was 0x%016x", cpy.Identifier) 346 | actual_addr, ok := amap[cpy.Identifier] 347 | if !ok { 348 | t.Errorf("relocation address 0x%016x did not exist in address map", cpy.Identifier) 349 | } 350 | mBS() 351 | out := _bs.ReadDatablock(actual_addr, cpy.Generation, cpy.PointWidth, cpy.StartTime) 352 | cpy.Identifier = actual_addr 353 | //cpy.Identifier = actual_addr 354 | if !CompareNoTags(cpy, *(out.(*Vectorblock)), []string{}) { 355 | t.Error("E2E V SERDES failed") 356 | } 357 | } 358 | 359 | func TestVCopyInto(t *testing.T) { 360 | db := MakeVBlock() 361 | out := &Vectorblock{} 362 | 
db.CopyInto(out) 363 | if !CompareNoTags(*db, *out, []string{"metadata"}) { 364 | t.Error("V CopyInto failed") 365 | } 366 | } 367 | 368 | func TestCCopyInto(t *testing.T) { 369 | db := MakeCoreblock() 370 | out := &Coreblock{} 371 | db.CopyInto(out) 372 | if !CompareNoTags(*db, *out, []string{"metadata"}) { 373 | t.Error("C CopyInto failed") 374 | } 375 | } 376 | 377 | /* 378 | func BenchmarkSERDER(b *testing.B) { 379 | dblocks_in := make([]*Coreblock, b.N) 380 | for i := 0; i < b.N; i++ { 381 | dblocks_in[i] = MakeCoreblock() 382 | } 383 | dblocks_out := make([]*Coreblock, b.N) 384 | for i := 0; i < b.N; i++ { 385 | dblocks_out[i] = new(Coreblock) 386 | } 387 | buf := make([]byte, DBSIZE) 388 | b.ResetTimer() 389 | for i := 0; i < b.N; i++ { 390 | dblocks_in[0].Serialize(buf) 391 | dblocks_out[0].Deserialize(buf) 392 | } 393 | } 394 | */ 395 | -------------------------------------------------------------------------------- /qtree/qtree_utils.go: -------------------------------------------------------------------------------- 1 | package qtree 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pborman/uuid" 7 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore" 8 | ) 9 | 10 | const PWFACTOR = bstore.PWFACTOR 11 | const KFACTOR = bstore.KFACTOR 12 | const MICROSECOND = 1000 13 | const MILLISECOND = 1000 * MICROSECOND 14 | const SECOND = 1000 * MILLISECOND 15 | const MINUTE = 60 * SECOND 16 | const HOUR = 60 * MINUTE 17 | const DAY = 24 * HOUR 18 | const ROOTPW = 56 //This makes each bucket at the root ~= 2.2 years 19 | //so the root spans 146.23 years 20 | const ROOTSTART = -1152921504606846976 //This makes the 16th bucket start at 1970 (0) 21 | const MinimumTime = -(16 << 56) 22 | const MaximumTime = (48 << 56) 23 | 24 | type QTree struct { 25 | sb *bstore.Superblock 26 | bs *bstore.BlockStore 27 | gen *bstore.Generation 28 | root *QTreeNode 29 | commited bool 30 | } 31 | 32 | type Record struct { 33 | Time int64 34 | Val float64 35 | } 36 | 37 | type QTreeNode struct { 38 | tr *QTree 39 | vector_block *bstore.Vectorblock 40 | core_block *bstore.Coreblock 41 | isLeaf bool 42 | child_cache [bstore.KFACTOR]*QTreeNode 43 | parent *QTreeNode 44 | isNew bool 45 | } 46 | 47 | type RecordSlice []Record 48 | 49 | type ChangedRange struct { 50 | Valid bool 51 | Start int64 52 | End int64 53 | } 54 | 55 | func (s RecordSlice) Len() int { 56 | return len(s) 57 | } 58 | 59 | func (s RecordSlice) Swap(i, j int) { 60 | s[i], s[j] = s[j], s[i] 61 | } 62 | 63 | func (s RecordSlice) Less(i, j int) bool { 64 | return s[i].Time < s[j].Time 65 | } 66 | 67 | func (tr *QTree) Commit() { 68 | if tr.commited { 69 | log.Panicf("Tree alredy comitted") 70 | } 71 | if tr.gen == nil { 72 | log.Panicf("Commit on non-write-tree") 73 | } 74 | 75 | tr.gen.Commit() 76 | tr.commited = true 77 | tr.gen = nil 78 | 79 | } 80 | 81 | func (n *QTree) FindNearestValue(time int64, backwards bool) (Record, error) { 82 | if n.root == nil { 83 | return Record{}, ErrNoSuchPoint 84 | } 85 | return n.root.FindNearestValue(time, backwards) 86 | } 87 | 88 | func (n *QTree) Generation() uint64 { 89 | if n.gen != nil { 90 | //Return the gen it will have after commit 91 | return n.gen.Number() 92 | } else { 93 | //Return it's current gen 94 | return n.sb.Gen() 95 | } 96 | return n.gen.Number() 97 | } 98 | 99 | func (tr *QTree) GetReferencedAddrsDebug() map[uint64]bool { 100 | refset := make(map[uint64]bool, 1024000) 101 | 102 | rchan := tr.GetAllReferencedVAddrs() 103 | //for i, v := range e_tree. 
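//Drain the channel returned by GetAllReferencedVAddrs into a set. The walk
//goroutine is expected to close rchan once every node has been visited, which
//is what makes ok go false and ends the loop; the log line every 8192
//addresses is just progress output for large trees.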
104 | idx := 0 105 | for { 106 | val, ok := <-rchan 107 | if idx%8192 == 0 { 108 | log.Info("Got referenced addr #%d", idx) 109 | } 110 | idx += 1 111 | if !ok { 112 | break 113 | } 114 | refset[val] = true 115 | } 116 | return refset 117 | } 118 | 119 | func (tr *QTree) LoadNode(addr uint64, impl_Generation uint64, impl_Pointwidth uint8, impl_StartTime int64) (*QTreeNode, error) { 120 | db := tr.bs.ReadDatablock(tr.sb.Uuid(), addr, impl_Generation, impl_Pointwidth, impl_StartTime) 121 | n := &QTreeNode{tr: tr} 122 | switch db.GetDatablockType() { 123 | case bstore.Vector: 124 | n.vector_block = db.(*bstore.Vectorblock) 125 | n.isLeaf = true 126 | case bstore.Core: 127 | n.core_block = db.(*bstore.Coreblock) 128 | n.isLeaf = false 129 | default: 130 | log.Panicf("What kind of type is this? %+v", db.GetDatablockType()) 131 | } 132 | if n.ThisAddr() == 0 { 133 | log.Panicf("Node has zero address") 134 | } 135 | return n, nil 136 | } 137 | 138 | func (tr *QTree) NewCoreNode(startTime int64, pointWidth uint8) (*QTreeNode, error) { 139 | if tr.gen == nil { 140 | return nil, ErrImmutableTree 141 | } 142 | cb, err := tr.gen.AllocateCoreblock() 143 | if err != nil { 144 | return nil, err 145 | } 146 | cb.PointWidth = pointWidth 147 | startTime = ClampTime(startTime, pointWidth) 148 | cb.StartTime = startTime 149 | rv := &QTreeNode{ 150 | core_block: cb, 151 | tr: tr, 152 | isNew: true, 153 | } 154 | return rv, nil 155 | } 156 | 157 | func (tr *QTree) NewVectorNode(startTime int64, pointWidth uint8) (*QTreeNode, error) { 158 | if tr.gen == nil { 159 | return nil, ErrImmutableTree 160 | } 161 | vb, err := tr.gen.AllocateVectorblock() 162 | if err != nil { 163 | return nil, err 164 | } 165 | vb.PointWidth = pointWidth 166 | startTime = ClampTime(startTime, pointWidth) 167 | vb.StartTime = startTime 168 | rv := &QTreeNode{ 169 | vector_block: vb, 170 | tr: tr, 171 | isLeaf: true, 172 | isNew: true, 173 | } 174 | return rv, nil 175 | } 176 | 177 | /** 178 | * Load a quasar tree 179 | */ 180 | func NewReadQTree(bs *bstore.BlockStore, id uuid.UUID, generation uint64) (*QTree, error) { 181 | sb := bs.LoadSuperblock(id, generation) 182 | if sb == nil { 183 | return nil, ErrNoSuchStream 184 | } 185 | rv := &QTree{sb: sb, bs: bs} 186 | if sb.Root() != 0 { 187 | rt, err := rv.LoadNode(sb.Root(), sb.Gen(), ROOTPW, ROOTSTART) 188 | if err != nil { 189 | log.Panicf("%v", err) 190 | return nil, err 191 | } 192 | //log.Debug("The start time for the root is %v",rt.StartTime()) 193 | rv.root = rt 194 | } 195 | return rv, nil 196 | } 197 | 198 | func NewWriteQTree(bs *bstore.BlockStore, id uuid.UUID) (*QTree, error) { 199 | gen := bs.ObtainGeneration(id) 200 | rv := &QTree{ 201 | sb: gen.New_SB, 202 | gen: gen, 203 | bs: bs, 204 | } 205 | 206 | //If there is an existing root node, we need to load it so that it 207 | //has the correct values 208 | if rv.sb.Root() != 0 { 209 | rt, err := rv.LoadNode(rv.sb.Root(), rv.sb.Gen(), ROOTPW, ROOTSTART) 210 | if err != nil { 211 | log.Panicf("%v", err) 212 | return nil, err 213 | } 214 | rv.root = rt 215 | } else { 216 | rt, err := rv.NewCoreNode(ROOTSTART, ROOTPW) 217 | if err != nil { 218 | log.Panicf("%v", err) 219 | return nil, err 220 | } 221 | rv.root = rt 222 | } 223 | 224 | return rv, nil 225 | } 226 | 227 | func (n *QTreeNode) Generation() uint64 { 228 | if n.isLeaf { 229 | return n.vector_block.Generation 230 | } else { 231 | return n.core_block.Generation 232 | } 233 | } 234 | 235 | func (n *QTreeNode) TreePath() string { 236 | rv := "" 237 | if n.isLeaf { 238 | rv 
+= "V" 239 | } else { 240 | rv += "C" 241 | } 242 | dn := n 243 | for { 244 | par := dn.Parent() 245 | if par == nil { 246 | return rv 247 | } 248 | //Try locate the index of this node in the parent 249 | addr := dn.ThisAddr() 250 | found := false 251 | for i := 0; i < bstore.KFACTOR; i++ { 252 | if par.core_block.Addr[i] == addr { 253 | rv = fmt.Sprintf("(%v)[%v].", par.PointWidth(), i) + rv 254 | found = true 255 | break 256 | } 257 | } 258 | if !found { 259 | log.Panicf("Could not find self address in parent") 260 | } 261 | dn = par 262 | } 263 | } 264 | 265 | func (n *QTreeNode) ArbitraryStartTime(idx uint64, pw uint8) int64 { 266 | return n.StartTime() + int64(idx*(1<> n.PointWidth()) 295 | if rv >= bstore.KFACTOR { 296 | rv = bstore.KFACTOR - 1 297 | } 298 | return uint16(rv) 299 | } 300 | 301 | //Unlike core nodes, vectors have infinitely many buckets. This 302 | //function allows you to get a bucket idx for a time and an 303 | //arbitrary point width 304 | func (n *QTreeNode) ClampVBucket(t int64, pw uint8) uint64 { 305 | if !n.isLeaf { 306 | log.Panicf("This is intended for vectors") 307 | } 308 | if t < n.StartTime() { 309 | t = n.StartTime() 310 | } 311 | t -= n.StartTime() 312 | if pw > n.Parent().PointWidth() { 313 | log.Panicf("I can't do this dave") 314 | } 315 | idx := uint64(t) >> pw 316 | maxidx := uint64(n.Parent().WidthTime()) >> pw 317 | if idx >= maxidx { 318 | idx = maxidx - 1 319 | } 320 | return idx 321 | } 322 | 323 | func (n *QTreeNode) clone() (*QTreeNode, error) { 324 | var rv *QTreeNode 325 | var err error 326 | if !n.isLeaf { 327 | rv, err = n.tr.NewCoreNode(n.StartTime(), n.PointWidth()) 328 | if err != nil { 329 | return nil, err 330 | } 331 | n.core_block.CopyInto(rv.core_block) 332 | } else { 333 | rv, err = n.tr.NewVectorNode(n.StartTime(), n.PointWidth()) 334 | if err != nil { 335 | return nil, err 336 | } 337 | n.vector_block.CopyInto(rv.vector_block) 338 | } 339 | return rv, nil 340 | } 341 | 342 | func (n *QTreeNode) EndTime() int64 { 343 | if n.isLeaf { 344 | //We do this because out point width might not be *KFACTOR as we might be 345 | //at the lowest level 346 | return n.StartTime() + (1 << n.Parent().PointWidth()) 347 | } else { 348 | //A core node has multiple buckets 349 | return n.StartTime() + (1< 100 { 214 | total := 0 215 | for _, v:= range gen.vblocks { 216 | total += int(v.Len) 217 | } 218 | log.Critical("Triggered vblock examination: %v blocks, %v points, %v avg", len(gen.vblocks), total, total/len(gen.vblocks)) 219 | }*/ 220 | gen.vblocks = nil 221 | gen.cblocks = nil 222 | 223 | fsb := fake_sblock{ 224 | Uuid: gen.New_SB.uuid.String(), 225 | Gen: gen.New_SB.gen, 226 | Root: gen.New_SB.root, 227 | } 228 | if err := gen.blockstore.db.C("superblocks").Insert(fsb); err != nil { 229 | lg.Panic(err) 230 | } 231 | gen.flushed = true 232 | gen.blockstore.glock.RLock() 233 | //log.Printf("bs is %v, wlocks is %v", gen.blockstore, gen.blockstore._wlocks) 234 | gen.blockstore._wlocks[UUIDToMapKey(*gen.Uuid())].Unlock() 235 | gen.blockstore.glock.RUnlock() 236 | return address_map, nil 237 | } 238 | 239 | func (bs *BlockStore) datablockBarrier(fi int) { 240 | //Gonuts group says that I don't need to call Sync() 241 | 242 | //Block until all datablocks have finished writing 243 | /*bs.blockmtx[fi].Lock() 244 | err := bs.dbf[fi].Sync() 245 | if err != nil { 246 | log.Panic(err) 247 | } 248 | bs.blockmtx[fi].Unlock()*/ 249 | //bs.ses.Fsync(false) 250 | } 251 | 252 | func (bs *BlockStore) allocateBlock() uint64 { 253 | relocation_address := <-bs.alloc 
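//The address handed out here is a relocation placeholder rather than a final
//on-disk address: when the generation commits, Commit() returns a map from
//these placeholders to the real addresses, which callers use to rewrite child
//pointers (see the E2E SERDES tests). Roughly:
//  gen := bs.ObtainGeneration(id)
//  vb, _ := gen.AllocateVectorblock() // vb.Identifier is a placeholder
//  amap, _ := gen.Commit()            // amap[vb.Identifier] is the real address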
254 | return relocation_address 255 | } 256 | 257 | /** 258 | * The real function is supposed to allocate an address for the data 259 | * block, reserving it on disk, and then give back the data block that 260 | * can be filled in 261 | * This stub makes up an address, and mongo pretends its real 262 | */ 263 | func (gen *Generation) AllocateCoreblock() (*Coreblock, error) { 264 | cblock := &Coreblock{} 265 | cblock.Identifier = gen.blockstore.allocateBlock() 266 | cblock.Generation = gen.Number() 267 | gen.cblocks = append(gen.cblocks, cblock) 268 | return cblock, nil 269 | } 270 | 271 | func (gen *Generation) AllocateVectorblock() (*Vectorblock, error) { 272 | vblock := &Vectorblock{} 273 | vblock.Identifier = gen.blockstore.allocateBlock() 274 | vblock.Generation = gen.Number() 275 | gen.vblocks = append(gen.vblocks, vblock) 276 | return vblock, nil 277 | } 278 | 279 | func (bs *BlockStore) FreeCoreblock(cb **Coreblock) { 280 | *cb = nil 281 | } 282 | 283 | func (bs *BlockStore) FreeVectorblock(vb **Vectorblock) { 284 | *vb = nil 285 | } 286 | 287 | func (bs *BlockStore) DEBUG_DELETE_UUID(id uuid.UUID) { 288 | lg.Info("DEBUG removing uuid '%v' from database", id.String()) 289 | _, err := bs.db.C("superblocks").RemoveAll(bson.M{"uuid": id.String()}) 290 | if err != nil && err != mgo.ErrNotFound { 291 | lg.Panic(err) 292 | } 293 | if err == mgo.ErrNotFound { 294 | lg.Info("Quey did not find supeblock to delete") 295 | } else { 296 | lg.Info("err was nik") 297 | } 298 | //bs.datablockBarrier() 299 | } 300 | 301 | func (bs *BlockStore) ReadDatablock(uuid uuid.UUID, addr uint64, impl_Generation uint64, impl_Pointwidth uint8, impl_StartTime int64) Datablock { 302 | //Try hit the cache first 303 | db := bs.cacheGet(addr) 304 | if db != nil { 305 | return db 306 | } 307 | syncbuf := block_buf_pool.Get().([]byte) 308 | trimbuf := bs.store.Read([]byte(uuid), addr, syncbuf) 309 | switch DatablockGetBufferType(trimbuf) { 310 | case Core: 311 | rv := &Coreblock{} 312 | rv.Deserialize(trimbuf) 313 | block_buf_pool.Put(syncbuf) 314 | rv.Identifier = addr 315 | rv.Generation = impl_Generation 316 | rv.PointWidth = impl_Pointwidth 317 | rv.StartTime = impl_StartTime 318 | bs.cachePut(addr, rv) 319 | return rv 320 | case Vector: 321 | rv := &Vectorblock{} 322 | rv.Deserialize(trimbuf) 323 | block_buf_pool.Put(syncbuf) 324 | rv.Identifier = addr 325 | rv.Generation = impl_Generation 326 | rv.PointWidth = impl_Pointwidth 327 | rv.StartTime = impl_StartTime 328 | bs.cachePut(addr, rv) 329 | return rv 330 | } 331 | lg.Panic("Strange datablock type") 332 | return nil 333 | } 334 | 335 | type fake_sblock struct { 336 | Uuid string 337 | Gen uint64 338 | Root uint64 339 | Unlinked bool 340 | } 341 | 342 | func (bs *BlockStore) LoadSuperblock(id uuid.UUID, generation uint64) *Superblock { 343 | var sb = fake_sblock{} 344 | if generation == LatestGeneration { 345 | //log.Info("loading superblock uuid=%v (lgen)", id.String()) 346 | qry := bs.db.C("superblocks").Find(bson.M{"uuid": id.String()}) 347 | if err := qry.Sort("-gen").One(&sb); err != nil { 348 | if err == mgo.ErrNotFound { 349 | lg.Info("sb notfound!") 350 | return nil 351 | } else { 352 | lg.Panic(err) 353 | } 354 | } 355 | } else { 356 | qry := bs.db.C("superblocks").Find(bson.M{"uuid": id.String(), "gen": generation}) 357 | if err := qry.One(&sb); err != nil { 358 | if err == mgo.ErrNotFound { 359 | return nil 360 | } else { 361 | lg.Panic(err) 362 | } 363 | } 364 | } 365 | rv := Superblock{ 366 | uuid: id, 367 | gen: sb.Gen, 368 | root: sb.Root, 369 
| unlinked: sb.Unlinked, 370 | } 371 | return &rv 372 | } 373 | 374 | func CreateDatabase(params map[string]string) { 375 | ses, err := mgo.Dial(params["mongoserver"]) 376 | if err != nil { 377 | lg.Critical("Could not connect to mongo database", err) 378 | os.Exit(1) 379 | } 380 | db := ses.DB(params["collection"]) 381 | idx := mgo.Index{ 382 | Key: []string{"uuid", "-gen"}, 383 | Unique: true, 384 | DropDups: true, 385 | Background: true, 386 | Sparse: false, 387 | } 388 | db.C("superblocks").EnsureIndex(idx) 389 | switch params["provider"] { 390 | case "file": 391 | if err := os.MkdirAll(params["dbpath"], 0755); err != nil { 392 | lg.Panic(err) 393 | } 394 | fp := new(fileprovider.FileStorageProvider) 395 | err := fp.CreateDatabase(params) 396 | if err != nil { 397 | lg.Critical("Error on create: %v", err) 398 | os.Exit(1) 399 | } 400 | case "ceph": 401 | cp := new(cephprovider.CephStorageProvider) 402 | err := cp.CreateDatabase(params) 403 | if err != nil { 404 | lg.Critical("Error on create: %v", err) 405 | os.Exit(1) 406 | } 407 | } 408 | } 409 | -------------------------------------------------------------------------------- /quasar_test.go: -------------------------------------------------------------------------------- 1 | package btrdb 2 | 3 | import ( 4 | "fmt" 5 | _ "log" 6 | "math/rand" 7 | "testing" 8 | "time" 9 | 10 | "github.com/pborman/uuid" 11 | "github.com/SoftwareDefinedBuildings/btrdb/qtree" 12 | ) 13 | 14 | const MICROSECOND = 1000 15 | const MILLISECOND = 1000 * MICROSECOND 16 | const SECOND = 1000 * MILLISECOND 17 | const MINUTE = 60 * SECOND 18 | const HOUR = 60 * MINUTE 19 | const DAY = 24 * HOUR 20 | 21 | /* 22 | func TestMultInsert(t *testing.T) { 23 | testuuid := uuid.NewRandom() 24 | cfg := &DefaultQuasarConfig 25 | cfg.BlockPath = "/srv/quasartestdb" 26 | q, err := NewQuasar(cfg) 27 | if err != nil { 28 | log.Panic(err) 29 | } 30 | vals := []qtree.Record{{10, 10}, {20, 20}} 31 | q.InsertValues(testuuid, vals) 32 | q.InsertValues(testuuid, vals) 33 | } 34 | */ 35 | func init() { 36 | sd := time.Now().Unix() 37 | fmt.Printf(">>>> USING %v AS SEED <<<<<", sd) 38 | rand.Seed(sd) 39 | } 40 | 41 | /* 42 | var _bs *bstore.BlockStore = nil 43 | 44 | func mBS() { 45 | if _bs == nil { 46 | nbs, err := bstore.NewBlockStore("localhost", 0, "/srv/quasartestdb/") 47 | if err != nil { 48 | log.Panic(err) 49 | } 50 | _bs = nbs 51 | } 52 | } 53 | func GenBrk(avg uint64, spread uint64) chan uint64 { 54 | rv := make(chan uint64) 55 | go func() { 56 | for { 57 | num := int64(avg) 58 | num -= int64(spread / 2) 59 | num += rand.Int63n(int64(spread)) 60 | rv <- uint64(num) 61 | } 62 | }() 63 | return rv 64 | } 65 | func GenData(s int64, e int64, avgTimeBetweenSamples uint64, 66 | spread uint64, dat func(int64) float64) []qtree.Record { 67 | if avgTimeBetweenSamples == 0 { 68 | panic("lolwut") 69 | } 70 | if e <= s { 71 | panic("s<=e") 72 | } 73 | log.Printf("e %v s %v avt %v", s, e, avgTimeBetweenSamples) 74 | p3 := uint64((e-s))/avgTimeBetweenSamples + 100 75 | log.Printf("p3: ", p3) 76 | rv := make([]qtree.Record, 0, p3) 77 | r := qtree.Record{} 78 | for t := s; t < e; { 79 | r.Time = t 80 | r.Val = dat(t) 81 | rv = append(rv, r) 82 | nt := t + int64(avgTimeBetweenSamples) 83 | if spread != 0 { 84 | nt -= int64(spread / 2) 85 | nt += rand.Int63n(int64(spread)) 86 | } 87 | if nt > t { 88 | t = nt 89 | } 90 | } 91 | return rv 92 | } 93 | func MakeWTree() (*qtree.QTree, uuid.UUID) { 94 | id := uuid.NewRandom() 95 | mBS() 96 | tr, err := qtree.NewWriteQTree(_bs, id) 97 | if err != 
nil { 98 | log.Panic(err) 99 | } 100 | return tr, id 101 | } 102 | */ 103 | func CompareData(lhs []qtree.Record, rhs []qtree.Record) { 104 | if len(lhs) != len(rhs) { 105 | log.Panicf("lhs != rhs len %d vs %d\n", len(lhs), len(rhs)) 106 | } 107 | for i, v := range lhs { 108 | if rhs[i] != v { 109 | log.Panic("data differs") 110 | } 111 | } 112 | } 113 | 114 | /* 115 | func LoadWTree(id uuid.UUID) *qtree.QTree { 116 | mBS() 117 | tr, err := qtree.NewWriteQTree(_bs, id) 118 | if err != nil { 119 | log.Panic(err) 120 | } 121 | return tr 122 | } 123 | 124 | //This flushes, for now 125 | func TestInsertFlush(t *testing.T) { 126 | gs := int64(23) * 365 * DAY 127 | ge := int64(25) * 365 * DAY 128 | freq := uint64(100 * MINUTE) 129 | varn := uint64(10 * MINUTE) 130 | tdat := GenData(gs, ge, freq, varn, 131 | func(_ int64) float64 { return rand.Float64() }) 132 | log.Printf("generated %v records", len(tdat)) 133 | 134 | cfg := &DefaultQuasarConfig 135 | cfg.BlockPath = "/srv/quasartestdb" 136 | q, err := NewQuasar(cfg) 137 | if err != nil { 138 | log.Panic(err) 139 | } 140 | 141 | id := uuid.NewRandom() 142 | log.Printf("Generating uuid=%s", id) 143 | brk := GenBrk(100, 50) 144 | idx := 0 145 | for idx < len(tdat) { 146 | time.Sleep(100 * time.Millisecond) 147 | ln := int(<-brk) 148 | end := idx + ln 149 | if end > len(tdat) { 150 | end = len(tdat) 151 | } 152 | q.InsertValues(id, tdat[idx:end]) 153 | q.Flush(id) 154 | idx += ln 155 | } 156 | 157 | q.Flush(id) 158 | 159 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration) 160 | if err != nil { 161 | log.Panic(err) 162 | } 163 | log.Printf("Test gen was: %v", gen) 164 | CompareData(dat, tdat) 165 | 166 | } 167 | */ 168 | func TestArbWindow(t *testing.T) { 169 | Params := map[string]string{ 170 | "mongoserver": "localhost", 171 | "provider": "file", 172 | "cachesize": "16000", 173 | "collection": "testdb", 174 | "dbpath": "/srv/testqdb/", 175 | } 176 | cfg := QuasarConfig{ 177 | DatablockCacheSize: uint64(0), 178 | TransactionCoalesceEnable: true, 179 | TransactionCoalesceInterval: uint64(5000), 180 | TransactionCoalesceEarlyTrip: uint64(16000), 181 | Params: Params, 182 | } 183 | q, err := NewQuasar(&cfg) 184 | if err != nil { 185 | log.Panicf("error: ", err) 186 | } 187 | startt := 0 188 | deltat := 1000000000 189 | tnum := 50000 190 | tdat := make([]qtree.Record, tnum) 191 | id := uuid.NewRandom() 192 | for i := 0; i < tnum; i++ { 193 | tdat[i].Time = int64(startt) + int64(deltat*i) 194 | tdat[i].Val = float64(i) 195 | } 196 | q.InsertValues(id, tdat) 197 | for i := 0; i < tnum; i++ { 198 | tdat[i].Time = int64(startt) + int64(deltat*i) + int64(tnum*2*deltat) 199 | tdat[i].Val = float64(i) 200 | } 201 | q.InsertValues(id, tdat) 202 | q.Flush(id) 203 | time.Sleep(2 * time.Second) 204 | log.Info("Stream: %+v\n", id) 205 | var rstart int64 = int64(startt) - int64(4000*deltat) 206 | var rend int64 = int64(startt + deltat*250000 + 5000000000) 207 | rvalc, _ := q.QueryWindow(id, rstart, rend, LatestGeneration, uint64(deltat)*700, 0) 208 | for { 209 | v, ok := <-rvalc 210 | log.Info("reading: %+v", v) 211 | if !ok { 212 | panic("eof") 213 | } 214 | /*exp := float64(v.Time+v.Time+int64(deltat)) / float64(deltat) / 2.0 215 | if math.Abs(v.Mean-exp) > 0.00001 { 216 | log.Panicf("got bad %+v\n expected mean: ", v, exp) 217 | }*/ 218 | } 219 | } 220 | 221 | /* 222 | func TestUnlinkBlocks(t *testing.T) { 223 | 224 | gs := int64(24) * 365 * DAY 225 | ge := int64(25) * 365 * DAY 226 | freq := uint64(300 * MINUTE) 227 | varn := uint64(10 * MINUTE) 228 | 
tdat := GenData(gs, ge, freq, varn, 229 | func(_ int64) float64 { return rand.Float64() }) 230 | log.Printf("generated %v records", len(tdat)) 231 | 232 | cfg := &DefaultQuasarConfig 233 | cfg.BlockPath = "/srv/quasartestdb" 234 | q, err := NewQuasar(cfg) 235 | if err != nil { 236 | log.Panic(err) 237 | } 238 | 239 | { 240 | alloced, free, strange, leaked := q.bs.InspectBlocks() 241 | log.Printf("BEFORE SUMMARY:") 242 | log.Printf("ALLOCED: %d", alloced) 243 | log.Printf("FREE : %d", free) 244 | log.Printf("STRANGE: %d", strange) 245 | log.Printf("LEAKED : %d", leaked) 246 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 247 | } 248 | id := uuid.NewRandom() 249 | log.Printf("Generating uuid=%s", id) 250 | brk := GenBrk(100, 50) 251 | idx := 0 252 | for idx < len(tdat) { 253 | time.Sleep(1 * time.Second) 254 | ln := int(<-brk) 255 | end := idx + ln 256 | if end > len(tdat) { 257 | end = len(tdat) 258 | } 259 | q.InsertValues(id, tdat[idx:end]) 260 | idx += ln 261 | } 262 | //Allow for coalescence 263 | time.Sleep(10 * time.Second) 264 | 265 | { 266 | alloced, free, strange, leaked := q.bs.InspectBlocks() 267 | log.Printf("AFTER SUMMARY:") 268 | log.Printf("ALLOCED: %d", alloced) 269 | log.Printf("FREE : %d", free) 270 | log.Printf("STRANGE: %d", strange) 271 | log.Printf("LEAKED : %d", leaked) 272 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 273 | } 274 | { 275 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration) 276 | if err != nil { 277 | log.Panic(err) 278 | } 279 | log.Printf("Test gen was: %v", gen) 280 | CompareData(dat, tdat) 281 | err = q.UnlinkBlocks([]uuid.UUID{id}, []uint64{0}, []uint64{gen - 1}) 282 | if err != nil { 283 | log.Panic(err) 284 | } 285 | } 286 | 287 | { 288 | dat, gen, err := q.QueryValues(id, gs, ge, LatestGeneration) 289 | if err != nil { 290 | log.Panic(err) 291 | } 292 | log.Printf("Test gen was: %v", gen) 293 | CompareData(dat, tdat) 294 | } 295 | 296 | { 297 | alloced, free, strange, leaked := q.bs.InspectBlocks() 298 | log.Printf("AFTER2 SUMMARY:") 299 | log.Printf("ALLOCED: %d", alloced) 300 | log.Printf("FREE : %d", free) 301 | log.Printf("STRANGE: %d", strange) 302 | log.Printf("LEAKED : %d", leaked) 303 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 304 | } 305 | } 306 | func TestCompleteDelete(t *testing.T) { 307 | gs := int64(24) * 365 * DAY 308 | ge := int64(25) * 365 * DAY 309 | freq := uint64(300 * MINUTE) 310 | varn := uint64(10 * MINUTE) 311 | tdat := GenData(gs, ge, freq, varn, 312 | func(_ int64) float64 { return rand.Float64() }) 313 | log.Printf("generated %v records", len(tdat)) 314 | id := uuid.NewRandom() 315 | cfg := &DefaultQuasarConfig 316 | cfg.BlockPath = "/srv/quasartestdb" 317 | q, err := NewQuasar(cfg) 318 | if err != nil { 319 | log.Panic(err) 320 | } 321 | { 322 | q.InsertValues(id, tdat) 323 | q.Flush(id) 324 | } 325 | { 326 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration) 327 | if err != nil { 328 | log.Panic(err) 329 | } 330 | CompareData(dat, tdat) 331 | } 332 | { 333 | q.DeleteRange(id, gs, ge+1) 334 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration) 335 | if err != nil { 336 | log.Panic(err) 337 | } 338 | if len(dat) != 0 { 339 | t.Log("dat length wrong") 340 | t.Fail() 341 | } 342 | } 343 | { 344 | q.InsertValues(id, tdat) 345 | q.Flush(id) 346 | } 347 | { 348 | dat, _, err := q.QueryValues(id, gs, ge, LatestGeneration) 349 | if err != nil { 350 | log.Panic(err) 351 | } 352 | CompareData(dat, tdat) 
353 | } 354 | 355 | } 356 | func TestUnlinkBlocks2(t *testing.T) { 357 | 358 | gs := int64(24) * 365 * DAY 359 | ge := int64(25) * 365 * DAY 360 | freq := uint64(300 * MINUTE) 361 | varn := uint64(10 * MINUTE) 362 | tdat := GenData(gs, ge, freq, varn, 363 | func(_ int64) float64 { return rand.Float64() }) 364 | log.Printf("generated %v records", len(tdat)) 365 | 366 | cfg := &DefaultQuasarConfig 367 | cfg.BlockPath = "/srv/quasartestdb" 368 | q, err := NewQuasar(cfg) 369 | if err != nil { 370 | log.Panic(err) 371 | } 372 | 373 | { 374 | alloced, free, strange, leaked := q.bs.InspectBlocks() 375 | log.Printf("BEFORE SUMMARY:") 376 | log.Printf("ALLOCED: %d", alloced) 377 | log.Printf("FREE : %d", free) 378 | log.Printf("STRANGE: %d", strange) 379 | log.Printf("LEAKED : %d", leaked) 380 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 381 | } 382 | id := uuid.NewRandom() 383 | log.Printf("Generating uuid=%s", id) 384 | brk := GenBrk(100, 50) 385 | idx := 0 386 | for idx < len(tdat) { 387 | time.Sleep(1 * time.Second) 388 | ln := int(<-brk) 389 | end := idx + ln 390 | if end > len(tdat) { 391 | end = len(tdat) 392 | } 393 | q.InsertValues(id, tdat[idx:end]) 394 | idx += ln 395 | } 396 | //Allow for coalescence 397 | time.Sleep(10 * time.Second) 398 | { 399 | alloced, free, strange, leaked := q.bs.InspectBlocks() 400 | log.Printf("BEFORE DELETE:") 401 | log.Printf("ALLOCED: %d", alloced) 402 | log.Printf("FREE : %d", free) 403 | log.Printf("STRANGE: %d", strange) 404 | log.Printf("LEAKED : %d", leaked) 405 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 406 | } 407 | { 408 | err := q.DeleteRange(id, tdat[1].Time, ge) 409 | if err != nil { 410 | t.Error(err) 411 | } 412 | } 413 | { 414 | q.InsertValues(id, []qtree.Record{{0, 100}}) 415 | q.Flush(id) 416 | } 417 | { 418 | alloced, free, strange, leaked := q.bs.InspectBlocks() 419 | log.Printf("AFTER DELETE:") 420 | log.Printf("ALLOCED: %d", alloced) 421 | log.Printf("FREE : %d", free) 422 | log.Printf("STRANGE: %d", strange) 423 | log.Printf("LEAKED : %d", leaked) 424 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 425 | } 426 | { 427 | _, gen, err := q.QueryValues(id, gs, ge, LatestGeneration) 428 | if err != nil { 429 | log.Panic(err) 430 | } 431 | err = q.UnlinkBlocks([]uuid.UUID{id}, []uint64{0}, []uint64{gen}) 432 | if err != nil { 433 | log.Panic(err) 434 | } 435 | } 436 | 437 | { 438 | alloced, free, strange, leaked := q.bs.InspectBlocks() 439 | log.Printf("AFTER FREE:") 440 | log.Printf("ALLOCED: %d", alloced) 441 | log.Printf("FREE : %d", free) 442 | log.Printf("STRANGE: %d", strange) 443 | log.Printf("LEAKED : %d", leaked) 444 | log.Printf("USAGE : %.2f %%\n", float64(alloced)/float64(alloced+free)*100) 445 | } 446 | } 447 | */ 448 | -------------------------------------------------------------------------------- /qtree/qtree2_test.go: -------------------------------------------------------------------------------- 1 | package qtree 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math/rand" 7 | "testing" 8 | "time" 9 | 10 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bstore" 11 | ) 12 | 13 | func init() { 14 | sd := time.Now().Unix() 15 | fmt.Printf(">>>> USING %v AS SEED <<<<<", sd) 16 | //rand.Seed(1417417715) 17 | rand.Seed(sd) 18 | } 19 | func GenBrk(avg uint64, spread uint64) chan uint64 { 20 | rv := make(chan uint64) 21 | go func() { 22 | for { 23 | num := int64(avg) 24 | num -= int64(spread / 2) 25 | num += rand.Int63n(int64(spread)) 26 | 
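//Each value sent is avg +/- spread/2 (uniform), so with e.g. GenBrk(100, 50)
//the tests below get insert batches of 75..124 records; the channel never
//closes, callers simply stop reading once they run out of data.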
rv <- uint64(num) 27 | } 28 | }() 29 | return rv 30 | } 31 | 32 | //TODO PW test at range with no data 33 | func TestQT2_PW2(t *testing.T) { 34 | log.Printf("Inserting data 0-4096") 35 | te := int64(4096) 36 | tdat := GenData(0, 4096, 1, 0, func(_ int64) float64 { return rand.Float64() }) 37 | if int64(len(tdat)) != te { 38 | log.Panic("GenDat messed up a bit") 39 | } 40 | tr, uuid := MakeWTree() 41 | tr.InsertValues(tdat) 42 | tr.Commit() 43 | var err error 44 | tr, err = NewReadQTree(_bs, uuid, bstore.LatestGeneration) 45 | if err != nil { 46 | t.Error(err) 47 | } 48 | 49 | moddat := make([]StatRecord, len(tdat)) 50 | for i, v := range tdat { 51 | moddat[i] = StatRecord{ 52 | Time: v.Time, 53 | Count: 1, 54 | Min: v.Val, 55 | Mean: v.Val, 56 | Max: v.Val, 57 | } 58 | } 59 | expected_qty := 4096 60 | for pwi := uint8(0); pwi < 63; pwi++ { 61 | qrydat, err := tr.QueryStatisticalValuesBlock(-(16 << 56), 48<<56, pwi) 62 | if err != nil { 63 | log.Panic(err) 64 | } 65 | //log.Printf("for pwi %v, we got len %v",pwi, len(qrydat)) 66 | if len(qrydat) != expected_qty { 67 | log.Printf("qdat: %v", qrydat) 68 | log.Printf("expected %v, got %v", expected_qty, len(qrydat)) 69 | t.FailNow() 70 | } 71 | if expected_qty != 1 { 72 | expected_qty >>= 1 73 | } 74 | } 75 | } 76 | func TestQT2_PW(t *testing.T) { 77 | log.Printf("Inserting data 0-4096") 78 | te := int64(4096) 79 | tdat := GenData(0, 4096, 1, 0, func(_ int64) float64 { return rand.Float64() }) 80 | if int64(len(tdat)) != te { 81 | log.Panic("GenDat messed up a bit") 82 | } 83 | tr, uuid := MakeWTree() 84 | err := tr.InsertValues(tdat) 85 | if err != nil { 86 | t.Error(err) 87 | } 88 | tr.Commit() 89 | tr, err = NewReadQTree(_bs, uuid, bstore.LatestGeneration) 90 | if err != nil { 91 | t.Error(err) 92 | } 93 | 94 | moddat := make([]StatRecord, len(tdat)) 95 | for i, v := range tdat { 96 | moddat[i] = StatRecord{ 97 | Time: v.Time, 98 | Count: 1, 99 | Min: v.Val, 100 | Mean: v.Val, 101 | Max: v.Val, 102 | } 103 | } 104 | for pwi := uint8(0); pwi < 12; pwi++ { 105 | qrydat, err := tr.QueryStatisticalValuesBlock(0, te, pwi) 106 | if err != nil { 107 | log.Panic(err) 108 | } 109 | if int64(len(qrydat)) != te>>pwi { 110 | t.Log("len of qrydat mismatch %v vs %v", len(qrydat), te>>pwi) 111 | log.Printf("qry dat %+v", qrydat) 112 | t.FailNow() 113 | } else { 114 | t.Log("LEN MATCH %v", len(qrydat)) 115 | } 116 | min := func(a float64, b float64) float64 { 117 | if a < b { 118 | return a 119 | } 120 | return b 121 | } 122 | max := func(a float64, b float64) float64 { 123 | if a > b { 124 | return a 125 | } 126 | return b 127 | } 128 | moddat2 := make([]StatRecord, len(moddat)/2) 129 | for i := 0; i < len(moddat)/2; i++ { 130 | nmean := moddat[2*i].Mean*float64(moddat[2*i].Count) + 131 | moddat[2*i+1].Mean*float64(moddat[2*i+1].Count) 132 | nmean /= float64(moddat[2*i].Count + moddat[2*i+1].Count) 133 | 134 | moddat2[i] = StatRecord{ 135 | Time: moddat[2*i].Time, 136 | Count: moddat[2*i].Count + moddat[2*i+1].Count, 137 | Min: min(moddat[2*i].Min, moddat[2*i+1].Min), 138 | Mean: nmean, 139 | Max: max(moddat[2*i].Max, moddat[2*i+1].Max), 140 | } 141 | } 142 | } 143 | } 144 | func TestQT2_A(t *testing.T) { 145 | gs := int64(20+rand.Intn(10)) * 365 * DAY 146 | ge := int64(30+rand.Intn(10)) * 365 * DAY 147 | freq := uint64(rand.Intn(10)+1) * HOUR 148 | varn := uint64(30 * MINUTE) 149 | tdat := GenData(gs, ge, freq, varn, 150 | func(_ int64) float64 { return rand.Float64() }) 151 | log.Printf("generated %v records", len(tdat)) 152 | tr, uuid := MakeWTree() 
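//MakeWTree presumably opens a fresh write tree against the shared test
//blockstore. A tree can only be committed once, so the loop below re-opens it
//with LoadWTree for every batch of inserts; each Commit freezes one
//generation. Roughly:
//  tr, id := MakeWTree(); tr.InsertValues(batch); tr.Commit()
//  tr = LoadWTree(id);    tr.InsertValues(next);  tr.Commit()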
153 | log.Printf("geneated tree %v", tr.gen.Uuid().String()) 154 | tr.Commit() 155 | 156 | idx := uint64(0) 157 | brks := GenBrk(100, 50) 158 | loops := GenBrk(4, 4) 159 | for idx < uint64(len(tdat)) { 160 | tr := LoadWTree(uuid) 161 | loop := <-loops 162 | for i := uint64(0); i < loop; i++ { 163 | brk := <-brks 164 | if idx+brk >= uint64(len(tdat)) { 165 | brk = uint64(len(tdat)) - idx 166 | } 167 | if brk == 0 { 168 | continue 169 | } 170 | tr.InsertValues(tdat[idx : idx+brk]) 171 | idx += brk 172 | } 173 | tr.Commit() 174 | } 175 | 176 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 177 | if err != nil { 178 | log.Panic(err) 179 | } 180 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 181 | if err != nil { 182 | log.Panic(err) 183 | } 184 | log.Printf("wrote %v, read %v", len(tdat), len(rval)) 185 | CompareData(tdat, rval) 186 | } 187 | 188 | func TestQT2_Superdense(t *testing.T) { 189 | tdat := make([]Record, 10000) 190 | for i := 0; i < 10000; i++ { 191 | tdat[i] = Record{Time: 5, Val: i} 192 | } 193 | tr, uuid := MakeWTree() 194 | log.Printf("geneated tree %v", tr.gen.Uuid().String()) 195 | tr.Commit() 196 | 197 | idx := uint64(0) 198 | brks := GenBrk(100, 50) 199 | loops := GenBrk(4, 4) 200 | for idx < uint64(len(tdat)) { 201 | tr := LoadWTree(uuid) 202 | loop := <-loops 203 | for i := uint64(0); i < loop; i++ { 204 | brk := <-brks 205 | if idx+brk >= uint64(len(tdat)) { 206 | brk = uint64(len(tdat)) - idx 207 | } 208 | if brk == 0 { 209 | continue 210 | } 211 | tr.InsertValues(tdat[idx : idx+brk]) 212 | idx += brk 213 | } 214 | tr.Commit() 215 | } 216 | 217 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 218 | if err != nil { 219 | log.Panic(err) 220 | } 221 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 222 | if err != nil { 223 | log.Panic(err) 224 | } 225 | log.Printf("wrote %v, read %v", len(tdat), len(rval)) 226 | CompareData(tdat, rval) 227 | } 228 | 229 | func TestQT2_Nearest(t *testing.T) { 230 | vals := []Record{ 231 | {int64(1 << 56), 1}, 232 | {int64(2 << 56), 2}, 233 | {int64(3 << 56), 3}, 234 | } 235 | tr, uuid := MakeWTree() 236 | err := tr.InsertValues(vals) 237 | if err != nil { 238 | t.Error(err) 239 | } 240 | tr.Commit() 241 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 242 | if err != nil { 243 | log.Panic(err) 244 | } 245 | tparams := []struct { 246 | time int64 247 | backwards bool 248 | expectOk bool 249 | val float64 250 | }{ 251 | {(2 << 56) + 1, true, true, 2}, 252 | {(2 << 56), true, true, 1}, 253 | {(2 << 56), false, true, 2}, 254 | {(2 << 56) + 1, false, true, 3}, 255 | {0, false, true, 1}, 256 | {4 << 56, true, true, 3}, 257 | {0, true, false, -1}, 258 | {4 << 56, false, false, -1}, 259 | } 260 | for i, v := range tparams { 261 | rv, err := rtr.FindNearestValue(v.time, v.backwards) 262 | if v.expectOk { 263 | if err != nil || rv.Val != v.val { 264 | t.Fatal("subtest [%v] = %+v", i, v) 265 | } 266 | } else { 267 | if err != ErrNoSuchPoint { 268 | t.Fatal("subtest [%v] = %+v", i, v) 269 | } 270 | } 271 | } 272 | } 273 | 274 | func TestQT2_DEL(t *testing.T) { 275 | gs := int64(20+rand.Intn(10)) * 365 * DAY 276 | ge := int64(30+rand.Intn(10)) * 365 * DAY 277 | freq := uint64(rand.Intn(10)+1) * HOUR 278 | varn := uint64(30 * MINUTE) 279 | tdat := GenData(gs, ge, freq, varn, 280 | func(_ int64) float64 { return rand.Float64() }) 281 | log.Printf("generated %v records", len(tdat)) 282 | tr, uuid := MakeWTree() 283 | log.Printf("geneated tree %v", tr.gen.Uuid().String()) 284 | 
tr.Commit() 285 | 286 | idx := uint64(0) 287 | brks := GenBrk(100, 50) 288 | loops := GenBrk(4, 4) 289 | for idx < uint64(len(tdat)) { 290 | tr := LoadWTree(uuid) 291 | loop := <-loops 292 | for i := uint64(0); i < loop; i++ { 293 | brk := <-brks 294 | if idx+brk >= uint64(len(tdat)) { 295 | brk = uint64(len(tdat)) - idx 296 | } 297 | if brk == 0 { 298 | continue 299 | } 300 | tr.InsertValues(tdat[idx : idx+brk]) 301 | idx += brk 302 | } 303 | tr.Commit() 304 | } 305 | 306 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 307 | if err != nil { 308 | log.Panic(err) 309 | } 310 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 311 | if err != nil { 312 | log.Panic(err) 313 | } 314 | log.Printf("wrote %v, read %v", len(tdat), len(rval)) 315 | CompareData(tdat, rval) 316 | 317 | dtr, err := NewWriteQTree(_bs, uuid) 318 | dtr.DeleteRange(tdat[1].Time, tdat[len(tdat)-2].Time+1) 319 | dtr.Commit() 320 | { 321 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 322 | if err != nil { 323 | log.Panic(err) 324 | } 325 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 326 | if err != nil { 327 | log.Panic(err) 328 | } 329 | 330 | if len(rval) != 2 { 331 | t.Log("Mismatch in expected length") 332 | t.Fail() 333 | } 334 | } 335 | { 336 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 337 | if err != nil { 338 | log.Panic(err) 339 | } 340 | rch := rtr.GetAllReferencedVAddrs() 341 | refd := make([]uint64, 0, 10) 342 | for v := range rch { 343 | log.Printf("Referenced: 0x%016x", v) 344 | refd = append(refd, v) 345 | } 346 | /* 347 | if len(refd) != 5 { 348 | t.Log("Referencing != 5 nodes (%v)", len(refd)) 349 | t.Fail() 350 | }*/ 351 | } 352 | } 353 | 354 | func TestQT2_CRNG(t *testing.T) { 355 | gs := int64(20+rand.Intn(10)) * 365 * DAY 356 | ge := int64(30+rand.Intn(10)) * 365 * DAY 357 | freq := uint64(rand.Intn(10)+1) * HOUR 358 | varn := uint64(30 * MINUTE) 359 | tdat := GenData(gs, ge, freq, varn, 360 | func(_ int64) float64 { return rand.Float64() }) 361 | log.Printf("generated %v records", len(tdat)) 362 | tr, uuid := MakeWTree() 363 | log.Printf("geneated tree %v", tr.gen.Uuid().String()) 364 | tr.Commit() 365 | 366 | idx := uint64(0) 367 | brks := GenBrk(100, 50) 368 | loops := GenBrk(4, 4) 369 | for idx < uint64(len(tdat)) { 370 | tr := LoadWTree(uuid) 371 | loop := <-loops 372 | for i := uint64(0); i < loop; i++ { 373 | brk := <-brks 374 | if idx+brk >= uint64(len(tdat)) { 375 | brk = uint64(len(tdat)) - idx 376 | } 377 | if brk == 0 { 378 | continue 379 | } 380 | tr.InsertValues(tdat[idx : idx+brk]) 381 | idx += brk 382 | } 383 | tr.Commit() 384 | } 385 | 386 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 387 | if err != nil { 388 | log.Panic(err) 389 | } 390 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 391 | if err != nil { 392 | log.Panic(err) 393 | } 394 | initial_gen := rtr.Generation() 395 | log.Printf("wrote %v, read %v", len(tdat), len(rval)) 396 | CompareData(tdat, rval) 397 | 398 | dtr, err := NewWriteQTree(_bs, uuid) 399 | dtr.DeleteRange(tdat[0].Time, tdat[5].Time) 400 | dtr.Commit() 401 | { 402 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 403 | if err != nil { 404 | log.Panic(err) 405 | } 406 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 407 | if err != nil { 408 | log.Panic(err) 409 | } 410 | if len(rval) != len(tdat)-5 { 411 | t.Log("Mismatch in expected length %v %v %v", len(rval), len(tdat)-5, len(tdat)) 412 | t.Fail() 413 | } 414 | 
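//The changed-range query below compares the generation recorded before the
//delete (initial_gen) with the tree's current generation and reports the time
//ranges that differ; the second argument is presumably a resolution hint, with
//0 asking for the finest ranges the tree can resolve. The deltas printed
//afterwards show how much wider the reported range is than what was actually
//deleted.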
log.Printf("gen was, gen is: %v / %v", initial_gen, rtr.Generation()) 415 | log.Printf("========== STARTING CHANGED RANGE INVOCATION ==============") 416 | changed_ranges := rtr.FindChangedSinceSlice(initial_gen, 0) 417 | log.Printf("Changed ranges: %+v", changed_ranges) 418 | s, e, ds, de := tdat[0].Time, tdat[5].Time, changed_ranges[0].Start-tdat[0].Time, changed_ranges[0].End-tdat[5].Time 419 | dsm := float64(ds) / (1E9 * 60) 420 | dem := float64(de) / (1E9 * 60) 421 | log.Printf("We deleted from %v to %v \n(delta %v %v) (delta min %.3f %.3f)", s, e, ds, de, dsm, dem) 422 | rtr.root.PrintCounts(0) 423 | } 424 | 425 | { 426 | dtr, err := NewWriteQTree(_bs, uuid) 427 | dtr.InsertValues([]Record{{ge - 1000, 100}}) 428 | dtr.Commit() 429 | rtr, err := NewReadQTree(_bs, uuid, bstore.LatestGeneration) 430 | if err != nil { 431 | log.Panic(err) 432 | } 433 | rval, err := rtr.ReadStandardValuesBlock(gs, ge+int64(2*varn)) 434 | if err != nil { 435 | log.Panic(err) 436 | } 437 | if len(rval) != len(tdat)-4 { 438 | t.Log("Mismatch in expected length %v %v %v", len(rval), len(tdat)-5, len(tdat)) 439 | t.Fail() 440 | } 441 | log.Printf("gen was, gen is: %v / %v", initial_gen, rtr.Generation()) 442 | log.Printf("========== STARTING CHANGED RANGE INVOCATION ==============") 443 | changed_ranges := rtr.FindChangedSinceSlice(initial_gen, 0) 444 | log.Printf("Changed ranges: %+v", changed_ranges) 445 | s, e, ds, de := tdat[0].Time, tdat[5].Time, changed_ranges[0].Start-tdat[0].Time, changed_ranges[0].End-tdat[5].Time 446 | dsm := float64(ds) / (1E9 * 60) 447 | dem := float64(de) / (1E9 * 60) 448 | log.Printf("We deleted from %v to %v \n(delta %v %v) (delta min %.3f %.3f)", s, e, ds, de, dsm, dem) 449 | rtr.root.PrintCounts(0) 450 | } 451 | } 452 | -------------------------------------------------------------------------------- /cpinterface/cpinterface.go: -------------------------------------------------------------------------------- 1 | package cpinterface 2 | 3 | import ( 4 | "net" 5 | "os" 6 | "os/signal" 7 | "sync" 8 | 9 | "github.com/pborman/uuid" 10 | "github.com/SoftwareDefinedBuildings/btrdb" 11 | "github.com/SoftwareDefinedBuildings/btrdb/qtree" 12 | capn "github.com/glycerine/go-capnproto" 13 | "github.com/op/go-logging" 14 | ) 15 | 16 | var log *logging.Logger 17 | 18 | func init() { 19 | log = logging.MustGetLogger("log") 20 | } 21 | 22 | type CPInterface struct { 23 | isShuttingDown bool 24 | } 25 | 26 | func ServeCPNP(q *btrdb.Quasar, ntype string, laddr string) *CPInterface { 27 | rv := &CPInterface{} 28 | go func() { 29 | sigchan := make(chan os.Signal, 1) 30 | signal.Notify(sigchan, os.Interrupt) 31 | _ = <-sigchan 32 | rv.isShuttingDown = true 33 | }() 34 | l, err := net.Listen(ntype, laddr) 35 | if err != nil { 36 | log.Panic(err) 37 | } 38 | defer l.Close() 39 | for !rv.isShuttingDown { 40 | conn, err := l.Accept() 41 | if err != nil { 42 | log.Panic(err) 43 | } 44 | go func(c net.Conn) { 45 | rv.dispatchCommands(q, c) 46 | }(conn) 47 | } 48 | return rv 49 | } 50 | 51 | func (c *CPInterface) Shutdown() { 52 | c.isShuttingDown = true 53 | } 54 | 55 | func (c *CPInterface) dispatchCommands(q *btrdb.Quasar, conn net.Conn) { 56 | //This governs the stream 57 | rmtx := sync.Mutex{} 58 | wmtx := sync.Mutex{} 59 | log.Info("cpnp connection") 60 | for !c.isShuttingDown { 61 | rmtx.Lock() 62 | seg, err := capn.ReadFromStream(conn, nil) 63 | if err != nil { 64 | log.Warning("ERR (%v) :: %v", conn.RemoteAddr(), err) 65 | conn.Close() 66 | break 67 | } 68 | rmtx.Unlock() 69 | go func() { 70 | 
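//Each request is handled in its own goroutine: frames are read off the
//connection under rmtx, responses are serialized under wmtx, and every
//response echoes the request's EchoTag so the client can match replies to
//outstanding requests. seg is shadowed below so this goroutine keeps its own
//reference to the segment it was started for.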
seg := seg 71 | req := ReadRootRequest(seg) 72 | mkresp := func() (Response, *capn.Segment) { 73 | rvseg := capn.NewBuffer(nil) 74 | resp := NewRootResponse(rvseg) 75 | resp.SetEchoTag(req.EchoTag()) 76 | return resp, rvseg 77 | } 78 | sendresp := func(seg *capn.Segment) { 79 | wmtx.Lock() 80 | seg.WriteTo(conn) 81 | wmtx.Unlock() 82 | } 83 | switch req.Which() { 84 | case REQUEST_QUERYSTANDARDVALUES: 85 | //log.Info("QSV\n") 86 | st := req.QueryStandardValues().StartTime() 87 | et := req.QueryStandardValues().EndTime() 88 | uuid := uuid.UUID(req.QueryStandardValues().Uuid()) 89 | ver := req.QueryStandardValues().Version() 90 | //log.Info("[REQ=QsV] st=%v, et=%v, uuid=%v, gen=%v", st, et, uuid, ver) 91 | if ver == 0 { 92 | ver = btrdb.LatestGeneration 93 | } 94 | recordc, errorc, gen := q.QueryValuesStream(uuid, st, et, ver) 95 | if recordc == nil { 96 | log.Warning("RESPONDING ERR: %v", err) 97 | resp, rvseg := mkresp() 98 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 99 | resp.SetFinal(true) 100 | sendresp(rvseg) 101 | return 102 | } else { 103 | bufarr := make([]qtree.Record, 0, 4096) 104 | for { 105 | resp, rvseg := mkresp() 106 | fail := false 107 | fin := false 108 | for { 109 | select { 110 | case _, ok := <-errorc: 111 | if ok { 112 | fin = true 113 | fail = true 114 | goto donestandard 115 | } 116 | case r, ok := <-recordc: 117 | if !ok { 118 | fin = true 119 | goto donestandard 120 | } 121 | bufarr = append(bufarr, r) 122 | if len(bufarr) == cap(bufarr) { 123 | goto donestandard 124 | } 125 | } 126 | } 127 | donestandard: 128 | if fail { 129 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 130 | resp.SetFinal(true) 131 | //consume channels 132 | go func() { 133 | for _ = range recordc { 134 | } 135 | }() 136 | go func() { 137 | for _ = range errorc { 138 | } 139 | }() 140 | sendresp(rvseg) 141 | return 142 | } 143 | records := NewRecords(rvseg) 144 | rl := NewRecordList(rvseg, len(bufarr)) 145 | rla := rl.ToArray() 146 | for i, v := range bufarr { 147 | rla[i].SetTime(v.Time) 148 | rla[i].SetValue(v.Val) 149 | } 150 | records.SetVersion(gen) 151 | records.SetValues(rl) 152 | resp.SetRecords(records) 153 | resp.SetStatusCode(STATUSCODE_OK) 154 | if fin { 155 | resp.SetFinal(true) 156 | } 157 | sendresp(rvseg) 158 | bufarr = bufarr[:0] 159 | if fin { 160 | return 161 | } 162 | } 163 | } 164 | case REQUEST_QUERYWINDOWVALUES: 165 | st := req.QueryWindowValues().StartTime() 166 | et := req.QueryWindowValues().EndTime() 167 | id := uuid.UUID(req.QueryWindowValues().Uuid()) 168 | width := req.QueryWindowValues().Width() 169 | ver := req.QueryWindowValues().Version() 170 | depth := req.QueryWindowValues().Depth() 171 | if ver == 0 { 172 | ver = btrdb.LatestGeneration 173 | } 174 | recordc, gen := q.QueryWindow(id, st, et, ver, width, depth) 175 | if recordc == nil { 176 | log.Warning("RESPONDING ERR: %v", err) 177 | resp, rvseg := mkresp() 178 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 179 | resp.SetFinal(true) 180 | sendresp(rvseg) 181 | return 182 | } else { 183 | bufarr := make([]qtree.StatRecord, 0, 4096) 184 | for { 185 | resp, rvseg := mkresp() 186 | fail := false 187 | fin := false 188 | for { 189 | select { 190 | case r, ok := <-recordc: 191 | if !ok { 192 | fin = true 193 | goto donewindow 194 | } 195 | bufarr = append(bufarr, r) 196 | if len(bufarr) == cap(bufarr) { 197 | goto donewindow 198 | } 199 | } 200 | } 201 | donewindow: 202 | if fail { 203 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 204 | resp.SetFinal(true) 205 | //consume channels 206 | go func() { 207 | for _ 
= range recordc { 208 | } 209 | }() 210 | sendresp(rvseg) 211 | return 212 | } 213 | records := NewStatisticalRecords(rvseg) 214 | rl := NewStatisticalRecordList(rvseg, len(bufarr)) 215 | rla := rl.ToArray() 216 | for i, v := range bufarr { 217 | rla[i].SetTime(v.Time) 218 | rla[i].SetCount(v.Count) 219 | rla[i].SetMin(v.Min) 220 | rla[i].SetMean(v.Mean) 221 | rla[i].SetMax(v.Max) 222 | } 223 | records.SetVersion(gen) 224 | records.SetValues(rl) 225 | resp.SetStatisticalRecords(records) 226 | resp.SetStatusCode(STATUSCODE_OK) 227 | if fin { 228 | resp.SetFinal(true) 229 | } 230 | sendresp(rvseg) 231 | bufarr = bufarr[:0] 232 | if fin { 233 | return 234 | } 235 | } 236 | } 237 | case REQUEST_QUERYSTATISTICALVALUES: 238 | st := req.QueryStatisticalValues().StartTime() 239 | et := req.QueryStatisticalValues().EndTime() 240 | uuid := uuid.UUID(req.QueryStatisticalValues().Uuid()) 241 | pw := req.QueryStatisticalValues().PointWidth() 242 | ver := req.QueryStatisticalValues().Version() 243 | if ver == 0 { 244 | ver = btrdb.LatestGeneration 245 | } 246 | recordc, errorc, gen := q.QueryStatisticalValuesStream(uuid, st, et, ver, pw) 247 | if recordc == nil { 248 | log.Warning("RESPONDING ERR: %v", err) 249 | resp, rvseg := mkresp() 250 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 251 | resp.SetFinal(true) 252 | sendresp(rvseg) 253 | return 254 | } else { 255 | bufarr := make([]qtree.StatRecord, 0, 4096) 256 | for { 257 | resp, rvseg := mkresp() 258 | fail := false 259 | fin := false 260 | for { 261 | select { 262 | case _, ok := <-errorc: 263 | if ok { 264 | fin = true 265 | fail = true 266 | goto donestat 267 | } 268 | case r, ok := <-recordc: 269 | if !ok { 270 | fin = true 271 | goto donestat 272 | } 273 | bufarr = append(bufarr, r) 274 | if len(bufarr) == cap(bufarr) { 275 | goto donestat 276 | } 277 | } 278 | } 279 | donestat: 280 | if fail { 281 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 282 | resp.SetFinal(true) 283 | //consume channels 284 | go func() { 285 | for _ = range recordc { 286 | } 287 | }() 288 | go func() { 289 | for _ = range errorc { 290 | } 291 | }() 292 | sendresp(rvseg) 293 | return 294 | } 295 | records := NewStatisticalRecords(rvseg) 296 | rl := NewStatisticalRecordList(rvseg, len(bufarr)) 297 | rla := rl.ToArray() 298 | for i, v := range bufarr { 299 | rla[i].SetTime(v.Time) 300 | rla[i].SetCount(v.Count) 301 | rla[i].SetMin(v.Min) 302 | rla[i].SetMean(v.Mean) 303 | rla[i].SetMax(v.Max) 304 | } 305 | records.SetVersion(gen) 306 | records.SetValues(rl) 307 | resp.SetStatisticalRecords(records) 308 | resp.SetStatusCode(STATUSCODE_OK) 309 | if fin { 310 | resp.SetFinal(true) 311 | } 312 | sendresp(rvseg) 313 | bufarr = bufarr[:0] 314 | if fin { 315 | return 316 | } 317 | } 318 | } 319 | case REQUEST_QUERYVERSION: 320 | //ul := req. 321 | ul := req.QueryVersion().Uuids() 322 | ull := ul.ToArray() 323 | resp, rvseg := mkresp() 324 | rvers := NewVersions(rvseg) 325 | vlist := rvseg.NewUInt64List(len(ull)) 326 | ulist := rvseg.NewDataList(len(ull)) 327 | for i, v := range ull { 328 | ver, err := q.QueryGeneration(uuid.UUID(v)) 329 | if err != nil { 330 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 331 | resp.SetFinal(true) 332 | sendresp(rvseg) 333 | return 334 | } 335 | //I'm not sure that the array that sits behind the uuid slice will stick around 336 | //so I'm copying it. 
337 | uuid := make([]byte, 16) 338 | copy(uuid, v) 339 | vlist.Set(i, ver) 340 | ulist.Set(i, uuid) 341 | } 342 | resp.SetStatusCode(STATUSCODE_OK) 343 | rvers.SetUuids(ulist) 344 | rvers.SetVersions(vlist) 345 | resp.SetVersionList(rvers) 346 | resp.SetFinal(true) 347 | sendresp(rvseg) 348 | case REQUEST_QUERYNEARESTVALUE: 349 | resp, rvseg := mkresp() 350 | t := req.QueryNearestValue().Time() 351 | id := uuid.UUID(req.QueryNearestValue().Uuid()) 352 | ver := req.QueryNearestValue().Version() 353 | if ver == 0 { 354 | ver = btrdb.LatestGeneration 355 | } 356 | back := req.QueryNearestValue().Backward() 357 | rv, gen, err := q.QueryNearestValue(id, t, back, ver) 358 | switch err { 359 | case nil: 360 | resp.SetStatusCode(STATUSCODE_OK) 361 | records := NewRecords(rvseg) 362 | rl := NewRecordList(rvseg, 1) 363 | rla := rl.ToArray() 364 | rla[0].SetTime(rv.Time) 365 | rla[0].SetValue(rv.Val) 366 | records.SetVersion(gen) 367 | records.SetValues(rl) 368 | resp.SetRecords(records) 369 | case qtree.ErrNoSuchPoint: 370 | resp.SetStatusCode(STATUSCODE_NOSUCHPOINT) 371 | default: 372 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 373 | } 374 | resp.SetFinal(true) 375 | sendresp(rvseg) 376 | case REQUEST_QUERYCHANGEDRANGES: 377 | resp, rvseg := mkresp() 378 | id := uuid.UUID(req.QueryChangedRanges().Uuid()) 379 | sgen := req.QueryChangedRanges().FromGeneration() 380 | egen := req.QueryChangedRanges().ToGeneration() 381 | if egen == 0 { 382 | egen = btrdb.LatestGeneration 383 | } 384 | resolution := req.QueryChangedRanges().Resolution() 385 | rv, ver, err := q.QueryChangedRanges(id, sgen, egen, resolution) 386 | switch err { 387 | case nil: 388 | resp.SetStatusCode(STATUSCODE_OK) 389 | ranges := NewRanges(rvseg) 390 | ranges.SetVersion(ver) 391 | crl := NewChangedRangeList(rvseg, len(rv)) 392 | crla := crl.ToArray() 393 | for i := 0; i < len(rv); i++ { 394 | crla[i].SetStartTime(rv[i].Start) 395 | crla[i].SetEndTime(rv[i].End) 396 | } 397 | ranges.SetValues(crl) 398 | resp.SetChangedRngList(ranges) 399 | default: 400 | log.Critical("qcr error: ", err) 401 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 402 | } 403 | resp.SetFinal(true) 404 | sendresp(rvseg) 405 | 406 | case REQUEST_INSERTVALUES: 407 | resp, rvseg := mkresp() 408 | uuid := uuid.UUID(req.InsertValues().Uuid()) 409 | rl := req.InsertValues().Values() 410 | rla := rl.ToArray() 411 | if len(rla) != 0 { 412 | qtr := make([]qtree.Record, len(rla)) 413 | for i, v := range rla { 414 | qtr[i] = qtree.Record{Time: v.Time(), Val: v.Value()} 415 | } 416 | q.InsertValues(uuid, qtr) 417 | } 418 | if req.InsertValues().Sync() { 419 | q.Flush(uuid) 420 | } 421 | resp.SetFinal(true) 422 | resp.SetStatusCode(STATUSCODE_OK) 423 | sendresp(rvseg) 424 | case REQUEST_DELETEVALUES: 425 | resp, rvseg := mkresp() 426 | id := uuid.UUID(req.DeleteValues().Uuid()) 427 | stime := req.DeleteValues().StartTime() 428 | etime := req.DeleteValues().EndTime() 429 | err := q.DeleteRange(id, stime, etime) 430 | switch err { 431 | case nil: 432 | resp.SetStatusCode(STATUSCODE_OK) 433 | default: 434 | resp.SetStatusCode(STATUSCODE_INTERNALERROR) 435 | } 436 | resp.SetFinal(true) 437 | sendresp(rvseg) 438 | default: 439 | log.Critical("weird segment") 440 | } 441 | }() 442 | } 443 | } 444 | 445 | /* 446 | func EncodeMsg() *bytes.Buffer { 447 | rv := bytes.Buffer{} 448 | seg := capn.NewBuffer(nil) 449 | cmd := NewRootRequest(seg) 450 | 451 | qsv := NewCmdQueryStandardValues(seg) 452 | cmd.SetEchoTag(500) 453 | qsv.SetStartTime(0x5a5a) 454 | qsv.SetEndTime(0xf7f7) 455 | 
cmd.SetQueryStandardValues(qsv) 456 | seg.WriteTo(&rv) 457 | return &rv 458 | } 459 | 460 | func DecodeMsg(b *bytes.Buffer) { 461 | seg, err := capn.ReadFromStream(b, nil) 462 | if err != nil { 463 | log.Panic(err) 464 | } 465 | cmd := ReadRootRequest(seg) 466 | switch cmd.Which() { 467 | case REQUEST_QUERYSTANDARDVALUES: 468 | ca := cmd.QueryStandardValues() 469 | default: 470 | log.Critical("wtf") 471 | } 472 | } 473 | */ 474 | -------------------------------------------------------------------------------- /internal/cephprovider/cephprovider.go: -------------------------------------------------------------------------------- 1 | package cephprovider 2 | 3 | // #cgo LDFLAGS: -lrados 4 | // #include "cephprovider.h" 5 | // #include 6 | import "C" 7 | 8 | import ( 9 | "strconv" 10 | "sync" 11 | "unsafe" 12 | 13 | "github.com/SoftwareDefinedBuildings/btrdb/internal/bprovider" 14 | "github.com/op/go-logging" 15 | ) 16 | 17 | var log *logging.Logger 18 | 19 | func init() { 20 | log = logging.MustGetLogger("log") 21 | } 22 | 23 | const NUM_RHANDLES = 200 24 | 25 | //We know we won't get any addresses here, because this is the relocation base as well 26 | const METADATA_BASE = 0xFF00000000000000 27 | 28 | //4096 blocks per addr lock 29 | const ADDR_LOCK_SIZE = 0x1000000000 30 | const ADDR_OBJ_SIZE = 0x0001000000 31 | 32 | //Just over the DBSIZE 33 | const MAX_EXPECTED_OBJECT_SIZE = 20485 34 | 35 | //The number of RADOS blocks to cache (up to 16MB each, probably only 1.6MB each) 36 | const RADOS_CACHE_SIZE = NUM_RHANDLES * 2 37 | 38 | const OFFSET_MASK = 0xFFFFFF 39 | const R_CHUNKSIZE = 1 << 20 40 | 41 | //This is how many uuid/address pairs we will keep to facilitate appending to segments 42 | //instead of creating new ones. 43 | const WORTH_CACHING = OFFSET_MASK - MAX_EXPECTED_OBJECT_SIZE 44 | const SEGCACHE_SIZE = 1024 45 | 46 | // 1MB for write cache, I doubt we will ever hit this tbh 47 | const WCACHE_SIZE = 1 << 20 48 | 49 | func UUIDSliceToArr(id []byte) [16]byte { 50 | rv := [16]byte{} 51 | copy(rv[:], id) 52 | return rv 53 | } 54 | 55 | type CephSegment struct { 56 | h C.phandle_t 57 | sp *CephStorageProvider 58 | ptr uint64 59 | naddr uint64 60 | base uint64 //Not the same as the provider's base 61 | warrs [][]byte 62 | uid [16]byte 63 | wcache []byte 64 | wcache_base uint64 65 | } 66 | 67 | type chunkreqindex struct { 68 | UUID [16]byte 69 | Addr uint64 70 | } 71 | 72 | type CephStorageProvider struct { 73 | rh []C.phandle_t 74 | rhidx chan int 75 | rhidx_ret chan int 76 | rh_avail []bool 77 | ptr uint64 78 | alloc chan uint64 79 | segaddrcache map[[16]byte]uint64 80 | segcachelock sync.Mutex 81 | 82 | chunklock sync.Mutex 83 | chunkgate map[chunkreqindex][]chan []byte 84 | 85 | rcache *CephCache 86 | } 87 | 88 | //Returns the address of the first free word in the segment when it was locked 89 | func (seg *CephSegment) BaseAddress() uint64 { 90 | return seg.base 91 | } 92 | 93 | //Unlocks the segment for the StorageProvider to give to other consumers 94 | //Implies a flush 95 | func (seg *CephSegment) Unlock() { 96 | seg.flushWrite() 97 | _, err := C.handle_close(seg.h) 98 | if err != nil { 99 | log.Panic("CGO ERROR: %v", err) 100 | } 101 | seg.warrs = nil 102 | if (seg.naddr & OFFSET_MASK) < WORTH_CACHING { 103 | seg.sp.segcachelock.Lock() 104 | seg.sp.pruneSegCache() 105 | seg.sp.segaddrcache[seg.uid] = seg.naddr 106 | seg.sp.segcachelock.Unlock() 107 | } 108 | 109 | } 110 | 111 | func (seg *CephSegment) flushWrite() { 112 | if len(seg.wcache) == 0 { 113 | return 114 | } 115 | 
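//The whole write cache is handed to librados in a single cgo call at the
//cached base address, and any read-cache chunks overlapping the written range
//are invalidated immediately below so later reads do not see stale data.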
C.handle_write(seg.h, (*C.uint8_t)(unsafe.Pointer(&seg.uid[0])), C.uint64_t(seg.wcache_base), 116 | (*C.char)(unsafe.Pointer(&seg.wcache[0])), C.int(len(seg.wcache)), 0) 117 | 118 | for i := 0; i < len(seg.wcache); i += R_CHUNKSIZE { 119 | seg.sp.rcache.cacheInvalidate((uint64(i) + seg.wcache_base) & R_ADDRMASK) 120 | } 121 | //The C code does not finish immediately, so we need to keep a reference to the old 122 | //wcache array until the segment is unlocked 123 | seg.warrs = append(seg.warrs, seg.wcache) 124 | seg.wcache = make([]byte, 0, WCACHE_SIZE) 125 | seg.wcache_base = seg.naddr 126 | 127 | } 128 | 129 | //Writes a slice to the segment, returns immediately 130 | //Returns nil if op is OK, otherwise ErrNoSpace or ErrInvalidArgument 131 | //It is up to the implementer to work out how to report no space immediately 132 | //The uint64 is the address to be used for the next write 133 | func (seg *CephSegment) Write(uuid []byte, address uint64, data []byte) (uint64, error) { 134 | //We don't put written blocks into the cache, because those will be 135 | //in the dblock cache much higher up. 136 | if address != seg.naddr { 137 | log.Panic("Non-sequential write") 138 | } 139 | 140 | if len(seg.wcache)+len(data)+2 > cap(seg.wcache) { 141 | seg.flushWrite() 142 | } 143 | 144 | base := len(seg.wcache) 145 | seg.wcache = seg.wcache[:base+2] 146 | seg.wcache[base] = byte(len(data)) 147 | seg.wcache[base+1] = byte(len(data) >> 8) 148 | seg.wcache = append(seg.wcache, data...) 149 | 150 | naddr := address + uint64(len(data)+2) 151 | 152 | //OLD NOTE: 153 | //Note that it is ok for an object to "go past the end of the allocation". Naddr could be one byte before 154 | //the end of the allocation for example. This is not a problem as we never address anything except the 155 | //start of an object. This is why we do not add the object max size here 156 | //NEW NOTE: 157 | //We cannot go past the end of the allocation anymore because it would break the read cache 158 | if ((naddr + MAX_EXPECTED_OBJECT_SIZE + 2) >> 24) != (address >> 24) { 159 | //We are gonna need a new object addr 160 | naddr = <-seg.sp.alloc 161 | seg.naddr = naddr 162 | seg.flushWrite() 163 | return naddr, nil 164 | } 165 | seg.naddr = naddr 166 | 167 | return naddr, nil 168 | } 169 | 170 | //Block until all writes are complete. Note this does not imply a flush of the underlying files. 171 | func (seg *CephSegment) Flush() { 172 | //Not sure we need to do stuff here, we can do it in unlock 173 | } 174 | 175 | //Must be called with the cache lock held 176 | func (sp *CephStorageProvider) pruneSegCache() { 177 | //This is extremely rare, so its best to handle it simply 178 | //If we drop the cache, we will get one shortsized object per stream, 179 | //and it won't necessarily be _very_ short. 
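//segaddrcache maps a stream uuid to the next free address inside a partially
//filled object (see Unlock/LockSegment), which lets the next writer for that
//stream append instead of taking a brand new allocation. Rather than evicting
//entry by entry, the map is simply replaced once it reaches SEGCACHE_SIZE.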
180 | if len(sp.segaddrcache) >= SEGCACHE_SIZE { 181 | sp.segaddrcache = make(map[[16]byte]uint64, SEGCACHE_SIZE) 182 | } 183 | } 184 | 185 | func (sp *CephStorageProvider) provideReadHandles() { 186 | for { 187 | //Read all returned read handles 188 | ldretfi: 189 | for { 190 | select { 191 | case fi := <-sp.rhidx_ret: 192 | sp.rh_avail[fi] = true 193 | default: 194 | break ldretfi 195 | } 196 | } 197 | 198 | found := false 199 | for i := 0; i < NUM_RHANDLES; i++ { 200 | if sp.rh_avail[i] { 201 | sp.rhidx <- i 202 | sp.rh_avail[i] = false 203 | found = true 204 | } 205 | } 206 | //If we didn't find one, do a blocking read 207 | if !found { 208 | idx := <-sp.rhidx_ret 209 | sp.rh_avail[idx] = true 210 | } 211 | } 212 | } 213 | 214 | func (sp *CephStorageProvider) provideAllocs() { 215 | base := sp.ptr 216 | for { 217 | sp.alloc <- sp.ptr 218 | sp.ptr += ADDR_OBJ_SIZE 219 | if sp.ptr >= base+ADDR_LOCK_SIZE { 220 | sp.ptr = sp.obtainBaseAddress() 221 | base = sp.ptr 222 | } 223 | } 224 | } 225 | 226 | func (sp *CephStorageProvider) obtainBaseAddress() uint64 { 227 | h, err := C.handle_create() 228 | if err != nil { 229 | log.Panic("CGO ERROR: %v", err) 230 | } 231 | addr, err := C.handle_obtainrange(h) 232 | if err != nil { 233 | log.Panic("CGO ERROR: %v", err) 234 | } 235 | return uint64(addr) 236 | } 237 | 238 | //Called at startup of a normal run 239 | func (sp *CephStorageProvider) Initialize(opts map[string]string) { 240 | //Allocate caches 241 | sp.rcache = &CephCache{} 242 | cachesz, _ := strconv.Atoi(opts["cephrcache"]) 243 | if cachesz < 40 { 244 | cachesz = 40 //one per read handle: 40MB 245 | } 246 | sp.rcache.initCache(uint64(cachesz)) 247 | 248 | cephconf := C.CString(opts["cephconf"]) 249 | cephpool := C.CString(opts["cephpool"]) 250 | _, err := C.initialize_provider(cephconf, cephpool) 251 | if err != nil { 252 | log.Panic("CGO ERROR: %v", err) 253 | } 254 | C.free(unsafe.Pointer(cephconf)) 255 | C.free(unsafe.Pointer(cephpool)) 256 | 257 | sp.rh = make([]C.phandle_t, NUM_RHANDLES) 258 | sp.rh_avail = make([]bool, NUM_RHANDLES) 259 | sp.rhidx = make(chan int, NUM_RHANDLES+1) 260 | sp.rhidx_ret = make(chan int, NUM_RHANDLES+1) 261 | sp.alloc = make(chan uint64, 128) 262 | sp.segaddrcache = make(map[[16]byte]uint64, SEGCACHE_SIZE) 263 | sp.chunkgate = make(map[chunkreqindex][]chan []byte) 264 | 265 | for i := 0; i < NUM_RHANDLES; i++ { 266 | sp.rh_avail[i] = true 267 | h, err := C.handle_create() 268 | if err != nil { 269 | log.Panic("CGO ERROR: %v", err) 270 | } 271 | sp.rh[i] = h 272 | } 273 | 274 | //Obtain base address 275 | sp.ptr = sp.obtainBaseAddress() 276 | if sp.ptr == 0 { 277 | log.Panic("Could not read allocator! 
DB not created properly?") 278 | } 279 | log.Info("Base address obtained as 0x%016x", sp.ptr) 280 | 281 | //Start serving read handles 282 | go sp.provideReadHandles() 283 | 284 | //Start providing address allocations 285 | go sp.provideAllocs() 286 | 287 | } 288 | 289 | //Called to create the database for the first time 290 | func (sp *CephStorageProvider) CreateDatabase(opts map[string]string) error { 291 | cephconf := C.CString(opts["cephconf"]) 292 | cephpool := C.CString(opts["cephpool"]) 293 | _, err := C.initialize_provider(cephconf, cephpool) 294 | if err != nil { 295 | log.Panic("CGO ERROR: %v", err) 296 | } 297 | C.free(unsafe.Pointer(cephconf)) 298 | C.free(unsafe.Pointer(cephpool)) 299 | h, err := C.handle_create() 300 | if err != nil { 301 | log.Panic("CGO ERROR: %v", err) 302 | } 303 | C.handle_init_allocator(h) 304 | _, err = C.handle_close(h) 305 | if err != nil { 306 | log.Panic("CGO ERROR: %v", err) 307 | } 308 | return nil 309 | } 310 | 311 | // Lock a segment, or block until a segment can be locked 312 | // Returns a Segment struct 313 | // Implicit unchecked assumption: you cannot lock more than one segment 314 | // for a given uuid (without unlocking them in between). It will break 315 | // segcache 316 | func (sp *CephStorageProvider) LockSegment(uuid []byte) bprovider.Segment { 317 | rv := new(CephSegment) 318 | rv.sp = sp 319 | h, err := C.handle_create() 320 | if err != nil { 321 | log.Panic("CGO ERROR: %v", err) 322 | } 323 | rv.h = h 324 | rv.ptr = <-sp.alloc 325 | rv.uid = UUIDSliceToArr(uuid) 326 | rv.wcache = make([]byte, 0, WCACHE_SIZE) 327 | sp.segcachelock.Lock() 328 | cached_ptr, ok := sp.segaddrcache[rv.uid] 329 | if ok { 330 | delete(sp.segaddrcache, rv.uid) 331 | } 332 | sp.segcachelock.Unlock() 333 | //ok = false 334 | if ok { 335 | rv.base = cached_ptr 336 | rv.naddr = rv.base 337 | } else { 338 | rv.base = rv.ptr 339 | rv.naddr = rv.base 340 | } 341 | rv.wcache_base = rv.naddr 342 | //Although I don't know this for sure, I am concerned that when we pass the write array pointer to C 343 | //the Go GC may free it before C is done. 
I prevent this by pinning all the written arrays, which get 344 | //deref'd after the segment is unlocked 345 | rv.warrs = make([][]byte, 0, 64) 346 | return rv 347 | } 348 | 349 | func (sp *CephStorageProvider) rawObtainChunk(uuid []byte, address uint64) []byte { 350 | chunk := sp.rcache.cacheGet(address) 351 | if chunk == nil { 352 | chunk = sp.rcache.getBlank() 353 | rhidx := <-sp.rhidx 354 | rc, err := C.handle_read(sp.rh[rhidx], (*C.uint8_t)(unsafe.Pointer(&uuid[0])), C.uint64_t(address), (*C.char)(unsafe.Pointer(&chunk[0])), R_CHUNKSIZE) 355 | if err != nil { 356 | log.Panic("CGO ERROR: %v", err) 357 | } 358 | chunk = chunk[0:rc] 359 | sp.rhidx_ret <- rhidx 360 | sp.rcache.cachePut(address, chunk) 361 | } 362 | return chunk 363 | } 364 | 365 | func (sp *CephStorageProvider) obtainChunk(uuid []byte, address uint64) []byte { 366 | chunk := sp.rcache.cacheGet(address) 367 | if chunk != nil { 368 | return chunk 369 | } 370 | index := chunkreqindex{UUID: UUIDSliceToArr(uuid), Addr: address} 371 | rvc := make(chan []byte, 1) 372 | sp.chunklock.Lock() 373 | slc, ok := sp.chunkgate[index] 374 | if ok { 375 | sp.chunkgate[index] = append(slc, rvc) 376 | sp.chunklock.Unlock() 377 | } else { 378 | sp.chunkgate[index] = []chan []byte{rvc} 379 | sp.chunklock.Unlock() 380 | go func() { 381 | bslice := sp.rawObtainChunk(uuid, address) 382 | sp.chunklock.Lock() 383 | slc, ok := sp.chunkgate[index] 384 | if !ok { 385 | panic("inconsistency!!") 386 | } 387 | for _, chn := range slc { 388 | chn <- bslice 389 | } 390 | delete(sp.chunkgate, index) 391 | sp.chunklock.Unlock() 392 | }() 393 | } 394 | rv := <-rvc 395 | return rv 396 | } 397 | 398 | // Read the blob into the given buffer: direct read 399 | /* 400 | func (sp *CephStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte { 401 | 402 | //Get a read handle 403 | rhidx := <-sp.rhidx 404 | if len(buffer) < MAX_EXPECTED_OBJECT_SIZE { 405 | log.Panic("That doesn't seem safe") 406 | } 407 | rc, err := C.handle_read(sp.rh[rhidx], (*C.uint8_t)(unsafe.Pointer(&uuid[0])), C.uint64_t(address), (*C.char)(unsafe.Pointer(&buffer[0])), MAX_EXPECTED_OBJECT_SIZE) 408 | if err != nil { 409 | log.Panic("CGO ERROR: %v", err) 410 | } 411 | sp.rhidx_ret <- rhidx 412 | ln := int(buffer[0]) + (int(buffer[1]) << 8) 413 | if int(rc) < ln+2 { 414 | //TODO this can happen, it is better to just go back a few superblocks 415 | log.Panic("Short read") 416 | } 417 | return buffer[2 : ln+2] 418 | }*/ 419 | 420 | // Read the blob into the given buffer 421 | func (sp *CephStorageProvider) Read(uuid []byte, address uint64, buffer []byte) []byte { 422 | //Get the first chunk for this object: 423 | chunk1 := sp.obtainChunk(uuid, address&R_ADDRMASK)[address&R_OFFSETMASK:] 424 | var chunk2 []byte 425 | var ln int 426 | 427 | if len(chunk1) < 2 { 428 | //not even long enough for the prefix, must be one byte in the first chunk, one in teh second 429 | chunk2 = sp.obtainChunk(uuid, (address+R_CHUNKSIZE)&R_ADDRMASK) 430 | ln = int(chunk1[0]) + (int(chunk2[0]) << 8) 431 | chunk2 = chunk2[1:] 432 | chunk1 = chunk1[1:] 433 | } else { 434 | ln = int(chunk1[0]) + (int(chunk1[1]) << 8) 435 | chunk1 = chunk1[2:] 436 | } 437 | 438 | if (ln) > MAX_EXPECTED_OBJECT_SIZE { 439 | log.Panic("WTUF: ", ln) 440 | } 441 | 442 | copied := 0 443 | if len(chunk1) > 0 { 444 | //We need some bytes from chunk1 445 | end := ln 446 | if len(chunk1) < ln { 447 | end = len(chunk1) 448 | } 449 | copied = copy(buffer, chunk1[:end]) 450 | } 451 | if copied < ln { 452 | //We need some bytes from chunk2 
453 | if chunk2 == nil { 454 | chunk2 = sp.obtainChunk(uuid, (address+R_CHUNKSIZE)&R_ADDRMASK) 455 | } 456 | copy(buffer[copied:], chunk2[:ln-copied]) 457 | 458 | } 459 | if ln < 2 { 460 | log.Panic("This is unexpected") 461 | } 462 | return buffer[:ln] 463 | 464 | } 465 | -------------------------------------------------------------------------------- /internal/bstore/blocktypes.go: -------------------------------------------------------------------------------- 1 | package bstore 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/pborman/uuid" 7 | ) 8 | 9 | type Superblock struct { 10 | uuid uuid.UUID 11 | gen uint64 12 | root uint64 13 | unlinked bool 14 | } 15 | 16 | func (s *Superblock) Gen() uint64 { 17 | return s.gen 18 | } 19 | 20 | func (s *Superblock) Root() uint64 { 21 | return s.root 22 | } 23 | 24 | func (s *Superblock) Uuid() uuid.UUID { 25 | return s.uuid 26 | } 27 | 28 | func (s *Superblock) Unlinked() bool { 29 | return s.unlinked 30 | } 31 | 32 | func NewSuperblock(id uuid.UUID) *Superblock { 33 | return &Superblock{ 34 | uuid: id, 35 | gen: 1, 36 | root: 0, 37 | } 38 | } 39 | 40 | func (s *Superblock) Clone() *Superblock { 41 | return &Superblock{ 42 | uuid: s.uuid, 43 | gen: s.gen, 44 | root: s.root, 45 | } 46 | } 47 | 48 | type BlockType uint64 49 | 50 | const ( 51 | Vector BlockType = 1 52 | Core BlockType = 2 53 | Bad BlockType = 255 54 | ) 55 | 56 | const FlagsMask uint8 = 3 57 | 58 | type Datablock interface { 59 | GetDatablockType() BlockType 60 | } 61 | 62 | // The leaf datablock type. The tags allow unit tests 63 | // to work out if clone / serdes are working properly 64 | // metadata is not copied when a node is cloned 65 | // implicit is not serialised 66 | type Vectorblock struct { 67 | 68 | //Metadata, not copied on clone 69 | Identifier uint64 "metadata,implicit" 70 | Generation uint64 "metadata,implicit" 71 | 72 | //Payload, copied on clone 73 | Len uint16 74 | PointWidth uint8 "implicit" 75 | StartTime int64 "implicit" 76 | Time [VSIZE]int64 77 | Value [VSIZE]float64 78 | } 79 | 80 | type Coreblock struct { 81 | 82 | //Metadata, not copied 83 | Identifier uint64 "metadata,implicit" 84 | Generation uint64 "metadata,implicit" 85 | 86 | //Payload, copied 87 | PointWidth uint8 "implicit" 88 | StartTime int64 "implicit" 89 | Addr [KFACTOR]uint64 90 | Count [KFACTOR]uint64 91 | Min [KFACTOR]float64 92 | Mean [KFACTOR]float64 93 | Max [KFACTOR]float64 94 | CGeneration [KFACTOR]uint64 95 | } 96 | 97 | func (*Vectorblock) GetDatablockType() BlockType { 98 | return Vector 99 | } 100 | 101 | func (*Coreblock) GetDatablockType() BlockType { 102 | return Core 103 | } 104 | 105 | //Copy a core block, only copying the payload, not the metadata 106 | func (src *Coreblock) CopyInto(dst *Coreblock) { 107 | dst.PointWidth = src.PointWidth 108 | dst.StartTime = src.StartTime 109 | dst.Addr = src.Addr 110 | //dst.Time = src.Time 111 | dst.Count = src.Count 112 | dst.Min = src.Min 113 | dst.Mean = src.Mean 114 | dst.Max = src.Max 115 | dst.CGeneration = src.CGeneration 116 | } 117 | 118 | func (src *Vectorblock) CopyInto(dst *Vectorblock) { 119 | dst.PointWidth = src.PointWidth 120 | dst.StartTime = src.StartTime 121 | dst.Len = src.Len 122 | dst.Time = src.Time 123 | dst.Value = src.Value 124 | } 125 | 126 | func DatablockGetBufferType(buf []byte) BlockType { 127 | switch BlockType(buf[0]) { 128 | case Vector: 129 | return Vector 130 | case Core: 131 | return Core 132 | } 133 | return Bad 134 | } 135 | 136 | // The current algorithm is as follows: 137 | // entry 0: absolute time 
and value 138 | // entry 1: delta time and value since 0 139 | // entry 2: delta since delta 1 140 | // entry 3: delta from average delta (1+2) 141 | // enrty 4+ delta from average delta (n-1, n-2, n-3) 142 | 143 | func (v *Vectorblock) Serialize(dst []byte) []byte { 144 | idx := 3 145 | dst[0] = byte(Vector) 146 | dst[1] = byte(v.Len) 147 | dst[2] = byte(v.Len >> 8) 148 | 149 | if v.Len == 0 { 150 | return dst[:idx] 151 | } 152 | //First values are written in full 153 | e, m := decompose(v.Value[0]) 154 | idx += writeUnsignedHuff(dst[idx:], m) 155 | idx += writeUnsignedHuff(dst[idx:], uint64(e)) 156 | 157 | //So we are taking a gamble here: I think I will never have negative times. If I do, 158 | //this will use 9 bytes for every time. But I won't. 159 | t := v.Time[0] 160 | idx += writeUnsignedHuff(dst[idx:], uint64(t)) 161 | if v.Len == 1 { 162 | return dst[:idx] 163 | } 164 | 165 | const delta_depth = 3 166 | hist_deltas_t := make([]int64, delta_depth) 167 | hist_deltas_e := make([]int64, delta_depth) 168 | hist_deltas_m := make([]int64, delta_depth) 169 | delta_idx := 0 170 | num_deltas := 0 171 | 172 | em1 := int64(e) 173 | mm1 := int64(m) 174 | tm1 := t 175 | for i := 1; i < int(v.Len); i++ { 176 | var deltas int 177 | if num_deltas > delta_depth { 178 | deltas = delta_depth 179 | } else { 180 | deltas = num_deltas 181 | } 182 | var e, m int64 183 | tmpe, tmpm := decompose(v.Value[i]) 184 | e = int64(tmpe) 185 | m = int64(tmpm) 186 | t := v.Time[i] 187 | 188 | //Calculate the delta for this record 189 | dt := t - tm1 190 | de := e - em1 191 | dm := m - mm1 192 | 193 | //Calculate average deltas 194 | var dt_total int64 = 0 195 | var dm_total int64 = 0 196 | var de_total int64 = 0 197 | for d := 0; d < deltas; d++ { 198 | dt_total += hist_deltas_t[d] 199 | dm_total += hist_deltas_m[d] 200 | de_total += hist_deltas_e[d] 201 | } 202 | var adt, ade, adm int64 = 0, 0, 0 203 | if deltas != 0 { 204 | adt = dt_total / int64(deltas) 205 | ade = de_total / int64(deltas) 206 | adm = dm_total / int64(deltas) 207 | } 208 | //Calculate the delta delta 209 | ddt := dt - adt 210 | dde := de - ade 211 | ddm := dm - adm 212 | 213 | //Add in the delta for this record 214 | hist_deltas_t[delta_idx] = dt 215 | hist_deltas_e[delta_idx] = de 216 | hist_deltas_m[delta_idx] = dm 217 | delta_idx++ 218 | if delta_idx == delta_depth { 219 | delta_idx = 0 220 | } 221 | num_deltas++ 222 | 223 | //Encode dde nz and ddt nz into ddm 224 | ddm <<= 2 225 | if dde != 0 { 226 | ddm |= 2 227 | } 228 | if ddt != 0 { 229 | ddm |= 1 230 | } 231 | 232 | //Write it out 233 | idx += writeSignedHuff(dst[idx:], ddm) 234 | if dde != 0 { 235 | idx += writeSignedHuff(dst[idx:], dde) 236 | } 237 | if ddt != 0 { 238 | idx += writeSignedHuff(dst[idx:], ddt) 239 | } 240 | 241 | em1 = e 242 | tm1 = t 243 | mm1 = m 244 | } 245 | return dst[:idx] 246 | } 247 | 248 | func (v *Vectorblock) Deserialize(src []byte) { 249 | blocktype := src[0] 250 | if BlockType(blocktype) != Vector { 251 | lg.Panicf("This is not a vector block") 252 | } 253 | 254 | v.Len = uint16(src[1]) + (uint16(src[2]) << 8) 255 | length := int(v.Len) 256 | idx := 3 257 | 258 | m, l, _ := readUnsignedHuff(src[idx:]) 259 | idx += l 260 | e, l, _ := readUnsignedHuff(src[idx:]) 261 | idx += l 262 | t, l, _ := readUnsignedHuff(src[idx:]) 263 | idx += l 264 | v.Time[0] = int64(t) 265 | v.Value[0] = recompose(uint16(e), uint64(m)) 266 | 267 | //Keep delta history 268 | const delta_depth = 3 269 | hist_deltas_t := make([]int64, delta_depth) 270 | hist_deltas_e := make([]int64, 
delta_depth) 271 | hist_deltas_m := make([]int64, delta_depth) 272 | delta_idx := 0 273 | num_deltas := 0 274 | 275 | mm1 := int64(m) 276 | em1 := int64(e) 277 | tm1 := int64(t) 278 | for i := 1; i < length; i++ { 279 | //How many deltas do we have 280 | var deltas int 281 | if num_deltas > delta_depth { 282 | deltas = delta_depth 283 | } else { 284 | deltas = num_deltas 285 | } 286 | 287 | //Calculate average deltas 288 | var dt_total int64 = 0 289 | var dm_total int64 = 0 290 | var de_total int64 = 0 291 | for d := 0; d < deltas; d++ { 292 | dt_total += hist_deltas_t[d] 293 | dm_total += hist_deltas_m[d] 294 | de_total += hist_deltas_e[d] 295 | } 296 | var adt, ade, adm int64 = 0, 0, 0 297 | if deltas != 0 { 298 | adt = dt_total / int64(deltas) 299 | ade = de_total / int64(deltas) 300 | adm = dm_total / int64(deltas) 301 | } 302 | //Read the dd's 303 | ddm, l, _ := readSignedHuff(src[idx:]) 304 | idx += l 305 | var dde, ddt int64 = 0, 0 306 | if ddm&2 != 0 { 307 | //log.Warning("re") 308 | dde, l, _ = readSignedHuff(src[idx:]) 309 | idx += l 310 | } 311 | if ddm&1 != 0 { 312 | //log.Warning("rt") 313 | ddt, l, _ = readSignedHuff(src[idx:]) 314 | idx += l 315 | } 316 | ddm >>= 2 317 | //Convert dd's to d's 318 | dm := ddm + adm 319 | dt := ddt + adt 320 | de := dde + ade 321 | 322 | //Save the deltas in the history 323 | hist_deltas_t[delta_idx] = dt 324 | hist_deltas_m[delta_idx] = dm 325 | hist_deltas_e[delta_idx] = de 326 | delta_idx++ 327 | if delta_idx == delta_depth { 328 | delta_idx = 0 329 | } 330 | num_deltas++ 331 | 332 | //Save values 333 | e := em1 + de 334 | m := mm1 + dm 335 | v.Time[i] = tm1 + dt 336 | v.Value[i] = recompose(uint16(e), uint64(m)) 337 | em1 += de 338 | mm1 += dm 339 | tm1 += dt 340 | } 341 | } 342 | 343 | func (c *Coreblock) Serialize(dst []byte) []byte { 344 | /* 345 | Addr delta-delta / abszero 346 | Count delta +isnz(cgen) 347 | CGeneration delta-delta 348 | Mean delta-delta (mantissa contains isnz(e)) 349 | Min delta-delta (mantissa contains isnz(e)) 350 | Max delta-delta (mantissa contains isnz(e)) 351 | 352 | TL;DR the code is the documentation MWAHAHAHA 353 | */ 354 | 355 | idx := 1 356 | dst[0] = byte(Core) 357 | 358 | const delta_depth = 3 359 | 360 | deltadeltarizer := func(maxdepth int) func(value int64) int64 { 361 | hist_delta := make([]int64, maxdepth) 362 | var depth int = 0 363 | insidx := 0 364 | var last_value int64 365 | dd := func(value int64) int64 { 366 | var total_dt int64 = 0 367 | for i := 0; i < depth; i++ { 368 | total_dt += hist_delta[i] 369 | } 370 | var avg_dt int64 = 0 371 | if depth > 0 { 372 | avg_dt = total_dt / int64(depth) 373 | } 374 | curdelta := value - last_value 375 | last_value = value 376 | ddelta := curdelta - avg_dt 377 | hist_delta[insidx] = curdelta 378 | insidx = (insidx + 1) % maxdepth 379 | depth += 1 380 | if depth > maxdepth { 381 | depth = maxdepth 382 | } 383 | return ddelta 384 | } 385 | return dd 386 | } 387 | dd_addr := deltadeltarizer(delta_depth) 388 | dd_cgen := deltadeltarizer(delta_depth) 389 | dd_count := deltadeltarizer(delta_depth) 390 | dd_mean_m := deltadeltarizer(delta_depth) 391 | dd_mean_e := deltadeltarizer(delta_depth) 392 | dd_min_m := deltadeltarizer(delta_depth) 393 | dd_min_e := deltadeltarizer(delta_depth) 394 | dd_max_m := deltadeltarizer(delta_depth) 395 | dd_max_e := deltadeltarizer(delta_depth) 396 | 397 | //Look for bottomable idx 398 | bottomidx := -1 399 | for i := KFACTOR - 1; i >= 0; i-- { 400 | if c.Addr[i] == 0 && c.CGeneration[i] == 0 { 401 | bottomidx = i 402 | } else { 
403 | break 404 | } 405 | } 406 | for i := 0; i < KFACTOR; i++ { 407 | if i == bottomidx { 408 | idx += writeFullZero(dst[idx:]) 409 | break 410 | } 411 | if c.Addr[i] == 0 { 412 | idx += writeAbsZero(dst[idx:]) 413 | idx += writeSignedHuff(dst[idx:], dd_cgen(int64(c.CGeneration[i]))) 414 | } else { 415 | idx += writeSignedHuff(dst[idx:], dd_addr(int64(c.Addr[i]))) 416 | 417 | min_e, min_m := decompose(c.Min[i]) 418 | min_m_dd := dd_min_m(int64(min_m)) 419 | min_e_dd := dd_min_e(int64(min_e)) 420 | min_m_dd <<= 1 421 | if min_e_dd != 0 { 422 | min_m_dd |= 1 423 | } 424 | 425 | mean_e, mean_m := decompose(c.Mean[i]) 426 | mean_m_dd := dd_mean_m(int64(mean_m)) 427 | mean_e_dd := dd_mean_e(int64(mean_e)) 428 | mean_m_dd <<= 1 429 | if mean_e_dd != 0 { 430 | mean_m_dd |= 1 431 | } 432 | 433 | max_e, max_m := decompose(c.Max[i]) 434 | max_m_dd := dd_max_m(int64(max_m)) 435 | max_e_dd := dd_max_e(int64(max_e)) 436 | max_m_dd <<= 1 437 | if max_e_dd != 0 { 438 | max_m_dd |= 1 439 | } 440 | 441 | cgen_dd := dd_cgen(int64(c.CGeneration[i])) 442 | 443 | cnt := dd_count(int64(c.Count[i])) 444 | cnt <<= 1 445 | if cgen_dd != 0 { 446 | cnt |= 1 447 | } 448 | idx += writeSignedHuff(dst[idx:], cnt) 449 | if cgen_dd != 0 { 450 | idx += writeSignedHuff(dst[idx:], cgen_dd) 451 | } 452 | idx += writeSignedHuff(dst[idx:], min_m_dd) 453 | if min_e_dd != 0 { 454 | idx += writeSignedHuff(dst[idx:], min_e_dd) 455 | } 456 | idx += writeSignedHuff(dst[idx:], mean_m_dd) 457 | if mean_e_dd != 0 { 458 | idx += writeSignedHuff(dst[idx:], mean_e_dd) 459 | } 460 | idx += writeSignedHuff(dst[idx:], max_m_dd) 461 | if max_e_dd != 0 { 462 | idx += writeSignedHuff(dst[idx:], max_e_dd) 463 | } 464 | } 465 | //log.Warning("Finished SER %v, idx is %v", i, idx) 466 | } 467 | return dst[:idx] 468 | } 469 | 470 | func (c *Coreblock) Deserialize(src []byte) { 471 | //check 0 for id 472 | if src[0] != byte(Core) { 473 | lg.Panic("This is not a core block") 474 | } 475 | idx := 1 476 | dedeltadeltarizer := func(maxdepth int) func(dd int64) int64 { 477 | hist_delta := make([]int64, maxdepth) 478 | depth := 0 479 | insidx := 0 480 | var last_value int64 = 0 481 | decode := func(dd int64) int64 { 482 | var total_dt int64 = 0 483 | for i := 0; i < depth; i++ { 484 | total_dt += hist_delta[i] 485 | } 486 | var avg_dt int64 = 0 487 | if depth > 0 { 488 | avg_dt = total_dt / int64(depth) 489 | } 490 | curdelta := avg_dt + dd 491 | curvalue := last_value + curdelta 492 | last_value = curvalue 493 | hist_delta[insidx] = curdelta 494 | insidx = (insidx + 1) % maxdepth 495 | depth += 1 496 | if depth > maxdepth { 497 | depth = maxdepth 498 | } 499 | return last_value 500 | } 501 | return decode 502 | } 503 | 504 | const delta_depth = 3 505 | dd_addr := dedeltadeltarizer(delta_depth) 506 | dd_cgen := dedeltadeltarizer(delta_depth) 507 | dd_count := dedeltadeltarizer(delta_depth) 508 | dd_mean_m := dedeltadeltarizer(delta_depth) 509 | dd_mean_e := dedeltadeltarizer(delta_depth) 510 | dd_min_m := dedeltadeltarizer(delta_depth) 511 | dd_min_e := dedeltadeltarizer(delta_depth) 512 | dd_max_m := dedeltadeltarizer(delta_depth) 513 | dd_max_e := dedeltadeltarizer(delta_depth) 514 | 515 | i := 0 516 | for ; i < KFACTOR; i++ { 517 | 518 | //Get addr 519 | addr_dd, used, bottom := readSignedHuff(src[idx:]) 520 | idx += used 521 | if bottom == ABSZERO { 522 | c.Addr[i] = 0 523 | c.Count[i] = 0 524 | //min/mean/max are undefined 525 | //Still have to decode cgen 526 | cgen_dd, used, _ := readSignedHuff(src[idx:]) 527 | idx += used 528 | cgen := 
uint64(dd_cgen(cgen_dd)) 529 | c.CGeneration[i] = cgen 530 | } else if bottom == FULLZERO { 531 | break 532 | } else { 533 | //Real value 534 | c.Addr[i] = uint64(dd_addr(addr_dd)) 535 | 536 | cnt_dd, used, _ := readSignedHuff(src[idx:]) 537 | idx += used 538 | 539 | var cgen_dd int64 = 0 540 | if cnt_dd&1 != 0 { 541 | cgen_dd, used, _ = readSignedHuff(src[idx:]) 542 | idx += used 543 | } 544 | cnt_dd >>= 1 545 | c.CGeneration[i] = uint64(dd_cgen(cgen_dd)) 546 | c.Count[i] = uint64(dd_count(cnt_dd)) 547 | 548 | min_m_dd, used, _ := readSignedHuff(src[idx:]) 549 | idx += used 550 | var min_e_dd int64 551 | if min_m_dd&1 != 0 { 552 | min_e_dd, used, _ = readSignedHuff(src[idx:]) 553 | idx += used 554 | } else { 555 | min_e_dd = 0 556 | } 557 | min_m_dd >>= 1 558 | c.Min[i] = recompose(uint16(dd_min_e(min_e_dd)), uint64(dd_min_m(min_m_dd))) 559 | 560 | mean_m_dd, used, _ := readSignedHuff(src[idx:]) 561 | idx += used 562 | var mean_e_dd int64 563 | if mean_m_dd&1 != 0 { 564 | mean_e_dd, used, _ = readSignedHuff(src[idx:]) 565 | idx += used 566 | } else { 567 | mean_e_dd = 0 568 | } 569 | mean_m_dd >>= 1 570 | c.Mean[i] = recompose(uint16(dd_mean_e(mean_e_dd)), uint64(dd_mean_m(mean_m_dd))) 571 | 572 | max_m_dd, used, _ := readSignedHuff(src[idx:]) 573 | idx += used 574 | var max_e_dd int64 575 | if max_m_dd&1 != 0 { 576 | max_e_dd, used, _ = readSignedHuff(src[idx:]) 577 | idx += used 578 | } else { 579 | max_e_dd = 0 580 | } 581 | max_m_dd >>= 1 582 | c.Max[i] = recompose(uint16(dd_max_e(max_e_dd)), uint64(dd_max_m(max_m_dd))) 583 | } 584 | //log.Warning("Finishing deser idx %v, idx is %v",i, idx) 585 | } 586 | 587 | //Clear out from a FULLZERO 588 | for ; i < KFACTOR; i++ { 589 | c.Addr[i] = 0 590 | c.Count[i] = 0 591 | c.CGeneration[i] = 0 592 | 593 | } 594 | } 595 | 596 | //These functions allow us to read/write the packed numbers in the datablocks 597 | //These are huffman encoded in big endian 598 | // 0xxx xxxx 7 0x00 599 | // 10xx xxxx +1 14 0x80 600 | // 1100 xxxx +2 20 0xC0 601 | // 1101 xxxx +3 28 0xD0 602 | // 1110 xxxx +4 36 0xE0 603 | // 1111 00xx +5 42 0xF0 604 | // 1111 01xx +6 50 0xF4 605 | // 1111 10xx +7 58 0xF8 606 | // 1111 1100 +8 64 0xFC 607 | // 1111 1101 +0 ABSZERO (special symbol) 0xFD 608 | // 1111 1110 +0 FULLZERO (special symbol) 0xFE 609 | const VALUE = 0 610 | const ABSZERO = 1 611 | const FULLZERO = 2 612 | 613 | func writeUnsignedHuff(dst []byte, val uint64) int { 614 | //log.Warning("wuh called dstlen %v",len(dst)) 615 | i := 0 616 | var do_rest func(n uint8) 617 | do_rest = func(n uint8) { 618 | if n == 0 { 619 | return 620 | } 621 | dst[i] = byte((val >> ((n - 1) * 8)) & 0xFF) 622 | i++ 623 | do_rest(n - 1) 624 | } 625 | if val < (1 << 7) { 626 | dst[i] = byte(val) 627 | i++ 628 | } else if val < (1 << 14) { 629 | dst[i] = byte(0x80 | val>>8) 630 | i++ 631 | do_rest(1) 632 | } else if val < (1 << 20) { 633 | dst[i] = byte(0xC0 | val>>16) 634 | i++ 635 | do_rest(2) 636 | } else if val < (1 << 28) { 637 | dst[i] = byte(0xD0 | val>>24) 638 | i++ 639 | do_rest(3) 640 | } else if val < (1 << 36) { 641 | dst[i] = byte(0xE0 | val>>32) 642 | i++ 643 | do_rest(4) 644 | } else if val < (1 << 42) { 645 | dst[i] = byte(0xF0 | val>>40) 646 | i++ 647 | do_rest(5) 648 | } else if val < (1 << 50) { 649 | dst[i] = byte(0xF4 | val>>48) 650 | i++ 651 | do_rest(6) 652 | } else if val < (1 << 58) { 653 | dst[i] = byte(0xF8 | val>>56) 654 | i++ 655 | do_rest(7) 656 | } else { 657 | dst[i] = 0xFC 658 | i++ 659 | do_rest(8) 660 | } 661 | return i 662 | } 663 | func 
writeAbsZero(dst []byte) int { 664 | dst[0] = 0xFD 665 | return 1 666 | } 667 | func writeFullZero(dst []byte) int { 668 | dst[0] = 0xFE 669 | return 1 670 | } 671 | func writeSignedHuff(dst []byte, val int64) int { 672 | if val < 0 { 673 | return writeUnsignedHuff(dst, (uint64(-val)<<1 | 1)) 674 | } else { 675 | return writeUnsignedHuff(dst, uint64(val)<<1) 676 | } 677 | } 678 | func readUnsignedHuff(src []byte) (uint64, int, int) { 679 | var rv uint64 680 | i := 1 681 | var do_rest func(n uint8) 682 | do_rest = func(n uint8) { 683 | if n == 0 { 684 | return 685 | } 686 | rv <<= 8 687 | rv |= uint64(src[i]) 688 | i++ 689 | do_rest(n - 1) 690 | } 691 | if src[0] > 0xFE { 692 | lg.Panicf("This huffman symbol is reserved: +v", src[0]) 693 | } else if src[0] == 0xFD { 694 | return 0, 1, ABSZERO 695 | } else if src[0] == 0xFE { 696 | return 0, 1, FULLZERO 697 | } else if src[0] == 0xFC { 698 | do_rest(8) 699 | } else if src[0] >= 0xF8 { 700 | rv = uint64(src[0] & 0x03) 701 | do_rest(7) 702 | } else if src[0] >= 0xF4 { 703 | rv = uint64(src[0] & 0x03) 704 | do_rest(6) 705 | } else if src[0] >= 0xF0 { 706 | rv = uint64(src[0] & 0x03) 707 | do_rest(5) 708 | } else if src[0] >= 0xE0 { 709 | rv = uint64(src[0] & 0x0F) 710 | do_rest(4) 711 | } else if src[0] >= 0xD0 { 712 | rv = uint64(src[0] & 0x0F) 713 | do_rest(3) 714 | } else if src[0] >= 0xC0 { 715 | rv = uint64(src[0] & 0x0F) 716 | do_rest(2) 717 | } else if src[0] >= 0x80 { 718 | rv = uint64(src[0] & 0x3F) 719 | do_rest(1) 720 | } else { 721 | rv = uint64(src[0] & 0x7F) 722 | } 723 | return rv, i, VALUE 724 | } 725 | func readSignedHuff(src []byte) (int64, int, int) { 726 | v, l, bv := readUnsignedHuff(src) 727 | if bv != VALUE { 728 | return 0, 1, bv 729 | } 730 | s := v & 1 731 | v >>= 1 732 | if s == 1 { 733 | return -int64(v), l, VALUE 734 | } 735 | return int64(v), l, VALUE 736 | } 737 | 738 | //This composes a float into a weird representation that was empirically determined to be 739 | //ideal for compression of Quasar streams. 740 | //First we split out the sign, exponent and mantissa from the float 741 | //Then we reverse the bytes in the mantissa (bits are better but slower) 742 | //Then we left shift it and stick the sign bit as the LSB 743 | //The result is the (unsigned) exponent and the mantissa-sortof-thingy 744 | func decompose(val float64) (e uint16, m uint64) { 745 | iv := math.Float64bits(val) 746 | s := iv >> 63 747 | exp := (iv >> 52) & 2047 748 | iv = iv & ((1 << 52) - 1) 749 | //Take the bottom 7 bytes and reverse them. Top byte is left zero 750 | // . . . . . . 751 | m = ((iv&0x00000000000000FF)<<(6*8) | 752 | (iv&0x000000000000FF00)<<(4*8) | 753 | (iv&0x0000000000FF0000)<<(2*8) | 754 | (iv & 0x00000000FF000000) | 755 | (iv&0x000000FF00000000)>>(2*8) | 756 | (iv&0x0000FF0000000000)>>(4*8) | 757 | (iv&0x00FF000000000000)>>(6*8)) 758 | e = (uint16(exp) << 1) | uint16(s) 759 | return 760 | } 761 | 762 | func recompose(e uint16, m uint64) float64 { 763 | s := e & 1 764 | e >>= 1 765 | iv := ((m&0x00000000000000FF)<<(6*8) | 766 | (m&0x000000000000FF00)<<(4*8) | 767 | (m&0x0000000000FF0000)<<(2*8) | 768 | (m & 0x00000000FF000000) | 769 | (m&0x000000FF00000000)>>(2*8) | 770 | (m&0x0000FF0000000000)>>(4*8) | 771 | (m&0x00FF000000000000)>>(6*8)) 772 | iv |= uint64(e) << 52 773 | iv |= uint64(s) << 63 774 | return math.Float64frombits(iv) 775 | } 776 | --------------------------------------------------------------------------------
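The variable-length integer encoding (writeSignedHuff/readSignedHuff) and the float split (decompose/recompose) defined at the end of blocktypes.go are both intended to be lossless round trips. The sketch below is illustrative only: it assumes it sits next to blocktypes.go in package bstore so it can reach the unexported helpers, and the test name and sample values are hypothetical, not part of the repository.

package bstore

import "testing"

// Sketch only: round-trip checks for the helpers defined in blocktypes.go.
func TestCodecRoundTripSketch(t *testing.T) {
	// decompose/recompose split a float64 into (sign|exponent, byte-reversed
	// mantissa) and back; the byte reversal is its own inverse, so the
	// transform is lossless.
	e, m := decompose(3.14159)
	if recompose(e, m) != 3.14159 {
		t.Fatal("decompose/recompose should be lossless")
	}

	// writeSignedHuff stores the sign in the LSB and emits 1-9 bytes,
	// so a 9-byte buffer covers the worst case.
	buf := make([]byte, 9)
	n := writeSignedHuff(buf, -12345)
	v, used, kind := readSignedHuff(buf)
	if kind != VALUE || used != n || v != -12345 {
		t.Fatalf("varint round trip failed: v=%d used=%d kind=%d", v, used, kind)
	}
}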
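The comment above Vectorblock.Serialize ("entry 0: absolute time and value ... entry 4+: delta from average delta") and the deltadeltarizer closure in Coreblock.Serialize use the same idea: store each value as its difference from the average of the last few deltas. A minimal standalone sketch of that idea follows; the function name and the sliding-window bookkeeping are illustrative and simplified relative to the ring buffer the real code uses.

// Sketch only: encode a series as "delta from the average of up to the
// last 3 deltas". The first entry is stored absolutely, like entry 0 in
// Vectorblock.Serialize.
func deltaFromAvgDeltaEncode(vals []int64) []int64 {
	const depth = 3
	hist := make([]int64, 0, depth) // most recent deltas
	out := make([]int64, 0, len(vals))
	var last int64
	for i, v := range vals {
		if i == 0 {
			out = append(out, v)
			last = v
			continue
		}
		var avg int64
		if len(hist) > 0 {
			var sum int64
			for _, d := range hist {
				sum += d
			}
			avg = sum / int64(len(hist))
		}
		d := v - last
		out = append(out, d-avg) // small residual when the series changes smoothly
		if len(hist) == depth {
			hist = hist[1:]
		}
		hist = append(hist, d)
		last = v
	}
	return out
}

Decoding runs the same window in reverse: average the recovered deltas, add the stored residual to get the current delta, then add that to the previous value. That is what the dedeltadeltarizer closure in Coreblock.Deserialize and the main loop of Vectorblock.Deserialize do.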