├── .dockerignore
├── .gitignore
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── cmake
├── GreylockConfig.cmake.in
└── locate_library.cmake
├── conf
└── greylock.conf
├── debian
├── changelog
├── compat
├── control
├── copyright
├── dirs
├── docs
├── greylock-dev.install
├── greylock.install
└── rules
├── greylock-bf.spec
├── include
└── greylock
│ ├── database.hpp
│ ├── error.hpp
│ ├── id.hpp
│ ├── intersection.hpp
│ ├── iterator.hpp
│ ├── json.hpp
│ ├── jsonvalue.hpp
│ ├── types.hpp
│ └── utils.hpp
└── src
├── CMakeLists.txt
├── check.cpp
├── compact.cpp
├── exception.cpp
├── list.cpp
├── merge.cpp
├── meta.cpp
└── server.cpp
/.dockerignore:
--------------------------------------------------------------------------------
1 | build
2 | tags
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.a
2 | *.o
3 | *.py[co]
4 | *.so
5 | *.so.*
6 | *.tar.gz
7 | .*.sw*
8 | *~
9 | CMakeCache.txt
10 | CMakeFiles
11 | build
12 | cmake_install.cmake
13 | install_manifest.txt
14 | tags
15 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.6)
2 | project (greylock)
3 | 
4 | # The package version is derived from the topmost debian/changelog entry so
5 | # that the Debian package and the CMake build share one source of truth.
6 | file(READ "${CMAKE_CURRENT_SOURCE_DIR}/debian/changelog" DEBCHANGELOG)
7 | 
8 | string(REGEX MATCH "([0-9]+\\.[0-9]+\\.[0-9]+)" DEBFULLVERSION "${DEBCHANGELOG}")
9 | string(REGEX MATCH "([0-9]+\\.[0-9]+)" GREYLOCK_MAJOR_VERSION "${DEBFULLVERSION}")
10 | set(GREYLOCK_FULL_VERSION ${DEBFULLVERSION})
11 | 
12 | # Append to — do not clobber — any flags supplied by the user or a toolchain file.
13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++0x -W -Wall -Wextra -fstack-protector-all")
14 | 
15 | # PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps the module path correct
16 | # even when greylock is embedded as a subproject of a larger build tree.
17 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/")
18 | 
19 | find_package(Boost REQUIRED COMPONENTS system program_options filesystem thread)
20 | find_package(Ribosome REQUIRED)
21 | 
22 | include(cmake/locate_library.cmake)
23 | 
24 | # These dependencies ship no CMake config/find modules; locate each one by a
25 | # representative header and library name (sets <PREFIX>_INCLUDE_DIRS etc.).
26 | locate_library(JEMALLOC "jemalloc/jemalloc.h" "jemalloc")
27 | locate_library(MSGPACK "msgpack.hpp" "msgpack")
28 | locate_library(THEVOID "thevoid/server.hpp" "thevoid")
29 | locate_library(SWARM "swarm/http_request.hpp" "swarm")
30 | locate_library(ROCKSDB "rocksdb/db.h" "rocksdb")
31 | 
32 | # Public headers installed for greylock-dev consumers.
33 | file(GLOB headers
34 | 	"${CMAKE_CURRENT_SOURCE_DIR}/include/greylock/*.hpp"
35 | 	"${CMAKE_CURRENT_SOURCE_DIR}/include/greylock/*.h"
36 | )
37 | install(FILES ${headers} DESTINATION include/greylock)
38 | 
39 | # Generate and install the find_package() config for downstream projects.
40 | configure_file(cmake/GreylockConfig.cmake.in "${PROJECT_BINARY_DIR}/cmake/GreylockConfig.cmake" @ONLY)
41 | install(FILES "${PROJECT_BINARY_DIR}/cmake/GreylockConfig.cmake" DESTINATION share/greylock/cmake)
42 | 
43 | # Directory-scoped on purpose: every target under src/ consumes the same set
44 | # of dependency headers (CMake 2.6 has no target_include_directories()).
45 | include_directories(${PROJECT_SOURCE_DIR}/include
46 | 	${Boost_INCLUDE_DIRS}
47 | 	${MSGPACK_INCLUDE_DIRS}
48 | 	${RIBOSOME_INCLUDE_DIRS}
49 | 	${ROCKSDB_INCLUDE_DIRS}
50 | 	${SWARM_INCLUDE_DIRS}
51 | 	${THEVOID_INCLUDE_DIRS}
52 | )
53 | 
54 | add_subdirectory(src)
55 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM reverbrain/xenial-dev
2 | 
3 | #RUN echo "deb http://repo.reverbrain.com/trusty/ current/amd64/" > /etc/apt/sources.list.d/reverbrain.list && \
4 | #	echo "deb http://repo.reverbrain.com/trusty/ current/all/" >> /etc/apt/sources.list.d/reverbrain.list && \
5 | #	apt-get install -y curl tzdata && \
6 | #	cp -f /usr/share/zoneinfo/posix/W-SU /etc/localtime && \
7 | #	curl http://repo.reverbrain.com/REVERBRAIN.GPG | apt-key add - && \
8 | #	apt-get update && \
9 | #	apt-get upgrade -y && \
10 | #	apt-get install -y git g++ liblz4-dev libsnappy-dev zlib1g-dev libbz2-dev libzstd-dev libgflags-dev libjemalloc-dev && \
11 | #	apt-get install -y cmake debhelper cdbs devscripts && \
12 | #	apt-get install -y libboost-system-dev libboost-filesystem-dev libboost-program-options-dev && \
13 | #	apt-get install -y libmsgpack-dev libswarm3-dev libthevoid3-dev ribosome-dev && \
14 | #	git config --global user.email "zbr@ioremap.net" && \
15 | #	git config --global user.name "Evgeniy Polyakov"
16 | 
17 | #RUN cd /tmp && \
18 | #	git clone https://github.com/facebook/rocksdb && \
19 | #	cd rocksdb && \
20 | #	PORTABLE=1 make shared_lib && \
21 | #	make INSTALL_PATH=/usr install-shared && \
22 | #	echo "Rocksdb package has been updated and installed"
23 | 
24 | # Build and install the ribosome dependency from source.
25 | RUN cd /tmp && \
26 | 	rm -rf ribosome && \
27 | 	git clone https://github.com/reverbrain/ribosome && \
28 | 	cd ribosome && \
29 | 	git branch -v && \
30 | 	dpkg-buildpackage -b && \
31 | 	dpkg -i ../ribosome*.deb && \
32 | 	echo "Ribosome package has been updated and installed"
33 | 
34 | # Build and install greylock itself; drop the apt lists afterwards to keep
35 | # the image small. Split from the ribosome step: the original single RUN had
36 | # an empty line inside its backslash continuation, which Docker deprecates.
37 | RUN cd /tmp && \
38 | 	rm -rf greylock && \
39 | 	git clone https://github.com/reverbrain/greylock && \
40 | 	cd greylock && \
41 | 	git branch -v && \
42 | 	dpkg-buildpackage -b && \
43 | 	dpkg -i ../greylock_*.deb ../greylock-dev_*.deb && \
44 | 	echo "Greylock package has been updated and installed" && \
45 | 	rm -rf /var/lib/apt/lists/*
46 | 
47 | # HTTP service ports used by the greylock server.
48 | EXPOSE 8080 8181 8111
49 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/cmake/GreylockConfig.cmake.in:
--------------------------------------------------------------------------------
1 | # - Config file for the Greylock package
2 | # It defines the following variables
3 | # GREYLOCK_INCLUDE_DIRS - include directories for Greylock
4 | # GREYLOCK_LIBRARY_DIRS - library directories
5 | # GREYLOCK_LIBRARIES - libraries to link against
6 |
7 | get_filename_component(GREYLOCK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) # directory holding this config file
8 |
9 | set(GREYLOCK_INCLUDE_DIRS @INSTALL_INCLUDE_DIR@ # @...@ placeholders are substituted by configure_file() when the package is built
10 | @LZ4_INCLUDE_DIRS@
11 | @MSGPACK_INCLUDE_DIRS@
12 | @ROCKSDB_INCLUDE_DIRS@
13 | @SWARM_INCLUDE_DIRS@
14 | @THEVOID_INCLUDE_DIRS@
15 | )
16 |
17 | set(GREYLOCK_LIBRARY_DIRS # link directories of all transitive dependencies
18 | @LZ4_LIBRARY_DIRS@
19 | @MSGPACK_LIBRARY_DIRS@
20 | @ROCKSDB_LIBRARY_DIRS@
21 | @SWARM_LIBRARY_DIRS@
22 | @THEVOID_LIBRARY_DIRS@
23 | )
24 |
25 | set(GREYLOCK_LIBRARIES # dependency libraries first, then greylock itself
26 | @LZ4_LIBRARIES@
27 | @MSGPACK_LIBRARIES@
28 | @ROCKSDB_LIBRARIES@
29 | @SWARM_LIBRARIES@
30 | @THEVOID_LIBRARIES@
31 | greylock
32 | )
33 |
33 |
--------------------------------------------------------------------------------
/cmake/locate_library.cmake:
--------------------------------------------------------------------------------
1 | FUNCTION(LOCATE_LIBRARY VARIABLE HEADER LIBRARY) # sets ${VARIABLE}_INCLUDE_DIRS / ${VARIABLE}_LIBRARIES; extra args are PATH_SUFFIXES
2 | IF(${VARIABLE}_INCLUDE_DIRS AND ${VARIABLE}_LIBRARIES) # fixed: guard used to test ${VARIABLE}_LIBRARY_DIRS, which this function never sets
3 | RETURN()
4 | ENDIF()
5 | FIND_PATH(${VARIABLE}_INCLUDE_DIRS NAMES ${HEADER} PATH_SUFFIXES ${ARGN})
6 | MESSAGE(STATUS "header: ${HEADER}, arguments: ${ARGN} ==> ${${VARIABLE}_INCLUDE_DIRS}") # STATUS instead of bare message(): this is progress info, not a notice
7 | FIND_LIBRARY(${VARIABLE}_LIBRARIES NAMES ${LIBRARY} PATH_SUFFIXES ${ARGN})
8 | MESSAGE(STATUS "library: ${LIBRARY}, arguments: ${ARGN} ==> ${${VARIABLE}_LIBRARIES}")
9 |
10 | STRING(TOLOWER ${VARIABLE} LIBRARY_NAME)
11 |
12 | IF(NOT ${VARIABLE}_INCLUDE_DIRS OR NOT ${VARIABLE}_LIBRARIES)
13 | MESSAGE(FATAL_ERROR "${LIBRARY_NAME} development files are required to build.")
14 | ELSE()
15 | MESSAGE(STATUS "Found ${LIBRARY_NAME}: ${${VARIABLE}_LIBRARIES} - ${${VARIABLE}_INCLUDE_DIRS}")
16 | ENDIF()
17 | ENDFUNCTION()
18 |
19 | FUNCTION(LOCATE_HEADERS VARIABLE HEADER) # header-only lookup: sets ${VARIABLE}_INCLUDE_DIRS; extra args are PATH_SUFFIXES
20 | IF(${VARIABLE}_INCLUDE_DIRS)
21 | RETURN()
22 | ENDIF()
23 |
24 | FIND_PATH(${VARIABLE}_INCLUDE_DIRS NAMES ${HEADER} PATH_SUFFIXES ${ARGN})
25 | MESSAGE(STATUS "header: ${HEADER}, arguments: ${ARGN} ==> ${${VARIABLE}_INCLUDE_DIRS}") # STATUS instead of bare message(): progress info, not a notice
26 |
27 | IF(NOT ${VARIABLE}_INCLUDE_DIRS)
28 | MESSAGE(FATAL_ERROR "${HEADER} development files (headers) are required to build.") # fixed: ${LIBRARY_NAME} was never defined in this function, so the message named nothing
29 | ENDIF()
30 |
31 | MESSAGE(STATUS "Found ${HEADER}: ${${VARIABLE}_INCLUDE_DIRS}")
32 | ENDFUNCTION()
33 |
--------------------------------------------------------------------------------
/conf/greylock.conf:
--------------------------------------------------------------------------------
1 | {
2 | "endpoints": [
3 | "0.0.0.0:8181"
4 | ],
5 | "backlog": 512,
6 | "threads": 10,
7 | "buffer_size": 65536,
8 | "logger": {
9 | "level": "info",
10 | "frontends": [
11 | {
12 | "formatter": {
13 | "type": "string",
14 | "pattern": "%(timestamp)s %(request_id)s/%(lwp)s/%(pid)s %(severity)s: %(message)s, %(...L)s"
15 | },
16 | "sink": {
17 | "type": "files",
18 | "path": "/dev/stdout",
19 | "path1": "greylock.log",
20 | "autoflush": true,
21 | "rotation": { "move": 0 }
22 | }
23 | }
24 | ]
25 | },
26 | "daemon": {
27 | "fork": false,
28 | "uid": 1000
29 | },
30 | "monitor-port": 21235,
31 | "request_header": "X-Request",
32 | "trace_header": "X-Trace",
33 | "application": {
34 | "rocksdb.docs": {
35 | "read_only": false,
36 | "bulk_upload": false,
37 | "path": "/mnt/disk/search/lj/rocksdb.docs"
38 | },
39 | "rocksdb.indexes": {
40 | "read_only": false,
41 | "bulk_upload": false,
42 | "path": "/mnt/disk/search/lj/rocksdb.indexes"
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | greylock (1.1.0) unstable; urgency=low
2 |
3 | * Added date/time search
4 | * Added exact phrase search
5 | * Added negation support
6 | * Added pagination support
7 |
8 | -- Evgeniy Polyakov Tue, 09 Aug 2016 01:24:04 +0400
9 |
10 | greylock (1.0.0) unstable; urgency=low
11 |
12 | * Rewrite greylock search engine to use local rocksdb storage. It is not distributed search so far.
13 |
14 | -- Evgeniy Polyakov Thu, 28 Jul 2016 08:59:06 +0400
15 |
16 |
--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 7
2 |
--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
1 | Source: greylock
2 | Section: net
3 | Priority: optional
4 | Maintainer: Evgeniy Polyakov
5 | Build-Depends:
6 | cdbs,
7 | cmake (>= 2.6),
8 | debhelper (>= 7.0.50~),
9 | ribosome-dev (>= 0.2.8),
10 | libboost-dev,
11 | libboost-system-dev,
12 | libboost-program-options-dev,
13 | libboost-filesystem-dev,
14 | libjemalloc-dev,
15 | libmsgpack-dev,
16 | liblz4-dev,
17 | libswarm3-dev,
18 | libthevoid3-dev,
19 | zlib1g-dev,
20 | libbz2-dev,
21 | libsnappy-dev
22 | Standards-Version: 3.8.0
23 | Homepage: http://www.reverbrain.com/
24 | Vcs-Git: git://github.com/reverbrain/greylock.git
25 | Vcs-Browser: https://github.com/reverbrain/greylock
26 |
27 | Package: greylock
28 | Architecture: any
29 | Depends: ${shlibs:Depends}, ${misc:Depends}
30 | Description: Greylock is a local searching/indexing engine
31 |
32 | Package: greylock-dev
33 | Architecture: any
34 | Depends: ${shlibs:Depends}, ${misc:Depends},
35 | ribosome-dev (>= 0.2.8),
36 | libboost-dev,
37 | libboost-system-dev,
38 | libboost-program-options-dev,
39 | libboost-filesystem-dev,
40 | libjemalloc-dev,
41 | libmsgpack-dev,
42 | liblz4-dev,
43 | libswarm3-dev,
44 | libthevoid3-dev,
45 | zlib1g-dev,
46 | libbz2-dev,
47 | libsnappy-dev
48 | Description: Development files for greylock search engine
49 |
--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
2 | Upstream-Name: greylock
3 | Upstream-Contact: Evgeniy Polyakov
4 | Source: https://github.com/reverbrain/greylock
5 |
6 | Files: *
7 | Copyright: (C) 2015+ Evgeniy Polyakov
8 | License: GPL-3.0
9 |
--------------------------------------------------------------------------------
/debian/dirs:
--------------------------------------------------------------------------------
1 | usr/bin
2 | usr/sbin
3 |
--------------------------------------------------------------------------------
/debian/docs:
--------------------------------------------------------------------------------
1 | conf/
2 |
--------------------------------------------------------------------------------
/debian/greylock-dev.install:
--------------------------------------------------------------------------------
1 | usr/include/greylock/*
2 | usr/share/greylock/*
3 | usr/lib/libgreylock.so
4 |
--------------------------------------------------------------------------------
/debian/greylock.install:
--------------------------------------------------------------------------------
1 | usr/bin/greylock_*
2 | usr/lib/libgreylock.so.*
3 |
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | include /usr/share/cdbs/1/rules/debhelper.mk
4 | include /usr/share/cdbs/1/class/cmake.mk
5 |
6 | DEB_CMAKE_EXTRA_FLAGS=
7 | DEB_DH_SHLIBDEPS_ARGS_ALL= --dpkg-shlibdeps-params=--ignore-missing-info
8 |
9 | install/greylock-dev::
10 |
11 |
--------------------------------------------------------------------------------
/greylock-bf.spec:
--------------------------------------------------------------------------------
1 | Summary: Greylock is an embedded search engine
2 | Name: greylock
3 | Version: 1.1.0
4 | Release: 1%{?dist}.1
5 |
6 | License: GPLv3
7 | Group: System Environment/Libraries
8 | URL: http://reverbrain.com/
9 | Source0: %{name}-%{version}.tar.bz2
10 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
11 |
12 |
13 | BuildRequires: ribosome-devel
14 | BuildRequires: libswarm3-devel, libthevoid3-devel
15 | BuildRequires: boost-devel, boost-system, boost-program-options, boost-filesystem
16 | BuildRequires: jemalloc-devel, msgpack-devel, lz4-devel
17 | BuildRequires: cmake >= 2.6
18 |
19 | %description
20 | Greylock is an embedded search engine which is aimed at index size and performance.
21 | Index of 200k livejournal.com entries (200Mb of uncompressed data) takes about 450Mb,
22 | index includes: full-text and per-author search indexes, original content, stemmed and original content.
23 |
24 | %package devel
25 | Summary: Development files for %{name}
26 | Group: Development/Libraries
27 | Requires: %{name} = %{version}-%{release}
28 |
29 |
30 | %description devel
31 | Greylock is an embedded search engine which is aimed at index size and performance.
32 |
33 | This package contains libraries, header files and developer documentation
34 | needed for developing software which uses greylock utils.
35 |
36 | %prep
37 | %setup -q
38 |
39 | %build
40 | export LDFLAGS="-Wl,-z,defs"
41 | export DESTDIR="%{buildroot}"
42 | %{cmake} .
43 | make %{?_smp_mflags}
44 |
45 | %install
46 | rm -rf %{buildroot}
47 | make install DESTDIR="%{buildroot}"
48 |
49 | %post -p /sbin/ldconfig
50 | %postun -p /sbin/ldconfig
51 |
52 | %clean
53 | rm -rf %{buildroot}
54 |
55 | %files
56 | %defattr(-,root,root,-)
57 | %{_bindir}/greylock_*
58 | %{_libdir}/libgreylock.so.*
59 | %doc conf/
60 |
61 |
62 | %files devel
63 | %defattr(-,root,root,-)
64 | %{_includedir}/*
65 | %{_datadir}/greylock/cmake/*
66 | %{_libdir}/libgreylock.so
67 |
68 | %changelog
69 | * Tue Aug 09 2016 Evgeniy Polyakov - 1.1.0
70 | - Added date/time search
71 | - Added exact phrase search
72 | - Added negation support
73 | - Added pagination support
74 |
75 | * Thu Jul 28 2016 Evgeniy Polyakov - 1.0.0
76 | - Rewrite greylock search engine to use local rocksdb storage. It is not distributed search so far.
77 |
78 |
--------------------------------------------------------------------------------
/include/greylock/database.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "greylock/error.hpp"
4 | #include "greylock/id.hpp"
5 | #include "greylock/utils.hpp"
6 |
7 | #include
8 |
9 | #pragma GCC diagnostic push
10 | #pragma GCC diagnostic ignored "-Wunused-parameter"
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 | #pragma GCC diagnostic pop
22 |
23 | #include
24 |
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 |
31 | namespace ioremap { namespace greylock {
32 |
33 | struct options {
34 | size_t tokens_shard_size = 3600 * 1 * 24; // presumably seconds covered per token shard (one day) - TODO confirm against shard users
35 |
36 | int max_threads = 8;
37 |
38 | int bits_per_key = 10; // bloom filter parameter
39 |
40 | long lru_cache_size = 100 * 1024 * 1024; // 100 MB of uncompressed data cache
41 |
42 | long sync_metadata_timeout = 60000; // 60 seconds
43 |
44 | // minimum size of the token which will go into separate index,
45 | // if token size is smaller, it will be combined into 2 indexes
46 | // with the previous and next tokens.
47 | // This option greatly speeds up requests with small words (like [to be or not to be]),
48 | // but heavily increases index size.
49 | unsigned int ngram_index_size = 0;
50 |
51 | enum {
52 | default_column = 0,
53 | documents_column,
54 | document_ids_column,
55 | token_shards_column,
56 | indexes_column,
57 | meta_column,
58 | __column_size, // number of column families; used to size column_names
59 | };
60 |
61 | std::vector<std::string> column_names; // NOTE(review): element type restored - stripped by extraction (entries below are string literals)
62 | std::string metadata_key;
63 |
64 | options(): metadata_key("greylock.meta.key") {
65 | column_names.resize(__column_size);
66 | column_names[default_column] = rocksdb::kDefaultColumnFamilyName;
67 | column_names[documents_column] = "documents";
68 | column_names[document_ids_column] = "document_ids";
69 | column_names[token_shards_column] = "token_shards";
70 | column_names[indexes_column] = "indexes";
71 | column_names[meta_column] = "meta";
72 | }
73 |
74 | std::string column_name(int cnum) const { // returns "" when cnum is out of range
75 | if (cnum < 0 || cnum >= __column_size)
76 | return "";
77 |
78 | return column_names[cnum];
79 | }
80 | };
81 |
82 | class metadata {
83 | public:
84 | metadata() : m_dirty(false), m_seq(0) {}
85 |
86 | bool dirty() const { // true when the sequence changed since the last sync and must be written back
87 | return m_dirty;
88 | }
89 | void clear_dirty() {
90 | m_dirty = false;
91 | }
92 |
93 | long get_sequence() { // post-increment: returns current value, marks metadata dirty
94 | m_dirty = true;
95 | return m_seq++;
96 | }
97 |
98 | void set_sequence(long seq) {
99 | m_dirty = true;
100 | m_seq = seq;
101 | }
102 |
103 | enum {
104 | serialize_version_2 = 2, // on-disk format version; also equals the serialized array size (checked on unpack)
105 | };
106 |
107 | template <typename Stream> // NOTE(review): template parameter list restored - it was stripped by extraction
108 | void msgpack_pack(msgpack::packer<Stream> &o) const {
109 | o.pack_array(metadata::serialize_version_2);
110 | o.pack((int)metadata::serialize_version_2);
111 | o.pack(m_seq.load());
112 | }
113 |
114 | void msgpack_unpack(msgpack::object o) {
115 | if (o.type != msgpack::type::ARRAY) {
116 | std::ostringstream ss;
117 | ss << "could not unpack metadata, object type is " << o.type <<
118 | ", must be array (" << msgpack::type::ARRAY << ")";
119 | throw std::runtime_error(ss.str());
120 | }
121 |
122 | int version;
123 | long seq;
124 |
125 | msgpack::object *p = o.via.array.ptr;
126 | p[0].convert(&version);
127 |
128 | if (version != (int)o.via.array.size) {
129 | std::ostringstream ss;
130 | ss << "could not unpack document, invalid version: " << version << ", array size: " << o.via.array.size;
131 | throw std::runtime_error(ss.str());
132 | }
133 |
134 | switch (version) {
135 | case metadata::serialize_version_2:
136 | p[1].convert(&seq);
137 | m_seq.store(seq);
138 | break;
139 | default: {
140 | std::ostringstream ss;
141 | ss << "could not unpack metadata, invalid version " << version;
142 | throw std::runtime_error(ss.str());
143 | }
144 | }
145 | }
146 |
147 | private:
148 | bool m_dirty; // not atomic; presumably guarded externally - TODO confirm
149 | std::atomic_long m_seq;
150 | };
151 |
152 | struct document_for_index {
153 | id_t indexed_id; // project-declared id type; the only per-document value stored in an index
154 | MSGPACK_DEFINE(indexed_id); // msgpack serialization of the single field
155 |
156 | bool operator<(const document_for_index &other) const { // ordering used by std::set in the merge operators below
157 | return indexed_id < other.indexed_id;
158 | }
159 | };
160 |
161 | namespace {
162 | static const uint32_t disk_cookie = 0x45589560; // magic value packed into every serialized disk_index; checked on unpack to detect format mismatch
163 | }
164 |
165 | struct disk_index {
166 | typedef document_for_index value_type;
167 | typedef document_for_index& reference;
168 | typedef document_for_index* pointer;
169 |
170 | std::vector<document_for_index> ids; // NOTE(review): element type restored - stripped by extraction (grounded by value_type typedef above)
171 |
172 | template <typename Stream> // NOTE(review): template parameter list restored - stripped by extraction
173 | void msgpack_pack(msgpack::packer<Stream> &o) const {
174 | o.pack_array(2);
175 | o.pack(disk_cookie); // magic cookie guards against loading a foreign/corrupt blob
176 | o.pack(ids);
177 | }
178 |
179 | void msgpack_unpack(msgpack::object o) {
180 | if (o.type != msgpack::type::ARRAY) {
181 | std::ostringstream ss;
182 | ss << "could not unpack disk index, object type is " << o.type <<
183 | ", must be array (" << msgpack::type::ARRAY << ")";
184 | throw std::runtime_error(ss.str());
185 | }
186 |
187 | uint32_t cookie;
188 |
189 | msgpack::object *p = o.via.array.ptr;
190 | p[0].convert(&cookie);
191 |
192 | if (cookie != disk_cookie) {
193 | std::ostringstream ss;
194 | ss << "could not unpack disk index, cookie mismatch: " << std::hex << cookie <<
195 | ", must be: " << std::hex << disk_cookie;
196 | throw std::runtime_error(ss.str());
197 | }
198 |
199 | p[1].convert(&ids);
200 | }
201 | };
202 |
203 | struct disk_token {
204 | std::vector<size_t> shards; // NOTE(review): element type restored - stripped by extraction; assumed size_t shard numbers - TODO confirm
205 | MSGPACK_DEFINE(shards);
206 |
207 | disk_token() {}
208 | disk_token(const std::set<size_t> &s): shards(s.begin(), s.end()) {} // converting ctor from the de-duplicated set used in merges
209 | disk_token(const std::vector<size_t> &s): shards(s) {}
210 | };
211 |
212 | class indexes_merge_operator : public rocksdb::MergeOperator {
213 | public:
214 | virtual const char* Name() const override {
215 | return "indexes_merge_operator";
216 | }
217 |
218 | bool merge_indexes(const rocksdb::Slice& key, const rocksdb::Slice* old_value,
219 | const std::deque<std::string>& operand_list, // NOTE(review): element type restored - stripped by extraction (matches rocksdb FullMerge signature)
220 | std::string* new_value,
221 | rocksdb::Logger *logger) const {
222 |
223 | disk_index index;
224 | greylock::error_info err;
225 | std::set<document_for_index> unique_index; // NOTE(review): element type restored - stripped by extraction; set de-duplicates and sorts
226 | size_t ocount = 0;
227 |
228 | if (old_value) {
229 | err = deserialize(index, old_value->data(), old_value->size());
230 | if (err) {
231 | rocksdb::Error(logger, "merge: key: %s, index deserialize failed: %s [%d]",
232 | key.ToString().c_str(), err.message().c_str(), err.code());
233 | return false;
234 | }
235 |
236 | unique_index.insert(index.ids.begin(), index.ids.end());
237 | ocount = unique_index.size();
238 | }
239 |
240 | for (const auto& value : operand_list) {
241 | msgpack::unpacked msg;
242 |
243 | try {
244 | msgpack::unpack(&msg, value.data(), value.size()); // fixed: moved inside try - unpack() throws on malformed input and the exception escaped the handler
245 | msgpack::object o = msg.get();
246 |
247 | if (o.type != msgpack::type::ARRAY) { // plain operand: a single document_for_index entry
248 | document_for_index did;
249 | o.convert(&did);
250 | unique_index.emplace(did);
251 | continue;
252 | }
253 |
254 | disk_index idx;
255 | o.convert(&idx);
256 |
257 | unique_index.insert(idx.ids.begin(), idx.ids.end());
258 | } catch (const std::exception &e) {
259 | rocksdb::Error(logger, "merge: key: %s, document deserialize failed: %s",
260 | key.ToString().c_str(), e.what());
261 | return false;
262 | }
263 | }
264 |
265 | index.ids.clear();
266 | index.ids.insert(index.ids.end(), unique_index.begin(), unique_index.end()); // set iteration keeps ids sorted and unique
267 | *new_value = serialize(index);
268 |
269 | if (new_value->size() > 1024 * 1024) { // log unusually large (> 1 MB) merged indexes
270 | size_t osize = 0;
271 | if (old_value)
272 | osize = old_value->size();
273 | rocksdb::Info(logger, "index_merge: key: %s, size: %ld -> %ld, counts: %ld -> %ld",
274 | key.ToString().c_str(), osize, new_value->size(), ocount, index.ids.size());
275 | }
276 |
277 | return true;
278 | }
279 |
280 | virtual bool FullMerge(const rocksdb::Slice& key, const rocksdb::Slice* old_value,
281 | const std::deque<std::string>& operand_list, // NOTE(review): element type restored - stripped by extraction
282 | std::string* new_value,
283 | rocksdb::Logger *logger) const override {
284 | return merge_indexes(key, old_value, operand_list, new_value, logger);
285 | }
286 |
287 | virtual bool PartialMerge(const rocksdb::Slice& key,
288 | const rocksdb::Slice& left_operand, const rocksdb::Slice& right_operand,
289 | std::string* new_value,
290 | rocksdb::Logger* logger) const {
291 | #if 0
292 | auto dump = [](const rocksdb::Slice &v) {
293 | std::ostringstream ss;
294 |
295 | msgpack::unpacked msg;
296 | msgpack::unpack(&msg, v.data(), v.size());
297 |
298 | ss << msg.get();
299 | return ss.str();
300 | };
301 |
302 | printf("partial merge: key: %s, left: %s, right: %s\n",
303 | key.ToString().c_str(), dump(left_operand).c_str(), dump(right_operand).c_str());
304 | #endif
305 | (void) key;
306 | (void) left_operand;
307 | (void) right_operand;
308 | (void) new_value;
309 | (void) logger;
310 |
311 | return false; // partial merge unsupported: operands are folded during FullMerge only
312 | }
313 | };
314 |
315 | class token_shards_merge_operator : public rocksdb::MergeOperator {
316 | public:
317 | virtual const char* Name() const override {
318 | return "token_shards_merge_operator";
319 | }
320 |
321 | bool merge_token_shards(const rocksdb::Slice& key, const rocksdb::Slice* old_value,
322 | const std::deque<std::string>& operand_list, // NOTE(review): element type restored - stripped by extraction (matches rocksdb FullMerge signature)
323 | std::string* new_value,
324 | rocksdb::Logger *logger) const {
325 |
326 | disk_token dt;
327 | std::set<size_t> shards; // NOTE(review): element type restored - stripped by extraction; assumed to match disk_token::shards - TODO confirm
328 | greylock::error_info err;
329 |
330 | if (old_value) {
331 | err = deserialize(dt, old_value->data(), old_value->size());
332 | if (err) {
333 | rocksdb::Error(logger, "merge: key: %s, disk_token deserialize failed: %s [%d]",
334 | key.ToString().c_str(), err.message().c_str(), err.code());
335 | return false;
336 | }
337 |
338 | shards.insert(dt.shards.begin(), dt.shards.end());
339 | }
340 |
341 | for (const auto& value : operand_list) {
342 | disk_token s;
343 | err = deserialize(s, value.data(), value.size());
344 | if (err) {
345 | rocksdb::Error(logger, "merge: key: %s, disk_token operand deserialize failed: %s [%d]",
346 | key.ToString().c_str(), err.message().c_str(), err.code());
347 | return false;
348 | }
349 |
350 | shards.insert(s.shards.begin(), s.shards.end());
351 | }
352 |
353 | dt.shards = std::vector<size_t>(shards.begin(), shards.end()); // set iteration keeps shard ids sorted and unique
354 | *new_value = serialize(dt);
355 |
356 | if (new_value->size() > 1024 * 1024) { // warn on unusually large (> 1 MB) shard lists
357 | size_t osize = 0;
358 | if (old_value) {
359 | osize = old_value->size();
360 | }
361 |
362 | rocksdb::Warn(logger, "shard_merge: key: %s, size: %ld -> %ld",
363 | key.ToString().c_str(), osize, new_value->size());
364 | }
365 |
366 | return true;
367 | }
368 |
369 | virtual bool FullMerge(const rocksdb::Slice& key, const rocksdb::Slice* old_value,
370 | const std::deque<std::string>& operand_list, // NOTE(review): element type restored - stripped by extraction
371 | std::string* new_value,
372 | rocksdb::Logger *logger) const override {
373 | return merge_token_shards(key, old_value, operand_list, new_value, logger);
374 | }
375 |
376 | virtual bool PartialMerge(const rocksdb::Slice& key,
377 | const rocksdb::Slice& left_operand, const rocksdb::Slice& right_operand,
378 | std::string* new_value,
379 | rocksdb::Logger* logger) const {
380 | #if 0
381 | auto dump = [](const rocksdb::Slice &v) {
382 | std::ostringstream ss;
383 |
384 | msgpack::unpacked msg;
385 | msgpack::unpack(&msg, v.data(), v.size());
386 |
387 | ss << msg.get();
388 | return ss.str();
389 | };
390 |
391 | printf("partial merge: key: %s, left: %s, right: %s\n",
392 | key.ToString().c_str(), dump(left_operand).c_str(), dump(right_operand).c_str());
393 | #endif
394 | (void) key;
395 | (void) left_operand;
396 | (void) right_operand;
397 | (void) new_value;
398 | (void) logger;
399 |
400 | return false; // partial merge unsupported: operands are folded during FullMerge only
401 | }
402 | };
403 |
404 | class database {
405 | public:
406 | ~database() {
407 | if (!m_ro) {
408 | m_expiration_timer.stop();
409 | sync_metadata(NULL);
410 | }
411 | }
412 |
413 | const greylock::options &options() const {
414 | return m_opts;
415 | }
416 | greylock::metadata &metadata() {
417 | return m_meta;
418 | }
419 |
420 | rocksdb::ColumnFamilyHandle *cfhandle(int c) {
421 | return m_handles[c];
422 | }
423 |
424 | void compact() {
425 | if (m_db) {
426 | for (auto h: m_handles) {
427 | struct rocksdb::CompactRangeOptions opts;
428 | opts.change_level = true;
429 | opts.target_level = 0;
430 | m_db->CompactRange(opts, h, NULL, NULL);
431 | }
432 | }
433 | }
434 |
435 | void compact(size_t c, const rocksdb::Slice &start, const rocksdb::Slice &end) {
436 | if (m_db && c < m_handles.size()) {
437 | const rocksdb::Slice *b = NULL;
438 | const rocksdb::Slice *e = NULL;
439 |
440 | if (start != rocksdb::Slice()) {
441 | b = &start;
442 | }
443 | if (end != rocksdb::Slice()) {
444 | e = &end;
445 | }
446 |
447 | struct rocksdb::CompactRangeOptions opts;
448 | opts.change_level = true;
449 | opts.target_level = 0;
450 | m_db->CompactRange(opts, cfhandle(c), b, e);
451 | }
452 | }
453 |
454 | greylock::error_info sync_metadata(rocksdb::WriteBatch *batch) {
455 | if (m_ro) {
456 | return greylock::create_error(-EROFS, "read-only database");
457 | }
458 |
459 | if (!m_db) {
460 | return greylock::create_error(-EINVAL, "database is not opened");
461 | }
462 |
463 | if (!m_meta.dirty())
464 | return greylock::error_info();
465 |
466 | std::string meta_serialized = serialize(m_meta);
467 |
468 | rocksdb::Status s;
469 | if (batch) {
470 | batch->Put(m_handles[options::meta_column], rocksdb::Slice(m_opts.metadata_key), rocksdb::Slice(meta_serialized));
471 | } else {
472 | s = m_db->Put(rocksdb::WriteOptions(), m_handles[options::meta_column],
473 | rocksdb::Slice(m_opts.metadata_key), rocksdb::Slice(meta_serialized));
474 | }
475 |
476 | if (!s.ok()) {
477 | return greylock::create_error(-s.code(), "could not write metadata key: %s, error: %s",
478 | m_opts.metadata_key.c_str(), s.ToString().c_str());
479 | }
480 |
481 | m_meta.clear_dirty();
482 | return greylock::error_info();
483 | }
484 |
485 | greylock::error_info open_read_only(const std::string &path) {
486 | return open(path, true, false);
487 | }
488 | greylock::error_info open_read_write(const std::string &path) {
489 | return open(path, false, false);
490 | }
491 |
492 | greylock::error_info open(const std::string &path, bool ro, bool bulk) {
493 | if (m_db) {
494 | return greylock::create_error(-EINVAL, "database is already opened");
495 | }
496 |
497 | rocksdb::Options dbo;
498 | dbo.max_open_files = 1000;
499 | //dbo.disableDataSync = true;
500 | dbo.IncreaseParallelism(m_opts.max_threads);
501 |
502 | dbo.max_bytes_for_level_base = 1024 * 1024 * 1024 * 100UL;
503 | //dbo.write_buffer_size = 1024 * 1024 * 1024UL;
504 | //dbo.max_write_buffer_number = 10;
505 | //dbo.min_write_buffer_number_to_merge = 4;
506 |
507 | dbo.compression = rocksdb::kZSTDNotFinalCompression;
508 | dbo.num_levels = 10;
509 | #if 0
510 | dbo.compression_per_level =
511 | std::vector({
512 | rocksdb::kZSTDNotFinalCompression,
513 | rocksdb::kZSTDNotFinalCompression,
514 | rocksdb::kZSTDNotFinalCompression,
515 | rocksdb::kZSTDNotFinalCompression,
516 | rocksdb::kZSTDNotFinalCompression,
517 | });
518 | #endif
519 | dbo.compression_opts = rocksdb::CompressionOptions(-14, 5, 0, 0);
520 |
521 | dbo.create_if_missing = true;
522 | dbo.create_missing_column_families = true;
523 |
524 | if (!ro && bulk) {
525 | dbo.PrepareForBulkLoad();
526 | }
527 |
528 | dbo.statistics = rocksdb::CreateDBStatistics();
529 | dbo.stats_dump_period_sec = 60;
530 |
531 | rocksdb::BlockBasedTableOptions table_options;
532 | table_options.block_cache = rocksdb::NewLRUCache(m_opts.lru_cache_size);
533 | table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(m_opts.bits_per_key, true));
534 | dbo.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
535 |
536 | rocksdb::DB *db;
537 | rocksdb::Status s;
538 |
539 | rocksdb::ColumnFamilyOptions cfo(dbo);
540 |
541 | std::vector column_families;
542 |
543 | for (size_t i = 0; i < options().column_names.size(); ++i) {
544 | auto cname = options().column_names[i];
545 |
546 | cfo.merge_operator.reset();
547 |
548 | if (i == greylock::options::token_shards_column) {
549 | cfo.merge_operator.reset(new token_shards_merge_operator);
550 | }
551 | if (i == greylock::options::indexes_column) {
552 | cfo.merge_operator.reset(new indexes_merge_operator);
553 | }
554 |
555 | column_families.push_back(rocksdb::ColumnFamilyDescriptor(cname, cfo));
556 | }
557 |
558 | if (ro) {
559 | s = rocksdb::DB::OpenForReadOnly(dbo, path, column_families, &m_handles, &db);
560 | } else {
561 | s = rocksdb::DB::Open(dbo, path, column_families, &m_handles, &db);
562 | }
563 | if (!s.ok()) {
564 | return greylock::create_error(-s.code(), "failed to open rocksdb database: '%s', read-only: %d, error: %s",
565 | path.c_str(), ro, s.ToString().c_str());
566 | }
567 | m_db.reset(db);
568 | m_ro = ro;
569 |
570 | std::string meta;
571 | s = m_db->Get(rocksdb::ReadOptions(), m_handles[options::meta_column], rocksdb::Slice(m_opts.metadata_key), &meta);
572 | if (!s.ok() && !s.IsNotFound()) {
573 | return greylock::create_error(-s.code(), "could not read key: %s, error: %s",
574 | m_opts.metadata_key.c_str(), s.ToString().c_str());
575 | }
576 |
577 | if (s.ok()) {
578 | auto err = deserialize(m_meta, meta.data(), meta.size());
579 | if (err)
580 | return greylock::create_error(err.code(), "metadata deserialization failed, key: %s, error: %s",
581 | m_opts.metadata_key.c_str(), err.message().c_str());
582 | }
583 |
584 | if (m_opts.sync_metadata_timeout > 0 && !ro) {
585 | sync_metadata_callback();
586 | }
587 |
588 | return greylock::error_info();
589 | }
590 |
591 | std::vector get_shards(const std::string &key) {
592 | disk_token dt;
593 | if (!m_db) {
594 | return dt.shards;
595 | }
596 |
597 | std::string ser_shards;
598 | auto err = read(options::token_shards_column, key, &ser_shards);
599 | if (err)
600 | return dt.shards;
601 |
602 | err = deserialize(dt, ser_shards.data(), ser_shards.size());
603 | if (err)
604 | return dt.shards;
605 |
606 | return dt.shards;
607 | }
608 |
609 | rocksdb::Iterator *iterator(int column, const rocksdb::ReadOptions &ro) {
610 | return m_db->NewIterator(ro, m_handles[column]);
611 | }
612 |
613 | greylock::error_info read(int column, const std::string &key, std::string *ret) {
614 | if (!m_db) {
615 | return greylock::create_error(-EINVAL, "database is not opened");
616 | }
617 |
618 | auto s = m_db->Get(rocksdb::ReadOptions(), m_handles[column], rocksdb::Slice(key), ret);
619 | if (!s.ok()) {
620 | return greylock::create_error(-s.code(), "could not read key: %s, error: %s", key.c_str(), s.ToString().c_str());
621 | }
622 | return greylock::error_info();
623 | }
624 |
625 | greylock::error_info write(rocksdb::WriteBatch *batch) {
626 | if (!m_db) {
627 | return greylock::create_error(-EINVAL, "database is not opened");
628 | }
629 |
630 | if (m_ro) {
631 | return greylock::create_error(-EROFS, "read-only database");
632 | }
633 |
634 | auto wo = rocksdb::WriteOptions();
635 |
636 | auto s = m_db->Write(wo, batch);
637 | if (!s.ok()) {
638 | return greylock::create_error(-s.code(), "could not write batch: %s", s.ToString().c_str());
639 | }
640 |
641 | return greylock::error_info();
642 | }
643 |
644 | greylock::error_info write(int column, const std::string &key, const std::string &value) {
645 | if (!m_db) {
646 | return greylock::create_error(-EINVAL, "database is not opened");
647 | }
648 |
649 | if (m_ro) {
650 | return greylock::create_error(-EROFS, "read-only database");
651 | }
652 |
653 | auto wo = rocksdb::WriteOptions();
654 |
655 | auto s = m_db->Merge(wo, m_handles[column], rocksdb::Slice(key), rocksdb::Slice(value));
656 | if (!s.ok()) {
657 | return greylock::create_error(-s.code(), "could not write batch: %s", s.ToString().c_str());
658 | }
659 |
660 | return greylock::error_info();
661 | }
662 |
663 | private:
664 | bool m_ro = false;
665 | std::vector m_handles;
666 | std::unique_ptr m_db;
667 | greylock::options m_opts;
668 | greylock::metadata m_meta;
669 |
670 | ribosome::expiration m_expiration_timer;
671 |
672 | void sync_metadata_callback() {
673 | sync_metadata(NULL);
674 |
675 | auto expires_at = std::chrono::system_clock::now() + std::chrono::milliseconds(m_opts.sync_metadata_timeout);
676 | m_expiration_timer.insert(expires_at, std::bind(&database::sync_metadata_callback, this));
677 | }
678 | };
679 |
680 | }} // namespace ioremap::greylock
681 |
--------------------------------------------------------------------------------
/include/greylock/error.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <exception>
4 | #include <string>
5 |
6 | namespace ioremap { namespace greylock {
7 |
// Base greylock exception: carries a negative errno-style code and a message.
// Declarations only; definitions presumably live in src/exception.cpp — confirm there.
8 | class error : public std::exception
9 | {
10 | public:
11 | 	// err must be negative value
12 | 	explicit error(int err, const std::string &message) throw();
13 | 	~error() throw() {}
14 |
	// the negative code passed at construction (stored in m_errno — verify in exception.cpp)
15 | 	int error_code() const;
16 |
	// std::exception interface
17 | 	virtual const char *what() const throw();
18 |
19 | 	std::string error_message() const throw();
20 |
21 | private:
22 | 	int m_errno;
23 | 	std::string m_message;
24 | };
25 |
// error subclass used to signal not-found conditions; constructor defined out of line
26 | class not_found_error : public error
27 | {
28 | public:
29 | 	explicit not_found_error(const std::string &message) throw();
30 | };
31 |
// error subclass used to signal timeouts; constructor defined out of line
32 | class timeout_error : public error
33 | {
34 | public:
35 | 	explicit timeout_error(const std::string &message) throw();
36 | };
37 |
// error subclass used to signal unknown/unreachable addresses; constructor defined out of line
38 | class no_such_address_error : public error
39 | {
40 | public:
41 | 	explicit no_such_address_error(const std::string &message) throw();
42 | };
43 |
// Lightweight error carrier used on hot paths instead of exceptions:
// code 0 means success, a negative errno-style code means failure.
// Converts to bool (true == error); implicit conversion to int is deleted
// so a code cannot silently leak into arithmetic.
class error_info
{
public:
	inline error_info() : m_code(0) {}
	// rvalue overload; the original took `const std::string &&` which can bind
	// an rvalue but can never be moved from (it always copied) — take a mutable
	// rvalue reference and actually move the message into place
	inline error_info(int code, std::string &&message)
		: m_code(code), m_message(std::move(message)) {}
	inline error_info(int code, const std::string &message)
		: m_code(code), m_message(message) {}
	inline ~error_info() {}

	inline int code() const { return m_code; }
	inline const std::string &message() const { return m_message; }
	// true means "an error is set"
	inline operator bool() const { return m_code != 0; }
	inline bool operator !() const { return !operator bool(); }
	operator int() const = delete; // disable implicit cast to int

	// throws a greylock exception matching m_code (defined out of line)
	void throw_error() const;
private:
	int m_code;
	std::string m_message;
};
65 |
66 | // err must be negative value
67 | void throw_error(int err, const char *format, ...)
68 | __attribute__ ((format (printf, 2, 3)));
69 |
70 | // err must be negative value
71 | error_info create_error(int err, const char *format, ...)
72 | __attribute__ ((format (printf, 2, 3)));
73 |
74 | }} /* namespace ioremap::greylock */
75 |
--------------------------------------------------------------------------------
/include/greylock/id.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <msgpack.hpp>
4 |
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 |
8 | #include <string>
9 |
10 | namespace ioremap { namespace greylock {
11 |
// Time base for id_t: timestamps are stored as whole days (date_div seconds)
// elapsed since start_date.
12 | namespace {
13 | 	static const uint32_t start_date = 0;
14 | 	static const uint32_t date_div = 3600 * 24;
15 | }
16 |
17 | struct id_t {
18 | uint64_t timestamp = 0;
19 |
20 | MSGPACK_DEFINE(timestamp);
21 |
22 | void set_timestamp(long tsec, long aux) {
23 | tsec = (tsec - start_date) / date_div;
24 |
25 | timestamp = tsec << 32;
26 | timestamp |= aux & ((1UL << 32) - 1);
27 | }
28 |
29 | void get_timestamp(long *tsec, long *aux) const {
30 | *tsec = (timestamp >> 32) * date_div + start_date;
31 | *aux = timestamp & ((1UL << 32) - 1);
32 | }
33 |
34 | bool operator<(const id_t &other) const {
35 | return timestamp < other.timestamp;
36 | }
37 | bool operator>(const id_t &other) const {
38 | return timestamp > other.timestamp;
39 | }
40 |
41 | bool operator==(const id_t &other) const {
42 | return (timestamp == other.timestamp);
43 | }
44 | bool operator!=(const id_t &other) const {
45 | return !operator==(other);
46 | }
47 |
48 | std::string to_string() const {
49 | char buf[64];
50 | size_t sz = snprintf(buf, sizeof(buf), "%016lx", timestamp);
51 | return std::string(buf, sz);
52 | }
53 |
54 | id_t(): timestamp(0) {
55 | }
56 |
57 | id_t(const id_t &other) {
58 | timestamp = other.timestamp;
59 | }
60 |
61 | id_t(const char *str) {
62 | if (!str) {
63 | id_t();
64 | return;
65 | }
66 |
67 | timestamp = strtoull(str, NULL, 16);
68 | }
69 |
70 | void set_next_id(const id_t &other) {
71 | timestamp = other.timestamp + 1;
72 | }
73 |
74 | };
75 |
76 | }} // namespace ioremap::greylock
77 |
--------------------------------------------------------------------------------
/include/greylock/intersection.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __INDEXES_INTERSECTION_HPP
2 | #define __INDEXES_INTERSECTION_HPP
3 |
4 | #include "greylock/iterator.hpp"
5 | #include "greylock/types.hpp"
6 |
7 | namespace ioremap { namespace greylock {
8 |
// One matching document returned by the intersector.
9 | struct single_doc_result {
10 | 	document doc;
11 |
	// relevance score; may be set/changed by the check_result_function_t callback
12 | 	float relevance = 0;
13 | };
14 |
15 | struct search_result {
16 | bool completed = true;
17 |
18 | // This will contain a cookie which must be used for the next intersection request,
19 | // if current request is not complete. This may happen when client has requested limited
20 | // maximum number of keys in reply and there are more keys.
21 | id_t next_document_id;
22 | long max_number_of_documents = ~0UL;
23 |
24 | // array of documents which contain all requested indexes
25 | std::vector docs;
26 | };
27 |
28 | // check whether given result matches query, may also set or change some result parameters like relevance field
29 | typedef std::function check_result_function_t;
30 |
// Query against a single mailbox, parsed from a JSON request: "query" terms are
// intersected, "exact" terms are intersected too (phrase match implies all tokens
// present), "negation" terms exclude documents.
31 | struct mailbox_query {
32 | 	std::string mbox;
33 | 	greylock::indexes idx;
34 |
	// set when the JSON contained no usable query; check before using idx
35 | 	greylock::error_info parse_error;
36 |
37 | 	mailbox_query(const greylock::options &options, const rapidjson::Value &doc) {
38 | 		const rapidjson::Value &query_and = greylock::get_object(doc, "query");
39 | 		if (query_and.IsObject()) {
40 | 			auto ireq = indexes::get_indexes(options, query_and);
41 | 			idx.merge_query(ireq);
42 | 		}
43 |
44 | 		const rapidjson::Value &query_exact = greylock::get_object(doc, "exact");
45 | 		if (query_exact.IsObject()) {
46 | 			auto ireq = indexes::get_indexes(options, query_exact);
47 |
48 | 			// merge these indexes into intersection set,
49 | 			// since exact phrase match implies document contains all tokens
50 | 			idx.merge_exact(ireq);
51 | 		}
52 |
53 | 		const rapidjson::Value &query_negation = greylock::get_object(doc, "negation");
54 | 		if (query_negation.IsObject()) {
55 | 			auto ireq = indexes::get_indexes(options, query_negation);
56 | 			// do not merge these indexes into intersection set, put them into own container
57 | 			idx.merge_negation(ireq);
58 | 		}
59 |
60 | 		if (idx.attributes.empty()) {
	// NOTE(review): mbox is default-empty at construction time, so this error
	// message always formats an empty mailbox name — presumably callers set
	// mbox after construction; confirm against the callers
61 | 			parse_error = greylock::create_error(-ENOENT,
62 | 				"search: mailbox: %s, there are no queries suitable for search", mbox.c_str());
63 | 			return;
64 | 		}
65 | 	}
66 | };
67 |
68 | struct intersection_query {
69 | id_t range_start, range_end;
70 |
71 | std::vector se;
72 |
73 | id_t next_document_id;
74 | size_t max_number = LONG_MAX;
75 |
76 | std::string to_string() const {
77 | std::ostringstream ss;
78 |
79 | ss << "[ ";
80 | for (const auto &ent: se) {
81 | ss << "mailbox: " << ent.mbox << ", indexes: " << ent.idx.to_string() << "| ";
82 | }
83 | ss << "]";
84 |
85 | return ss.str();
86 | }
87 | };
88 |
89 | template
90 | class intersector {
91 | public:
92 | intersector(DBT &db_docs, DBT &db_indexes) : m_db_docs(db_docs), m_db_indexes(db_indexes) {}
93 |
94 | search_result intersect(const intersection_query &iq) const {
95 | return intersect(iq, [&] (single_doc_result &) -> bool {
96 | return true;
97 | });
98 | }
99 |
100 | // search for intersections between all @indexes
101 | // starting with the key @start, returning at most @num entries
102 | //
103 | // after @intersect() completes, it sets @start to the next key to start searching from
104 | // user should not change that token, otherwise @intersect() may skip some entries or
105 | // return duplicates.
106 | //
107 | // if number of returned entries is less than requested number @num or if @start has been set to empty string
108 | // after call to this function returns, then intersection is completed.
109 | //
110 | // @search_result.completed will be set to true in this case.
111 | search_result intersect(const intersection_query &iq, check_result_function_t check) const {
112 | search_result res;
113 | #ifdef STDOUT_DEBUG
114 | auto dump_vector = [] (const std::vector &sh) -> std::string {
115 | std::ostringstream ss;
116 | for (size_t i = 0; i < sh.size(); ++i) {
117 | ss << sh[i];
118 | if (i != sh.size() - 1)
119 | ss << " ";
120 | }
121 |
122 | return ss.str();
123 | };
124 |
125 | #endif
126 |
127 |
128 | std::vector common_shards;
129 | bool init = true;
130 | for (const auto &ent: iq.se) {
131 | for (const auto &attr: ent.idx.attributes) {
132 | for (const auto &t: attr.tokens) {
133 | std::string shard_key = document::generate_shard_key(m_db_indexes.options(), ent.mbox, attr.name, t.name);
134 | auto shards = m_db_indexes.get_shards(shard_key);
135 | #ifdef STDOUT_DEBUG
136 | printf("common_shards: %s, key: %s, shards: %s\n",
137 | dump_vector(common_shards).c_str(), shard_key.c_str(),
138 | dump_vector(shards).c_str());
139 | #endif
140 | // one index is empty, intersection will be empty, return early
141 | if (shards.size() == 0) {
142 | return res;
143 | }
144 |
145 | if (init) {
146 | common_shards = shards;
147 | init = false;
148 | } else {
149 | std::vector intersection;
150 | std::set_intersection(common_shards.begin(), common_shards.end(),
151 | shards.begin(), shards.end(),
152 | std::back_inserter(intersection));
153 | common_shards = intersection;
154 | }
155 |
156 | // intersection is empty, return early
157 | if (common_shards.size() == 0) {
158 | return res;
159 | }
160 | }
161 | }
162 | }
163 |
164 | struct iter {
165 | greylock::index_iterator begin, end;
166 |
167 | iter(DBT &db, const std::string &mbox, const std::string &attr, const std::string &token,
168 | const std::vector &shards) :
169 | begin(greylock::index_iterator::begin(db, mbox, attr, token, shards)),
170 | end(greylock::index_iterator::end(db, mbox, attr, token))
171 | {
172 | }
173 | };
174 |
175 | // contains vector of iterators pointing to the requested indexes
176 | // iterator always points to the smallest document ID not yet pushed into resulting structure (or to client)
177 | // or discarded (if other index iterators point to larger document IDs)
178 | std::vector idata;
179 | std::vector inegation;
180 |
181 | for (const auto &ent: iq.se) {
182 | for (const auto &attr: ent.idx.attributes) {
183 | for (const auto &t: attr.tokens) {
184 | iter itr(m_db_indexes, ent.mbox, attr.name, t.name, common_shards);
185 |
186 | if (iq.next_document_id != 0) {
187 | itr.begin.rewind_to_index(iq.next_document_id);
188 | } else {
189 | itr.begin.rewind_to_index(iq.range_start);
190 | }
191 |
192 | idata.emplace_back(itr);
193 | }
194 | }
195 |
196 | for (const auto &attr: ent.idx.negation) {
197 | for (const auto &t: attr.tokens) {
198 | std::string shard_key = document::generate_shard_key(m_db_indexes.options(), ent.mbox, attr.name, t.name);
199 | auto shards = m_db_indexes.get_shards(shard_key);
200 | #ifdef STDOUT_DEBUG
201 | printf("negation: key: %s, shards: %s\n",
202 | shard_key.c_str(),
203 | dump_vector(shards).c_str());
204 | #endif
205 |
206 | iter itr(m_db_indexes, ent.mbox, attr.name, t.name, shards);
207 | inegation.emplace_back(itr);
208 | }
209 | }
210 | }
211 |
212 | while (true) {
213 | // contains indexes within @idata array of iterators,
214 | // each iterator contains the same and smallest to the known moment reference to the document (i.e. document ID)
215 | //
216 | // if checking @idata array yelds smaller document ID than that in iterators referenced in @pos,
217 | // then we clear @pos and starts pushing the new smallest iterator indexes
218 | //
219 | // we could break out of the @idata processing, increase the smallest pointing iterator and start over,
220 | // but we optimize @idata processing - if there are other iterators in @idata which equal to the smallest
221 | // iterator value (document ID), we put them into @pos
222 | // Since @pos doesn't contain all indexes (its size doesn't equal to the size of @idata), we will increase
223 | // all iterators where we have found the smallest document ID, hopefully they will point to the new document ID,
224 | // which might be the same for all iterator among @idata and thus we will push this document ID to the result
225 | // structure returned to the client
226 | //
227 | // Here is an example:
228 | //
229 | // 1. @idata iterators 0 1 2 3
230 | // -------------------------
231 | // document ids d0 d2 d3 d3
232 | // d2 d3 d4 d4
233 | // d3 d4 d5 d5
234 | // d4 - - -
235 | // d5 - - -
236 | //
237 | // We start from the top of this table, i.e. row after 'document ids' string
238 | // @pos will contain following values during iteration over @idata iterators
239 | // 0 - select the first value
240 | // 0 - skip iterator 1 (d2 document id) since its value is greater than that 0'th iterator value (d0)
241 | // 0 - skip iterator 2
242 | // 0 - skip iterator 3
243 | //
244 | // @pos contains only 0 index, it is not equal to the size of @idata (4), thus we have to increase 0'th iterator
245 | // discarding its first value
246 | //
247 | // 2. @idata iterators 0 1 2 3
248 | // -------------------------
249 | // document ids d2 d2 d3 d3
250 | // d3 d3 d4 d4
251 | // d4 d4 d5 d5
252 | // d5 - - -
253 | // @pos:
254 | // 0 - select the first iterator
255 | // 0 1 - 1'th iterator value equals to the value of the 0'th iterator, append it to the array
256 | // 0 1 - 2'th iterator value (d3) is greater than that of the 0'th iterator (d2)
257 | // 0 1 - the same as above
258 | // since size of the @pos is not equal to the size of @idata we increment all iterators which are indexed in @pos
259 | //
260 | // 3. @idata iterators 0 1 2 3
261 | // -------------------------
262 | // document ids d3 d3 d3 d3
263 | // d4 d4 d4 d4
264 | // d5 - d5 d5
265 | // @pos will contain all 4 indexes, since all iterator's value are the same (d3)
266 | // We will increment all iterators and push d3 into resulting array which will be returned to the client,
267 | // since size of the @pos array equals to the @idata size
268 | //
269 | // 4. @idata iterators 0 1 2 3
270 | // -------------------------
271 | // document ids d4 d4 d4 d4
272 | // d5 - d5 d5
273 | // We put d4 into resulting array and increment all iterators as above
274 | //
275 | // 5. @idata iterators 0 1 2 3
276 | // -------------------------
277 | // document ids d5 - d5 d5
278 | //
279 | // @pos:
280 | // 0 - select the first iterator
281 | // Stop processing, since 1'th iterator is empty.
282 | // This means no further iteration checks can contain all 4 the same value,
283 | // thus it is not possible to find any other document with higher ID
284 | // which will contain all 4 requested indexes.
285 | //
286 | // 6. Return [d3, d4] values to the client
287 | std::vector pos;
288 |
289 | id_t next_id;
290 |
291 | int current = -1;
292 | for (auto &itr: idata) {
293 | auto &it = itr.begin;
294 | auto &e = itr.end;
295 | ++current;
296 |
297 | if (it == e) {
298 | res.completed = true;
299 | break;
300 | }
301 |
302 | if (it->indexed_id > iq.range_end) {
303 | res.completed = true;
304 | break;
305 | }
306 |
307 | res.completed = false;
308 | res.next_document_id.set_next_id(it->indexed_id);
309 |
310 | if (pos.size() == 0) {
311 | pos.push_back(current);
312 | continue;
313 | }
314 |
315 | auto &min_it = idata[pos[0]].begin;
316 | #if 0
317 | BH_LOG(m_bp.logger(), INDEXES_LOG_INFO, "intersection: min-index: %s, id: %s, it-index: %s, id: %s",
318 | idata[pos[0]].idx.start_key().str(), min_it->str(),
319 | idata_it->idx.start_key().str(), it->str());
320 | #endif
321 | if (it->indexed_id == min_it->indexed_id) {
322 | pos.push_back(current);
323 | continue;
324 | }
325 |
326 | next_id = std::max(it->indexed_id, min_it->indexed_id);
327 | res.next_document_id.set_next_id(next_id);
328 |
329 | pos.clear();
330 | break;
331 | }
332 |
333 | // this can only happen if one of the iterators has been finished,
334 | // which means number of found positions will not be equal to the number
335 | // of indexes to intersect, and thus there is no more data to push into result.
336 | // Just break out of the processing loop - nothing can be added anymore.
337 | if (res.completed) {
338 | break;
339 | }
340 |
341 | // number of entries with the same document ID doesn't match number of indexes,
342 | // this means some index doesn't have this docuement and thus it has to be skipped
343 | // and iteration check process has to be started over
344 | if (pos.size() != idata.size()) {
345 | for (auto &it: idata) {
346 | auto &min_it = it.begin;
347 |
348 | min_it.rewind_to_index(next_id);
349 | }
350 |
351 | continue;
352 | }
353 |
354 | auto &min_it = idata[pos.front()].begin;
355 | id_t indexed_id = min_it->indexed_id;
356 |
357 | bool negation_match = false;
358 | for (auto &neg: inegation) {
359 | auto &it = neg.begin;
360 | it.rewind_to_index(indexed_id);
361 | if (it != neg.end) {
362 | if (it->indexed_id == indexed_id) {
363 | negation_match = true;
364 | break;
365 | }
366 | }
367 | }
368 |
369 | auto increment_all_iterators = [&] () {
370 | for (auto it = pos.begin(); it != pos.end(); ++it) {
371 | auto &idata_iter = idata[*it].begin;
372 | ++idata_iter;
373 | }
374 | };
375 |
376 | if (negation_match) {
377 | increment_all_iterators();
378 | continue;
379 | }
380 |
381 | single_doc_result rs;
382 | auto err = min_it.document(m_db_docs, &rs.doc);
383 | if (err) {
384 | #if 0
385 | printf("could not read document id: %ld, err: %s [%d]\n",
386 | min_it->indexed_id, err.message().c_str(), err.code());
387 | #endif
388 | increment_all_iterators();
389 | continue;
390 | }
391 | rs.doc.indexed_id = indexed_id;
392 |
393 | // increment all iterators
394 | increment_all_iterators();
395 |
396 | if (!check(rs)) {
397 | continue;
398 | }
399 |
400 | res.docs.emplace_back(rs);
401 | if (res.docs.size() == iq.max_number)
402 | break;
403 | }
404 |
405 | return res;
406 | }
407 | private:
408 | DBT &m_db_docs;
409 | DBT &m_db_indexes;
410 | };
411 |
412 | }} // namespace ioremap::greylock
413 |
414 | #endif // __INDEXES_INTERSECTION_HPP
415 |
--------------------------------------------------------------------------------
/include/greylock/iterator.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "greylock/types.hpp"
4 |
5 | #include
6 |
7 | //#define STDOUT_DEBUG
8 | #ifdef STDOUT_DEBUG
9 | #define dprintf(fmt, a...) printf(fmt, ##a)
10 | #else
11 | #define dprintf(fmt, ...)
12 | #endif
13 |
14 | namespace ioremap { namespace greylock {
15 |
16 | template
17 | class index_iterator {
18 | private:
19 | disk_index m_current;
20 | typename decltype(m_current.ids)::iterator m_idx_current, m_idx_end;
21 | public:
22 | typedef index_iterator self_type;
23 | typedef disk_index::value_type value_type;
24 | typedef typename decltype(m_current.ids)::iterator::reference reference;
25 | typedef typename decltype(m_current.ids)::iterator::pointer pointer;
26 | typedef std::forward_iterator_tag iterator_category;
27 | typedef std::ptrdiff_t difference_type;
28 |
29 | static index_iterator begin(DBT &db, const std::string &mbox, const std::string &attr, const std::string &token) {
30 | std::string index_base = document::generate_index_base(db.options(), mbox, attr, token);
31 | std::vector shards(db.get_shards(document::generate_shard_key(db.options(), mbox, attr, token)));
32 | if (shards.size() == 0) {
33 | return end(db, index_base);
34 | }
35 |
36 | return index_iterator(db, index_base, shards);
37 | }
38 | static index_iterator begin(DBT &db, const std::string &mbox, const std::string &attr, const std::string &token,
39 | const std::vector &shards) {
40 | std::string index_base = document::generate_index_base(db.options(), mbox, attr, token);
41 | if (shards.size() == 0) {
42 | return end(db, index_base);
43 | }
44 |
45 | return index_iterator(db, index_base, shards);
46 | }
47 |
48 | static index_iterator end(DBT &db, const std::string &base) {
49 | return index_iterator(db, base);
50 | }
51 | static index_iterator end(DBT &db, const std::string &mbox, const std::string &attr, const std::string &token) {
52 | std::string index_base = document::generate_index_base(db.options(), mbox, attr, token);
53 | return index_iterator(db, index_base);
54 | }
55 |
56 | index_iterator(const index_iterator &src): m_db(src.m_db) {
57 | m_current = src.m_current;
58 | if (src.m_idx_current == src.m_idx_end) {
59 | m_idx_current = m_idx_end = m_current.ids.end();
60 | } else {
61 | typename decltype(src.m_current.ids)::const_iterator sib = src.m_current.ids.begin();
62 | typename decltype(src.m_current.ids)::const_iterator sic = src.m_idx_current;
63 |
64 | auto diff = std::distance(sib, sic);
65 | dprintf("src: %s, diff: %ld\n", src.to_string().c_str(), diff);
66 |
67 | m_idx_current = std::next(m_current.ids.begin(), diff);
68 | m_idx_end = m_current.ids.end();
69 | }
70 |
71 | m_base = src.m_base;
72 | m_shards = src.m_shards;
73 | m_shards_idx = src.m_shards_idx;
74 | }
75 |
76 | self_type &operator++() {
77 | ++m_idx_current;
78 | if (m_idx_current == m_idx_end) {
79 | load_next();
80 | }
81 | return *this;
82 | }
83 |
	// Fast-forwards the iterator to the first entry whose indexed id is
	// >= @idx, jumping directly to the shard that would contain @idx and
	// then binary-searching inside the loaded id blocks. If no such entry
	// exists, the iterator becomes the end iterator (m_shards_idx == -1).
	self_type &rewind_to_index(const id_t &idx) {
		// shard that @idx maps to under the current sharding scheme
		size_t rewind_shard = document::generate_shard_number(m_db.options(), idx);
		dprintf("rewind: %s, idx: %s, rewind_shard: %ld\n", to_string().c_str(), idx.to_string().c_str(), rewind_shard);

		// m_shards is kept sorted, so lower_bound finds the first shard
		// number >= the target shard
		auto rewind_shard_it = std::lower_bound(m_shards.begin(), m_shards.end(), rewind_shard);
		if (rewind_shard_it == m_shards.end()) {
			// target lies beyond every shard: turn into the end iterator
			set_shard_index(-1);
			dprintf("could not increase iterator: %s\n", to_string().c_str());
			return *this;
		}

		int rewind_shard_idx = std::distance(m_shards.begin(), rewind_shard_it);
		// after load_next() m_shards_idx points at the NEXT shard, so the
		// currently-loaded shard is m_shards_idx - 1; only reload when the
		// target shard differs from the one already in memory
		if (rewind_shard_idx != m_shards_idx - 1) {
			set_shard_index(rewind_shard_idx);
			load_next();
		}

		if (m_shards_idx >= 0) {
			// probe object used only as a lower_bound comparison key
			document_for_index did;
			did.indexed_id = idx;

			do {
				m_idx_current = std::lower_bound(m_idx_current, m_idx_end, did);
				if (m_idx_current == m_idx_end) {
					// current block exhausted; advance to the next
					// non-empty shard and keep searching
					load_next();
					if (m_shards_idx < 0)
						break;
				}

				// loop until we land on an entry >= idx; load_next()
				// guarantees a non-empty block while m_shards_idx >= 0
			} while (m_idx_current->indexed_id < idx);
		}

		dprintf("increased iterator: %s\n", to_string().c_str());
		return *this;
	}
119 |
	// Dereference: the current id entry. Undefined when the iterator is
	// exhausted (m_idx_current == m_idx_end) — callers must check first.
	reference operator*() {
		return *m_idx_current;
	}
	// Member access on the current id entry; same validity caveat as operator*.
	pointer operator->() {
		return &(*m_idx_current);
	}
126 |
	// Loads the full document body for the entry the iterator currently
	// points at: reads it from the documents column keyed by the indexed id
	// and deserializes it into @doc. Returns a non-empty error on failure.
	// NOTE(review): reads through the @db argument rather than the stored
	// m_db — presumably intentional (documents may live in another database
	// handle than the index shards); confirm against callers.
	error_info document(DBT &db, document *doc) {
		std::string doc_data;
		auto err = db.read(greylock::options::documents_column, m_idx_current->indexed_id.to_string(), &doc_data);
		if (err)
			return err;

		deserialize(*doc, doc_data.data(), doc_data.size());
		return greylock::error_info();
	}
136 |
137 | std::string to_string() const {
138 | auto dump_shards = [&]() -> std::string {
139 | std::ostringstream out;
140 | for (size_t i = 0; i < m_shards.size(); ++i) {
141 | out << m_shards[i];
142 | if (i != m_shards.size() - 1)
143 | out << " ";
144 | }
145 | return out.str();
146 | };
147 | std::ostringstream ss;
148 | ss << "base: " << m_base <<
149 | ", next_shard_idx: " << m_shards_idx <<
150 | ", shards: [" << dump_shards() << "] " <<
151 | ", ids_size: " << m_current.ids.size() <<
152 | ", current_is_end: " << (m_idx_current == m_idx_end) <<
153 | ", indexed_id: " << ((m_idx_current == m_idx_end) ? "none" : m_idx_current->indexed_id.to_string());
154 | return ss.str();
155 | }
156 |
157 | bool operator==(const self_type& rhs) {
158 | if (m_base != rhs.m_base)
159 | return false;
160 | if (m_shards.size() != rhs.m_shards.size())
161 | return false;
162 | if (m_shards != rhs.m_shards)
163 | return false;
164 | if (m_shards_idx != rhs.m_shards_idx)
165 | return false;
166 |
167 | if ((m_idx_current == m_idx_end) && (rhs.m_idx_current == rhs.m_idx_end))
168 | return true;
169 |
170 | if (m_idx_current->indexed_id != rhs.m_idx_current->indexed_id)
171 | return false;
172 |
173 | return true;
174 | }
175 | bool operator!=(const self_type& rhs) {
176 | return !operator==(rhs);
177 | }
178 |
179 | private:
180 | DBT &m_db;
181 | std::string m_base;
182 | std::vector m_shards;
183 | int m_shards_idx = -1;
184 |
	// Constructs an "end" iterator for index @base: no shards are set, so
	// m_shards_idx stays -1 and the iterator compares equal to any other
	// exhausted iterator over the same base.
	index_iterator(DBT &db, const std::string &base): m_db(db), m_base(base) {
	}
187 |
	// Constructs a "begin" iterator over the given shard list: positions on
	// the first shard and immediately loads the first non-empty id block
	// (or degrades to the end iterator if every shard is empty/unreadable).
	index_iterator(DBT &db, const std::string &base, const std::vector shards): m_db(db), m_base(base), m_shards(shards) {
		set_shard_index(0);
		load_next();
	}
192 |
193 | void set_shard_index(int idx) {
194 | m_shards_idx = idx;
195 | if (idx < 0) {
196 | m_shards.clear();
197 |
198 | m_current.ids.clear();
199 | m_idx_current = m_current.ids.begin();
200 | m_idx_end = m_current.ids.end();
201 | }
202 | }
203 |
204 | void load_next() {
205 | do {
206 | load_next_one();
207 | } while (m_shards_idx >= 0 && m_current.ids.empty());
208 | }
209 |
	// Reads the id block of the shard m_shards_idx points at into m_current
	// and advances m_shards_idx past it. Any failure (index out of range,
	// read error, deserialization error) turns the iterator into the end
	// iterator via set_shard_index(-1). The loaded block may be empty;
	// load_next() is responsible for skipping empty blocks.
	void load_next_one() {
		dprintf("loading: %s\n", to_string().c_str());
		// reset current block first so early returns leave a consistent,
		// empty state
		m_current.ids.clear();
		m_idx_current = m_current.ids.begin();
		m_idx_end = m_current.ids.end();

		if (m_shards_idx < 0 || m_shards_idx >= (int)m_shards.size()) {
			set_shard_index(-1);
			return;
		}

		std::string key = document::generate_index_key_shard_number(m_base, m_shards[m_shards_idx]);
		std::string data;
		auto err = m_db.read(greylock::options::indexes_column, key, &data);
		if (err) {
			// unreadable shard terminates iteration rather than being skipped
			set_shard_index(-1);
			return;
		}

		try {
			deserialize(m_current, data.data(), data.size());

			// rebind iterators to the freshly filled container
			m_idx_current = m_current.ids.begin();
			m_idx_end = m_current.ids.end();
		} catch (...) {
			// corrupted shard data: give up instead of propagating
			set_shard_index(-1);
			return;
		}

		// success: point at the following shard for the next load
		set_shard_index(m_shards_idx + 1);
		dprintf("loaded: %s\n", to_string().c_str());
	}
242 | };
243 | }} // namespace ioremap::greylock
244 |
--------------------------------------------------------------------------------
/include/greylock/json.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __INDEXES_JSON_HPP
2 | #define __INDEXES_JSON_HPP
3 |
4 | #include
5 |
6 | #include
7 |
8 | namespace ioremap { namespace greylock {
9 |
10 | static inline const char *get_string(const rapidjson::Value &entry, const char *name, const char *def = NULL) {
11 | if (entry.HasMember(name)) {
12 | const rapidjson::Value &v = entry[name];
13 | if (v.IsString()) {
14 | return v.GetString();
15 | }
16 | }
17 |
18 | return def;
19 | }
20 |
21 | static inline int64_t get_int64(const rapidjson::Value &entry, const char *name, int64_t def = -1) {
22 | if (entry.HasMember(name)) {
23 | const rapidjson::Value &v = entry[name];
24 | if (v.IsInt()) {
25 | return v.GetInt();
26 | }
27 | if (v.IsUint()) {
28 | return v.GetUint();
29 | }
30 | if (v.IsInt64()) {
31 | return v.GetInt64();
32 | }
33 | if (v.IsUint()) {
34 | return v.GetUint64();
35 | }
36 | }
37 |
38 | return def;
39 | }
40 |
// Returns the object member @name of @entry, or @def when the member is
// missing or is not a JSON object.
// WARNING(review): when @def is defaulted it binds to a temporary that is
// destroyed at the end of the caller's full-expression — holding the
// returned reference past that statement dangles. Callers must use or copy
// the result immediately, or pass a @def with a longer lifetime.
static inline const rapidjson::Value &get_object(const rapidjson::Value &entry, const char *name,
		const rapidjson::Value &def = rapidjson::Value()) {
	if (entry.HasMember(name)) {
		const rapidjson::Value &v = entry[name];

		if (v.IsObject())
			return v;
	}

	return def;
}
52 |
// Returns the array member @name of @entry, or @def when the member is
// missing or is not a JSON array.
// WARNING(review): same lifetime hazard as get_object() — the defaulted
// @def is a temporary, so the returned reference must not outlive the
// caller's full-expression unless a longer-lived @def is supplied.
static inline const rapidjson::Value &get_array(const rapidjson::Value &entry, const char *name,
		const rapidjson::Value &def = rapidjson::Value()) {
	if (entry.HasMember(name)) {
		const rapidjson::Value &v = entry[name];

		if (v.IsArray())
			return v;
	}

	return def;
}
64 |
65 | static inline bool get_bool(const rapidjson::Value &entry, const char *name, bool def = true) {
66 | if (entry.HasMember(name)) {
67 | const rapidjson::Value &v = entry[name];
68 |
69 | if (v.IsBool())
70 | return v.GetBool();
71 | }
72 |
73 | return def;
74 | }
75 |
76 | }} // namespace ioremap::greylock
77 |
78 | #endif // __INDEXES_JSON_HPP
79 |
--------------------------------------------------------------------------------
/include/greylock/jsonvalue.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | #include
8 |
9 | #include
10 |
11 | namespace ioremap { namespace greylock {
12 |
13 | class JsonValue : public rapidjson::Value
14 | {
15 | public:
16 | JsonValue() {
17 | SetObject();
18 | }
19 |
20 | ~JsonValue() {
21 | }
22 |
23 | static void set_time(rapidjson::Value &obj, rapidjson::Document::AllocatorType &alloc, long tsec, long usec) {
24 | char str[64];
25 | struct tm tm;
26 |
27 | localtime_r((time_t *)&tsec, &tm);
28 | strftime(str, sizeof(str), "%F %Z %R:%S", &tm);
29 |
30 | char time_str[128];
31 | snprintf(time_str, sizeof(time_str), "%s.%06lu", str, usec);
32 |
33 | obj.SetObject();
34 |
35 | rapidjson::Value tobj(time_str, strlen(time_str), alloc);
36 | obj.AddMember("time", tobj, alloc);
37 |
38 | std::string raw_time = std::to_string(tsec) + "." + std::to_string(usec);
39 | rapidjson::Value tobj_raw(raw_time.c_str(), raw_time.size(), alloc);
40 | obj.AddMember("time-raw", tobj_raw, alloc);
41 | }
42 |
43 | std::string ToString() const {
44 | rapidjson::StringBuffer buffer;
45 | rapidjson::PrettyWriter writer(buffer);
46 |
47 | Accept(writer);
48 | buffer.Put('\n');
49 |
50 | return std::string(buffer.GetString(), buffer.Size());
51 | }
52 |
53 | rapidjson::MemoryPoolAllocator<> &GetAllocator() {
54 | return m_allocator;
55 | }
56 |
57 | private:
58 | rapidjson::MemoryPoolAllocator<> m_allocator;
59 | };
60 |
61 |
62 | }} // namespace ioremap::greylock
63 |
--------------------------------------------------------------------------------
/include/greylock/types.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "greylock/database.hpp"
4 | #include "greylock/json.hpp"
5 | #include "greylock/id.hpp"
6 |
7 | #include
8 |
9 | #include
10 | #include
11 | #include
12 | #include