├── .gitignore ├── Dockerfile ├── DockerfileServer ├── LICENSE ├── README.md ├── api.py ├── cache ├── __init__.py ├── aio.py ├── cluster.py └── twemproxy.py ├── digests.py ├── docker-compose-api.yml ├── docker-compose-server.yml ├── docker-compose.yml ├── docker_config.py ├── lib ├── add_get_triple.lua ├── get_triple.lua ├── redis.conf └── triple_pattern_search.lua ├── requirements.txt ├── server.py ├── setup.py └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.rdb 4 | *DS_STORE 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | #Dockerfile for Linked Data Fragments Base 2 | FROM python:3.5.1 3 | MAINTAINER Jeremy Nelson 4 | 5 | # Set environmental variables 6 | ENV LDFS_HOME /opt/ldfs 7 | 8 | # Update Ubuntu and install Python 3 setuptools, git and other 9 | # packages 10 | RUN apt-get update && apt-get install -y && \ 11 | apt-get install -y python3-setuptools &&\ 12 | apt-get install -y git &&\ 13 | apt-get install -y python3-pip 14 | 15 | 16 | # Retrieve latest development branch of Linked Data Fragments project on 17 | # github.com 18 | RUN git clone https://github.com/jermnelson/linked-data-fragments.git $LDFS_HOME \ 19 | && cd $LDFS_HOME \ 20 | && git checkout -b development \ 21 | && git pull origin development \ 22 | && pip3 install -r requirements.txt \ 23 | && touch __init__.py 24 | 25 | WORKDIR $LDFS_HOME 26 | CMD ["nohup", "python", "server.py", "&"] 27 | -------------------------------------------------------------------------------- /DockerfileServer: -------------------------------------------------------------------------------- 1 | # This Dockerfile run the asynco Linked Data Fragments Server 2 | FROM jermnelson/ldfs-base 3 | MAINTAINER Jeremy Nelson 4 | 5 | EXPOSE 7000 6 | WORKDIR $LDFS_HOME 7 | #CMD ['python server.py run'] 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 
26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 
151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. 
This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 
332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 
396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. 
For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 
508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. 
It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | 663 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # linked-data-fragments 2 | Python Linked Data Fragment server using asyncio and Redis; See 3 | https://docs.google.com/presentation/d/1oCbMKO0iwulkiqgDarfcto9naYVzl6rcCsxukGLVDLk/edit?usp=sharing 4 | for more information. 
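A quick way to exercise the service once api.py (next section) is running: its falcon app registers a single route at "/" whose GET handler reads optional s, p, and o query parameters through the triple_key hook. The sketch below is illustrative only and assumes the fallback configuration hard-coded in api.py (REST API on localhost:18150); the subject, predicate, and object values are made-up placeholders, and the exact JSON returned depends on what is already in the Redis cache.

    # Minimal client sketch against the triple-pattern endpoint (assumptions above).
    import requests

    params = {
        "s": "http://example.org/resource/1",               # hypothetical subject URI
        "p": "http://www.w3.org/2000/01/rdf-schema#label",  # predicate URI
        "o": "Example label",                               # object literal
    }
    response = requests.get("http://localhost:18150/", params=params)
    print(response.status_code, response.json())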
5 | 
--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | 
4 | import digests
5 | import falcon
6 | import hashlib
7 | import json
8 | import os
9 | import rdflib
10 | import requests
11 | from cache import TRIPLE_SPARQL
12 | try:
13 |     from config import config
14 | except ImportError:
15 |     config = {"debug": True,
16 |               "cache": "Cache",
17 |               "redis": {"host": "localhost",
18 |                         "port": 6379,
19 |                         "ttl": 604800},
20 |               "rest_api": {"host": "localhost",
21 |                            "port": 18150},
22 |               # Blazegraph SPARQL Endpoint
23 |               "triplestore": {"host": "localhost",
24 |                               "port": 8080,
25 |                               "path": "bigdata"},
26 | 
27 |     }
28 | 
29 | if config['cache'].startswith("TwemproxyCache"):
30 |     from cache.twemproxy import TwemproxyCache
31 |     CACHE = TwemproxyCache(**config)
32 | elif config['cache'].startswith("ClusterCache"):
33 |     from cache.cluster import ClusterCache
34 |     CACHE = ClusterCache(**config)
35 | else:
36 |     from cache import Cache
37 |     CACHE = Cache(**config)
38 | print("CACHE is {}".format(CACHE))
39 | 
40 | 
41 | rest = falcon.API()
42 | 
43 | 
44 | # Hooks
45 | def triple_key(req, resp, params):
46 |     if len(params) < 1:
47 |         params = req.params
48 |     subj = params.get('s', None)
49 |     pred = params.get('p', None)
50 |     obj = params.get('o', None)
51 |     triple_str, resp.body = None, None
52 |     print("In triple key {} {} {}".format(subj, pred, obj))
53 |     if subj and pred and obj:
54 |         triple_str = CACHE.datastore.evalsha(
55 |             CACHE.add_get_triple,
56 |             3,
57 |             subj,
58 |             pred,
59 |             obj)
60 |         if triple_str and CACHE.datastore.exists(triple_str):
61 |             triple_key = triple_str.decode()
62 |             triple_digests = triple_key.split(":")
63 |             resp.body = json.dumps(
64 |                 {"key": triple_str.decode(),
65 |                  "subject_sha1": triple_digests[0],
66 |                  "predicate_sha1": triple_digests[1],
67 |                  "object_sha1": triple_digests[2]}
68 |             )
69 |         elif triple_str:
70 |             resp.body = json.dumps(
71 |                 {"missing-triple-key": triple_str.decode()}
72 |             )
73 |         else:
74 |             raise falcon.HTTPNotFound()
75 |     output = {"metadata": {"p": "void:triples",
76 |                            "o": 0 },
77 |               "data": []}
78 |     # Subject search
79 |     if subj and (pred is None or obj is None):
80 |         print("Before subject key")
81 |         subject_key = "{}:pred-obj".format(hashlib.sha1(str(subj).encode()).hexdigest())
82 |         if not pred and not obj:
83 |             # Retrieve the entire set
84 |             results = CACHE.datastore.smembers(subject_key)
85 |         else:
86 |             if not pred:
87 |                 pattern = "*:{}".format(hashlib.sha1(str(obj).encode()).hexdigest())
88 |             else:
89 |                 pattern = "{}:*".format(hashlib.sha1(str(pred).encode()).hexdigest())
90 |             cursor, results = CACHE.datastore.sscan(subject_key, 0, match=pattern)
91 |             while cursor:
92 |                 cursor, shard_results = CACHE.datastore.sscan(
93 |                     subject_key,
94 |                     cursor,
95 |                     match=pattern)
96 |                 results.extend(shard_results)
97 |                 if len(results) >= 100:
98 |                     output["metadata"]["cursor"] = cursor
99 |                     break
100 |         output["metadata"]["o"] = len(results)
101 |         for triple_key in results:
102 |             triples = triple_key.decode().split(":")
103 |             output["data"].append({"p": CACHE.datastore.get(triples[0]).decode(),
104 |                                    "o": CACHE.datastore.get(triples[-1]).decode(),
105 |                                    "s": subj})
106 | 
107 |     if pred and (subj is None or obj is None) and len(output["data"]) < 1:
108 |         predicate_key = "{}:subj-obj".format(
109 |             hashlib.sha1(str(pred).encode()).hexdigest())
110 |         if not obj and not subj:
111 |             results = CACHE.datastore.smembers(predicate_key)
112 |         else:
113 |             if not obj:
114 |                 pattern = "{}:*".format(
115 |                     hashlib.sha1(str(subj).encode()).hexdigest())
116 |             else:
117 |                 pattern = "*:{}".format(
118 |                     hashlib.sha1(str(obj).encode()).hexdigest())
119 |             cursor, results = CACHE.datastore.sscan(
120 |                 predicate_key,
121 |                 0,
122 |                 match=pattern)
123 |             while cursor:
124 |                 cursor, shard_results = CACHE.datastore.sscan(
125 |                     predicate_key,
126 |                     cursor,
127 |                     match=pattern)
128 |                 results.extend(shard_results)
129 |                 if len(results) >= 100:
130 |                     output["metadata"]["cursor"] = cursor
131 |                     break
132 |         for triple_key in results:
133 |             triples = triple_key.decode().split(":")
134 |             output["data"].append({"p": pred,
135 |                                    "o": CACHE.datastore.get(triples[-1]).decode(),
136 |                                    "s": CACHE.datastore.get(triples[0]).decode()})
137 |     if obj and (subj is None or pred is None) and len(output["data"]) < 1:
138 |         obj_key = "{}:subj-pred".format(
139 |             hashlib.sha1(str(obj).encode()).hexdigest())
140 |         if not subj and not pred:
141 |             results = CACHE.datastore.smembers(obj_key)
142 |         else:
143 |             if not subj:
144 |                 pattern = "*:{}".format(
145 |                     hashlib.sha1(str(pred).encode()).hexdigest())
146 |             else:
147 |                 pattern = "{}:*".format(
148 |                     hashlib.sha1(str(subj).encode()).hexdigest())
149 |             cursor, results = CACHE.datastore.sscan(
150 |                 obj_key,
151 |                 0,
152 |                 match=pattern)
153 |             while cursor:
154 |                 cursor, shard_results = CACHE.datastore.sscan(
155 |                     obj_key,
156 |                     cursor,
157 |                     match=pattern)
158 |                 results.extend(shard_results)
159 |                 if len(results) >= 100:
160 |                     output["metadata"]["cursor"] = cursor
161 |                     break
162 |         for triple_key in results:
163 |             triples = triple_key.decode().split(":")
164 |             output["data"].append({"p": CACHE.datastore.get(triples[-1]).decode(),
165 |                                    "o": obj,
166 |                                    "s": CACHE.datastore.get(triples[0]).decode()})
167 |     resp.body = json.dumps(output)
168 | 
169 | def get_triples(pattern):
170 |     cursor = -1
171 |     output = []
172 |     iterations = 0
173 |     while 1:
174 |         iterations += 1
175 |         if cursor == 0:
176 |             break
177 |         elif cursor < 0:
178 |             cursor = 0
179 |         cursor, resources = CACHE.datastore.scan(
180 |             cursor=cursor,
181 |             match=pattern,
182 |             count=1000)
183 |         cursor = int(cursor)
184 |         if len(resources) > 0:
185 |             output.extend(resources)
186 |     return output
187 | 
188 | 
189 | def get_types(type_uri):
190 |     """Function takes a type uri and returns all triple keys that
191 |     match the RDF type for that uri.
192 | 
193 |     Args:
194 |         type_uri -- URI to search for
195 | 
196 |     Returns:
197 |         A list of all triples that match the RDF type of the
198 |         type_uri
199 |     """
200 |     pattern = "*:{}:{}".format(digests.RDF.get(str(rdflib.RDF.type)),
201 |                                digests.get_sha1_digest(type_uri))
202 |     return get_triples(pattern)
203 | 
204 | 
205 | def get_graph(pattern):
206 |     graph = rdflib.Graph()
207 |     transaction = CACHE.datastore.pipeline(transaction=True)
208 |     for key in get_triples(pattern):
209 |         transaction.get(key)
210 |     json_triples = transaction.execute()
211 | 
212 | 
213 | def get_subject_graph(subject):
214 |     """Function takes a subject URI and scans through cache for all
215 |     triples matching the subject
216 | 
217 |     Args:
218 |         subject -- subject URI
219 | 
220 |     Returns:
221 |         rdflib.Graph made up of all triples
222 |     """
223 | 
224 |     pattern = "{}:*:*".format(digests.get_sha1_digest(subject))
225 |     transaction = CACHE.datastore.pipeline
226 |     for key in get_triples(pattern):
227 |         pass
228 | 
229 | class Triple:
230 | 
231 |     def __init__(self, **kwargs):
232 |         self.triplestore_url = kwargs.get("triplestore_url", None)
233 |         if not self.triplestore_url:
234 |             self.triplestore_url = "http://{}:{}/{}".format(
235 |                 config.get('triplestore').get('host'),
236 |                 config.get('triplestore').get('port'),
237 |                 config.get('triplestore').get('path'))
238 | 
239 |     @falcon.before(triple_key)
240 |     def on_get(self, req, resp):
241 |         if not resp.body:
242 |             # Should search SPARQL endpoint and add to cache
243 |             # if found
244 |             result = requests.post(self.triplestore_url,
245 |                 data={"query": TRIPLE_SPARQL.format(req.params.get('s'),
246 |                                                     req.params.get('p'),
247 |                                                     req.params.get('o')),
248 |                       "format": "json"})
249 |             if result.status_code < 399:
250 |                 bindings = result.json().get('results').get('bindings')
251 |                 if len(bindings) > 0:
252 |                     for binding in bindings:
253 |                         print(binding)
254 | 
255 |             else:
256 |                 raise falcon.HTTPNotFound()
257 |         resp.status = falcon.HTTP_200
258 | 
259 | 
260 |         # raise falcon.HTTPInternalServerError(
261 |         #     "Failed to retrieve triple key",
262 |         #     "Subject={} Predicate={} Object={}".format(req.args.get('s'),
263 |         #                                                req.args.get('p'),
264 |         #                                                req.args.get('o')))
265 | 
266 |     @falcon.before(triple_key)
267 |     def on_post(self, req, resp):
268 |         if resp.body:
269 |             if 'missing-triple-key' in resp.body:
270 |                 print(resp.body)
271 |         else:
272 |             raise falcon.HTTPInternalServerError("Error with server", "Could not set triple")
273 |         resp.status = falcon.HTTP_201
274 | 
275 | triple = Triple()
276 | rest.add_route("/", triple)
277 | 
278 | if __name__ == '__main__':
279 |     if config.get('debug'):
280 |         from werkzeug.serving import run_simple
281 |         run_simple(
282 |             config.get('rest_api').get('host'),
283 |             config.get('rest_api').get('port'),
284 |             rest,
285 |             use_reloader=True)
286 |     else:
287 |         print("Production mode not supported")
288 | 
289 | 
--------------------------------------------------------------------------------
/cache/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import json
4 | import hashlib
5 | import os
6 | import redis
7 | 
8 | # Different strategies for storing triple information in
9 | # Redis data structures;
10 | def hash_pattern(transaction,
11 |                  subject_sha1,
12 |                  predicate_sha1,
13 |                  object_sha1):
14 |     pass
15 | 
16 | def string_pattern(transaction,
17 |                    subject_sha1,
18 |                    predicate_sha1,
19 |                    object_sha1):
20 |     """The string pattern is the simplest to implement
21 |     but slow O(n) performance with KEYS and SCAN"""
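    # Illustrative layout, given SHA1 digests s1, p1, o1 for one triple:
    #   string pattern: SET "s1:p1:o1" 1  -- pattern queries must KEYS/SCAN the whole keyspace
    #   set pattern:    SADD "s1:pred-obj" "p1:o1" (plus "p1:subj-obj" and "o1:subj-pred")
    # so set_pattern below trades extra writes for direct per-term membership lookups.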
22 |     transaction.set("{}:{}:{}".format(
23 |         subject_sha1,
24 |         predicate_sha1,
25 |         object_sha1),
26 |         1)
27 | 
28 | def set_pattern(transaction,
29 |                 subject_sha1,
30 |                 predicate_sha1,
31 |                 object_sha1):
32 |     transaction.sadd("{}:pred-obj".format(subject_sha1),
33 |                      "{}:{}".format(predicate_sha1,
34 |                                     object_sha1))
35 |     transaction.sadd("{}:subj-obj".format(predicate_sha1),
36 |                      "{}:{}".format(subject_sha1,
37 |                                     object_sha1))
38 |     transaction.sadd("{}:subj-pred".format(object_sha1),
39 |                      "{}:{}".format(subject_sha1,
40 |                                     predicate_sha1))
41 | 
42 | 
43 | def add_triple(datastore, subject, predicate, object_, pattern="string"):
44 |     subject_sha1 = hashlib.sha1(subject.encode()).hexdigest()
45 |     predicate_sha1 = hashlib.sha1(predicate.encode()).hexdigest()
46 |     object_sha1 = hashlib.sha1(object_.encode()).hexdigest()
47 |     transaction = datastore.pipeline(transaction=True)
48 |     transaction.set(subject_sha1, subject)
49 |     transaction.set(predicate_sha1, predicate)
50 |     transaction.set(object_sha1, object_)
51 |     if pattern.startswith("string"):
52 |         strategy = string_pattern
53 |     elif pattern.startswith("hash"):
54 |         strategy = hash_pattern
55 |     elif pattern.startswith("set"):
56 |         strategy = set_pattern
57 |     strategy(transaction,
58 |              subject_sha1,
59 |              predicate_sha1,
60 |              object_sha1)
61 |     transaction.execute()
62 | 
63 | def remove_expired(**kwargs):
64 |     datastore = kwargs.get("datastore", redis.StrictRedis())
65 |     strategy = kwargs.get("strategy", "string")
66 |     database = kwargs.get('db', 0)
67 |     if strategy.startswith('string'):
68 |         return
69 |     expired_key_notification = "__keyevent@{}__:expired"
70 |     expired_pubsub = datastore.pubsub()
71 |     expired_pubsub.subscribe(expired_key_notification.format(database))
72 |     for item in expired_pubsub.listen():
73 |         sha1 = item.get("data")
74 |         transaction = datastore.pipeline(transaction=True)
75 |         remove_subject(sha1, transaction, datastore)
76 |         remove_predicate(sha1, transaction, datastore)
77 |         remove_object(sha1, transaction, datastore)
78 |         transaction.execute()
79 | 
80 | def remove_object(digest, transaction, datastore=redis.StrictRedis()):
81 |     object_key = "{}:subj-pred".format(digest)
82 |     if not datastore.exists(object_key):
83 |         return
84 |     for row in datastore.smembers(object_key):
85 |         subject_digest, predicate_digest = row.decode().split(":")
86 |         subj_pred_obj = "{}:pred-obj".format(subject_digest)
87 |         if datastore.exists(subj_pred_obj):
88 |             transaction.srem(
89 |                 subj_pred_obj,
90 |                 "{}:{}".format(predicate_digest, digest))
91 |         pred_subj_obj = "{}:subj-obj".format(predicate_digest)
92 |         if datastore.exists(pred_subj_obj):
93 |             transaction.srem(
94 |                 pred_subj_obj,
95 |                 "{}:{}".format(subject_digest, digest))
96 |     transaction.delete(object_key)
97 | 
98 | 
99 | def remove_predicate(digest, transaction, datastore=redis.StrictRedis()):
100 |     predicate_key = "{}:subj-obj".format(digest)
101 |     if not datastore.exists(predicate_key):
102 |         return
103 |     for row in datastore.smembers(predicate_key):
104 |         subject_digest, object_digest = row.decode().split(":")
105 |         subj_pred_obj = "{}:pred-obj".format(subject_digest)
106 |         if datastore.exists(subj_pred_obj):
107 |             transaction.srem(
108 |                 subj_pred_obj,
109 |                 "{}:{}".format(digest, object_digest))
110 |         obj_subj_pred = "{}:subj-pred".format(object_digest)
111 |         if datastore.exists(obj_subj_pred):
112 |             transaction.srem(
113 |                 obj_subj_pred,
114 |                 "{}:{}".format(subject_digest, digest))
115 |     transaction.delete(predicate_key)
116 | 
117 | 
118 | def remove_subject(digest, transaction, datastore=redis.StrictRedis()):
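    """Removes the set-pattern index entries for an expired subject digest:
    for each member of the subject's pred-obj set, drops the matching rows from
    the predicate's subj-obj set and the object's subj-pred set, then deletes
    the subject's own pred-obj set."""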
119 |     subject_key = "{}:pred-obj".format(digest)
120 |     if not datastore.exists(subject_key):
121 |         return
122 |     for row in datastore.smembers(subject_key):
123 |         predicate, object_ = row.decode().split(":")
124 |         pred_subj_obj = "{}:subj-obj".format(predicate)
125 |         if datastore.exists(pred_subj_obj):
126 |             transaction.srem(pred_subj_obj,
127 |                              "{}:{}".format(digest, object_))
128 |         obj_subj_pred = "{}:subj-pred".format(object_)
129 |         if datastore.exists(obj_subj_pred):
130 |             transaction.srem(
131 |                 obj_subj_pred,
132 |                 "{}:{}".format(digest, predicate))
133 |     transaction.delete(subject_key)
134 | 
135 | # SPARQL statements
136 | TRIPLE_SPARQL = """SELECT DISTINCT *
137 | WHERE {{{{
138 |    {} {} {} .
139 | }}}}"""
140 | 
141 | class Cache(object):
142 | 
143 |     def __init__(self, **kwargs):
144 |         self.lua_scripts = dict()
145 |         redis_config = kwargs.get('redis', None)
146 |         if redis_config:
147 |             self.datastore = redis.StrictRedis(
148 |                 host=redis_config.get('host'),
149 |                 port=redis_config.get('port'))
150 |         else:
151 |             self.datastore = redis.StrictRedis()
152 |         lua_location = kwargs.get('lua_location', None)
153 |         if not lua_location:
154 |             base_dir = os.path.dirname(os.path.abspath(__name__))
155 |             lua_location = os.path.join(base_dir, "lib")
156 |         for name in ["get_triple",
157 |                      "add_get_triple",
158 |                      "triple_pattern_search"]:
159 |             filepath = os.path.join(
160 |                 lua_location, "{}.lua".format(name))
161 |             with open(filepath) as fo:
162 |                 lua_script = fo.read()
163 |             sha1 = self.datastore.script_load(lua_script)
164 |             setattr(self, name, sha1)
165 | 
166 |     def __get_sha1__(self, entity):
167 |         return hashlib.sha1(entity.encode()).hexdigest()
168 | 
169 |     def triple_search(self, subject=None, predicate=None, object_=None):
170 |         triple_str = self.datastore.evalsha(
171 |             self.add_get_triple,
172 |             3,
173 |             subject,
174 |             predicate,
175 |             object_)
176 |         if triple_str:
177 |             if self.datastore.exists(triple_str):
178 |                 return json.dumps(self.datastore.get(triple_str).decode())
179 |             else:
180 |                 return {"result": "Missing Triple Key {}".format(triple_str)}
181 | 
182 | 
183 | 
--------------------------------------------------------------------------------
/cache/aio.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo"
2 | 
3 | import asyncio
4 | import aioredis
5 | import hashlib
6 | import os
7 | import redis
8 | try:
9 |     from config import config
10 | except ImportError:
11 |     config = {"redis": {"host": "localhost",
12 |                         "port": 6379,
13 |                         "ttl": 604800
14 |                         }}
15 | 
16 | LUA_SCRIPTS = {}
17 | BASE_DIR = os.path.dirname(os.path.abspath(__name__))
18 | LUA_LOCATION = os.path.join(BASE_DIR, "lib")
19 | DATASTORE = redis.StrictRedis(host=config.get("redis")["host"],
20 |                               port=config.get("redis")["port"])
21 | for name in ["add_get_triple",
22 |              "get_triple",
23 |              "triple_pattern_search"]:
24 |     filepath = os.path.join(
25 |         LUA_LOCATION, "{}.lua".format(name))
26 |     with open(filepath) as fo:
27 |         lua_script = fo.read()
28 |     sha1 = DATASTORE.script_load(lua_script)
29 |     LUA_SCRIPTS[name] = sha1
30 | 
31 | @asyncio.coroutine
32 | def get_digest(value):
33 |     """Get digest takes either a URI/URL or a Literal value and
34 |     calls the SHA1 for the add_get_hash.lua script.
35 | 
36 |     Args:
37 |         value -- URI/URL or Literal value
38 |     """
39 |     if not value:
40 |         return None
41 |     connection = yield from aioredis.create_connection(
42 |         (config.get("redis")["host"],
43 |          config.get("redis")["port"]),
44 |         encoding='utf-8')
45 |     sha1_digest = yield from connection.execute(
46 |         b'EVALSHA',
47 |         LUA_SCRIPTS['add_get_hash'],  # NOTE: lib/ only ships the three scripts loaded above; add_get_hash.lua is not among them
48 |         1,
49 |         value,
50 |         config.get("redis").get('ttl'))
51 |     connection.close()
52 |     return sha1_digest
53 | 
54 | 
55 | @asyncio.coroutine
56 | def get_value(digest):
57 |     connection = yield from aioredis.create_redis(
58 |         (config.get("redis")["host"],
59 |          config.get("redis")["port"]),
60 |         encoding='utf-8')
61 |     value = yield from connection.get(digest)
62 |     connection.close()
63 |     return value
64 | 
65 | @asyncio.coroutine
66 | def get_triple(subject_key, predicate_key, object_key):
67 |     connection = yield from aioredis.create_redis(
68 |         (config.get("redis")["host"],
69 |          config.get("redis")["port"]))
70 | 
71 |     pattern = str()
72 |     for key in [subject_key, predicate_key, object_key]:
73 |         if key is None:
74 |             pattern += "*:"
75 |         else:
76 |             pattern += "{}:".format(key)
77 |     pattern = pattern[:-1]
78 |     cur = b'0'
79 |     # Scan incrementally with SCAN so large keyspaces do not block the server
80 |     results = []
81 |     while cur:
82 |         cur, keys = yield from connection.scan(cur,
83 |                                                match=pattern,
84 |                                                count=1000)
85 |         if len(keys) > 0:
86 |             results.extend(keys)
87 |     connection.close()
88 |     return results
89 | 
--------------------------------------------------------------------------------
/cache/cluster.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import hashlib
4 | from rediscluster import StrictRedisCluster
5 | from . import Cache
6 | 
7 | class ClusterCache(Cache):
8 | 
9 |     def __init__(self, **kwargs):
10 |         startup_nodes = kwargs.get("startup_nodes")
11 |         if not startup_nodes:
12 |             startup_nodes = [{"port": 30001, "host": "0.0.0.0"},
13 |                              {"port": 30002, "host": "0.0.0.0"}]
14 |         self.cache = StrictRedisCluster(startup_nodes=startup_nodes)
15 | 
16 | 
17 |     def __get_sha1__(self, value):
18 |         return hashlib.sha1(value.encode()).hexdigest()
19 | 
20 |     def triple_search(self, subject, predicate, object_):
21 |         triple_str = "{}:{}:{}".format(
22 |             self.__get_sha1__(subject),
23 |             self.__get_sha1__(predicate),
24 |             self.__get_sha1__(object_))
25 | 
--------------------------------------------------------------------------------
/cache/twemproxy.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import redis
4 | import socket
5 | from . import Cache
import Cache 6 | 7 | class TwemproxyCache(Cache): 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def triple_search(self, 13 | subject=None, 14 | predicate=None, 15 | object_=None): 16 | pass 17 | -------------------------------------------------------------------------------- /digests.py: -------------------------------------------------------------------------------- 1 | """Module contains commonly used SHA1 digests in the Linked Data Fragments 2 | Data Store and helper functions""" 3 | 4 | __author__ = "Jeremy Nelson" 5 | 6 | import hashlib 7 | import rdflib 8 | 9 | def get_sha1_digest(value): 10 | """Function takes a unicode string and returns it's sha1 digest 11 | 12 | Args: 13 | value -- Unicode string 14 | 15 | Returns: 16 | sha1 of value 17 | """ 18 | return hashlib.sha1(value.encode()).hexdigest() 19 | 20 | OWL = { 21 | "http://www.w3.org/2002/07/owl#sameAs": "7bffe77e6f9af628763e215707119bc2dbc9b927" 22 | } 23 | 24 | RDF = { 25 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "3c197cb1f6842dc41aa48dc8b9032284bcf39a27" 26 | } 27 | 28 | RDFS = { 29 | "http://www.w3.org/2000/01/rdf-schema#label": "9ac796fdb3c1f82ad26a447b600262114a19983b" 30 | } 31 | 32 | -------------------------------------------------------------------------------- /docker-compose-api.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jermnelson/linked-data-fragments/74fed07cdbfd7af17bdc21c90d4928ead4116687/docker-compose-api.yml -------------------------------------------------------------------------------- /docker-compose-server.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jermnelson/linked-data-fragments/74fed07cdbfd7af17bdc21c90d4928ead4116687/docker-compose-server.yml -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | ldfs: 2 | build: . 3 | ports: 4 | - 7000:7000 5 | links: 6 | - redis 7 | - semanticServer 8 | redis: 9 | image: redis 10 | ports: 11 | - 6379:6379 12 | volumes: 13 | - /opt/ldfs/data:/data 14 | semanticServer: 15 | image: jermnelson/semantic-server-core:dev 16 | ports: 17 | - 8080:8080 18 | - 9999:9999 19 | volumes: 20 | - /opt/bibcat_repository/fedora-data:/usr/share/fedora-data 21 | - /opt/bibcat_triplestore:/usr/share/blazegraph 22 | 23 | 24 | -------------------------------------------------------------------------------- /docker_config.py: -------------------------------------------------------------------------------- 1 | config = {"debug": True, 2 | "cache": "Cache", 3 | "redis": {"host": "redis", 4 | "port": 6379, 5 | "ttl": 604800}, 6 | "rest_api": {"host": "localhost", 7 | "port": 18150}, 8 | # Blazegraph SPARQL Endpoint 9 | "triplestore": {"host": "semantic_server", 10 | "port": 8080, 11 | "path": "bigdata"} 12 | } 13 | -------------------------------------------------------------------------------- /lib/add_get_triple.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | add_get_triple 3 | 4 | This script is licensed under the GNU Affero version 3. 
5 | Copyrighted 2015 by Jeremy Nelson 6 | --]] 7 | local function add(value) 8 | local digest = redis.sha1hex(value) 9 | redis.pcall('setnx', digest, value) 10 | return digest 11 | end 12 | 13 | local function add_string(subject_digest, predicate_digest, object_digest) 14 | local triple_key = subject_digest..":"..predicate_digest..":"..object_digest 15 | local triple_body = 1 16 | if ARGV[1] then 17 | triple_body = ARGV[1] 18 | end 19 | redis.pcall('setnx', triple_key, triple_body) 20 | end 21 | 22 | local function add_hash(subject_digest, predicate_digest, object_digest) 23 | local subject_key = subject_digest..":pred-obj" 24 | redis.pcall('hset', subject_key, predicate_digest..":"..object_digest, 1) 25 | local predicate_key = predicate_digest..":subj-obj" 26 | redis.pcall('hset', predicate_key, subject_digest..":"..object_digest, 1) 27 | local object_key = object_digest..":subj-pred" 28 | redis.pcall('hset', object_key, subject_digest..":"..predicate_digest, 1) 29 | end 30 | 31 | local function add_set(subject_digest, predicate_digest, object_digest) 32 | local subject_key = subject_digest..":pred-obj" 33 | redis.pcall('sadd', subject_key, predicate_digest..":"..object_digest) 34 | local predicate_key = predicate_digest..":subj-obj" 35 | redis.pcall('sadd', predicate_key, subject_digest..":"..object_digest) 36 | local object_key = object_digest..":subj-pred" 37 | redis.pcall('sadd', object_key, subject_digest..":"..predicate_digest) 38 | end 39 | 40 | local subject_sha1 = add(KEYS[1]) 41 | local predicate_sha1 = add(KEYS[2]) 42 | local object_sha1 = add(KEYS[3]) 43 | if KEYS[4] then 44 | if KEYS[4] == "hash" then 45 | add_hash(subject_sha1, predicate_sha1, object_sha1) 46 | elseif KEYS[4] == "set" then 47 | add_set(subject_sha1, predicate_sha1, object_sha1) 48 | else 49 | add_string(subject_sha1, predicate_sha1, object_sha1) 50 | end 51 | else 52 | add_set(subject_sha1, predicate_sha1, object_sha1) 53 | end 54 | return true 55 | -------------------------------------------------------------------------------- /lib/get_triple.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | get_triple_search 3 | 4 | This script is licensed under the GNU Affero version 3. Copyrighted 5 | 2015 by Jeremy Nelson 6 | --]] 7 | local subject_sha1, predicate_sha1, object_sha1 = split(KEYS[1], ":") 8 | local output = '[{"@id": "' 9 | output = output..redis.pcall('get', subject_sha1_)..'",' 10 | output = output..redis.pcall('get', predicate_sha1)..'":[{' 11 | local object = redis.pcall('get', object_sha1) 12 | if string.sub(object,1,string.len("http")) == 'http' then 13 | output = output..'"@id": "' 14 | else 15 | output = output..'"@value": "' 16 | end 17 | output = output..'"'..object..'"}]}]' 18 | return output 19 | -------------------------------------------------------------------------------- /lib/redis.conf: -------------------------------------------------------------------------------- 1 | # Redis configuration file example 2 | 3 | # Note on units: when memory size is needed, it is possible to specify 4 | # it in the usual form of 1k 5GB 4M and so forth: 5 | # 6 | # 1k => 1000 bytes 7 | # 1kb => 1024 bytes 8 | # 1m => 1000000 bytes 9 | # 1mb => 1024*1024 bytes 10 | # 1g => 1000000000 bytes 11 | # 1gb => 1024*1024*1024 bytes 12 | # 13 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 14 | 15 | ################################## INCLUDES ################################### 16 | 17 | # Include one or more other config files here. 
This is useful if you 18 | # have a standard template that goes to all Redis servers but also need 19 | # to customize a few per-server settings. Include files can include 20 | # other files, so use this wisely. 21 | # 22 | # Notice option "include" won't be rewritten by command "CONFIG REWRITE" 23 | # from admin or Redis Sentinel. Since Redis always uses the last processed 24 | # line as value of a configuration directive, you'd better put includes 25 | # at the beginning of this file to avoid overwriting config change at runtime. 26 | # 27 | # If instead you are interested in using includes to override configuration 28 | # options, it is better to use include as the last line. 29 | # 30 | # include /path/to/local.conf 31 | # include /path/to/other.conf 32 | 33 | ################################ GENERAL ##################################### 34 | 35 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 36 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 37 | daemonize no 38 | 39 | # When running daemonized, Redis writes a pid file in /var/run/redis.pid by 40 | # default. You can specify a custom pid file location here. 41 | pidfile /var/run/redis.pid 42 | 43 | # Accept connections on the specified port, default is 6379. 44 | # If port 0 is specified Redis will not listen on a TCP socket. 45 | port 6379 46 | 47 | # TCP listen() backlog. 48 | # 49 | # In high requests-per-second environments you need an high backlog in order 50 | # to avoid slow clients connections issues. Note that the Linux kernel 51 | # will silently truncate it to the value of /proc/sys/net/core/somaxconn so 52 | # make sure to raise both the value of somaxconn and tcp_max_syn_backlog 53 | # in order to get the desired effect. 54 | tcp-backlog 511 55 | 56 | # By default Redis listens for connections from all the network interfaces 57 | # available on the server. It is possible to listen to just one or multiple 58 | # interfaces using the "bind" configuration directive, followed by one or 59 | # more IP addresses. 60 | # 61 | # Examples: 62 | # 63 | # bind 192.168.1.100 10.0.0.1 64 | # bind 127.0.0.1 65 | 66 | # Specify the path for the Unix socket that will be used to listen for 67 | # incoming connections. There is no default, so Redis will not listen 68 | # on a unix socket when not specified. 69 | # 70 | # unixsocket /tmp/redis.sock 71 | # unixsocketperm 700 72 | 73 | # Close the connection after a client is idle for N seconds (0 to disable) 74 | timeout 0 75 | 76 | # TCP keepalive. 77 | # 78 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 79 | # of communication. This is useful for two reasons: 80 | # 81 | # 1) Detect dead peers. 82 | # 2) Take the connection alive from the point of view of network 83 | # equipment in the middle. 84 | # 85 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 86 | # Note that to close the connection the double of the time is needed. 87 | # On other kernels the period depends on the kernel configuration. 88 | # 89 | # A reasonable value for this option is 60 seconds. 90 | tcp-keepalive 0 91 | 92 | # Specify the server verbosity level. 
93 | # This can be one of: 94 | # debug (a lot of information, useful for development/testing) 95 | # verbose (many rarely useful info, but not a mess like the debug level) 96 | # notice (moderately verbose, what you want in production probably) 97 | # warning (only very important / critical messages are logged) 98 | loglevel notice 99 | 100 | # Specify the log file name. Also the empty string can be used to force 101 | # Redis to log on the standard output. Note that if you use standard 102 | # output for logging but daemonize, logs will be sent to /dev/null 103 | logfile "" 104 | 105 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 106 | # and optionally update the other syslog parameters to suit your needs. 107 | # syslog-enabled no 108 | 109 | # Specify the syslog identity. 110 | # syslog-ident redis 111 | 112 | # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. 113 | # syslog-facility local0 114 | 115 | # Set the number of databases. The default database is DB 0, you can select 116 | # a different one on a per-connection basis using SELECT where 117 | # dbid is a number between 0 and 'databases'-1 118 | databases 16 119 | 120 | ################################ SNAPSHOTTING ################################ 121 | # 122 | # Save the DB on disk: 123 | # 124 | # save 125 | # 126 | # Will save the DB if both the given number of seconds and the given 127 | # number of write operations against the DB occurred. 128 | # 129 | # In the example below the behaviour will be to save: 130 | # after 900 sec (15 min) if at least 1 key changed 131 | # after 300 sec (5 min) if at least 10 keys changed 132 | # after 60 sec if at least 10000 keys changed 133 | # 134 | # Note: you can disable saving completely by commenting out all "save" lines. 135 | # 136 | # It is also possible to remove all the previously configured save 137 | # points by adding a save directive with a single empty string argument 138 | # like in the following example: 139 | # 140 | # save "" 141 | 142 | save 900 1 143 | save 300 10 144 | save 60 10000 145 | 146 | # By default Redis will stop accepting writes if RDB snapshots are enabled 147 | # (at least one save point) and the latest background save failed. 148 | # This will make the user aware (in a hard way) that data is not persisting 149 | # on disk properly, otherwise chances are that no one will notice and some 150 | # disaster will happen. 151 | # 152 | # If the background saving process will start working again Redis will 153 | # automatically allow writes again. 154 | # 155 | # However if you have setup your proper monitoring of the Redis server 156 | # and persistence, you may want to disable this feature so that Redis will 157 | # continue to work as usual even if there are problems with disk, 158 | # permissions, and so forth. 159 | stop-writes-on-bgsave-error yes 160 | 161 | # Compress string objects using LZF when dump .rdb databases? 162 | # For default that's set to 'yes' as it's almost always a win. 163 | # If you want to save some CPU in the saving child set it to 'no' but 164 | # the dataset will likely be bigger if you have compressible values or keys. 165 | rdbcompression yes 166 | 167 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 168 | # This makes the format more resistant to corruption but there is a performance 169 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 170 | # for maximum performances. 
171 | # 172 | # RDB files created with checksum disabled have a checksum of zero that will 173 | # tell the loading code to skip the check. 174 | rdbchecksum yes 175 | 176 | # The filename where to dump the DB 177 | dbfilename cache.rdb 178 | 179 | # The working directory. 180 | # 181 | # The DB will be written inside this directory, with the filename specified 182 | # above using the 'dbfilename' configuration directive. 183 | # 184 | # The Append Only File will also be created inside this directory. 185 | # 186 | # Note that you must specify a directory here, not a file name. 187 | dir ./ 188 | 189 | ################################# REPLICATION ################################# 190 | 191 | # Master-Slave replication. Use slaveof to make a Redis instance a copy of 192 | # another Redis server. A few things to understand ASAP about Redis replication. 193 | # 194 | # 1) Redis replication is asynchronous, but you can configure a master to 195 | # stop accepting writes if it appears to be not connected with at least 196 | # a given number of slaves. 197 | # 2) Redis slaves are able to perform a partial resynchronization with the 198 | # master if the replication link is lost for a relatively small amount of 199 | # time. You may want to configure the replication backlog size (see the next 200 | # sections of this file) with a sensible value depending on your needs. 201 | # 3) Replication is automatic and does not need user intervention. After a 202 | # network partition slaves automatically try to reconnect to masters 203 | # and resynchronize with them. 204 | # 205 | # slaveof 206 | 207 | # If the master is password protected (using the "requirepass" configuration 208 | # directive below) it is possible to tell the slave to authenticate before 209 | # starting the replication synchronization process, otherwise the master will 210 | # refuse the slave request. 211 | # 212 | # masterauth 213 | 214 | # When a slave loses its connection with the master, or when the replication 215 | # is still in progress, the slave can act in two different ways: 216 | # 217 | # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will 218 | # still reply to client requests, possibly with out of date data, or the 219 | # data set may just be empty if this is the first synchronization. 220 | # 221 | # 2) if slave-serve-stale-data is set to 'no' the slave will reply with 222 | # an error "SYNC with master in progress" to all the kind of commands 223 | # but to INFO and SLAVEOF. 224 | # 225 | slave-serve-stale-data yes 226 | 227 | # You can configure a slave instance to accept writes or not. Writing against 228 | # a slave instance may be useful to store some ephemeral data (because data 229 | # written on a slave will be easily deleted after resync with the master) but 230 | # may also cause problems if clients are writing to it because of a 231 | # misconfiguration. 232 | # 233 | # Since Redis 2.6 by default slaves are read-only. 234 | # 235 | # Note: read only slaves are not designed to be exposed to untrusted clients 236 | # on the internet. It's just a protection layer against misuse of the instance. 237 | # Still a read only slave exports by default all the administrative commands 238 | # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve 239 | # security of read only slaves using 'rename-command' to shadow all the 240 | # administrative / dangerous commands. 241 | slave-read-only yes 242 | 243 | # Replication SYNC strategy: disk or socket. 
244 | # 245 | # ------------------------------------------------------- 246 | # WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY 247 | # ------------------------------------------------------- 248 | # 249 | # New slaves and reconnecting slaves that are not able to continue the replication 250 | # process just receiving differences, need to do what is called a "full 251 | # synchronization". An RDB file is transmitted from the master to the slaves. 252 | # The transmission can happen in two different ways: 253 | # 254 | # 1) Disk-backed: The Redis master creates a new process that writes the RDB 255 | # file on disk. Later the file is transferred by the parent 256 | # process to the slaves incrementally. 257 | # 2) Diskless: The Redis master creates a new process that directly writes the 258 | # RDB file to slave sockets, without touching the disk at all. 259 | # 260 | # With disk-backed replication, while the RDB file is generated, more slaves 261 | # can be queued and served with the RDB file as soon as the current child producing 262 | # the RDB file finishes its work. With diskless replication instead once 263 | # the transfer starts, new slaves arriving will be queued and a new transfer 264 | # will start when the current one terminates. 265 | # 266 | # When diskless replication is used, the master waits a configurable amount of 267 | # time (in seconds) before starting the transfer in the hope that multiple slaves 268 | # will arrive and the transfer can be parallelized. 269 | # 270 | # With slow disks and fast (large bandwidth) networks, diskless replication 271 | # works better. 272 | repl-diskless-sync no 273 | 274 | # When diskless replication is enabled, it is possible to configure the delay 275 | # the server waits in order to spawn the child that transfers the RDB via socket 276 | # to the slaves. 277 | # 278 | # This is important since once the transfer starts, it is not possible to serve 279 | # new slaves arriving, that will be queued for the next RDB transfer, so the server 280 | # waits a delay in order to let more slaves arrive. 281 | # 282 | # The delay is specified in seconds, and by default is 5 seconds. To disable 283 | # it entirely just set it to 0 seconds and the transfer will start ASAP. 284 | repl-diskless-sync-delay 5 285 | 286 | # Slaves send PINGs to server in a predefined interval. It's possible to change 287 | # this interval with the repl_ping_slave_period option. The default value is 10 288 | # seconds. 289 | # 290 | # repl-ping-slave-period 10 291 | 292 | # The following option sets the replication timeout for: 293 | # 294 | # 1) Bulk transfer I/O during SYNC, from the point of view of slave. 295 | # 2) Master timeout from the point of view of slaves (data, pings). 296 | # 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). 297 | # 298 | # It is important to make sure that this value is greater than the value 299 | # specified for repl-ping-slave-period otherwise a timeout will be detected 300 | # every time there is low traffic between the master and the slave. 301 | # 302 | # repl-timeout 60 303 | 304 | # Disable TCP_NODELAY on the slave socket after SYNC? 305 | # 306 | # If you select "yes" Redis will use a smaller number of TCP packets and 307 | # less bandwidth to send data to slaves. But this can add a delay for 308 | # the data to appear on the slave side, up to 40 milliseconds with 309 | # Linux kernels using a default configuration. 
310 | # 311 | # If you select "no" the delay for data to appear on the slave side will 312 | # be reduced but more bandwidth will be used for replication. 313 | # 314 | # By default we optimize for low latency, but in very high traffic conditions 315 | # or when the master and slaves are many hops away, turning this to "yes" may 316 | # be a good idea. 317 | repl-disable-tcp-nodelay no 318 | 319 | # Set the replication backlog size. The backlog is a buffer that accumulates 320 | # slave data when slaves are disconnected for some time, so that when a slave 321 | # wants to reconnect again, often a full resync is not needed, but a partial 322 | # resync is enough, just passing the portion of data the slave missed while 323 | # disconnected. 324 | # 325 | # The bigger the replication backlog, the longer the time the slave can be 326 | # disconnected and later be able to perform a partial resynchronization. 327 | # 328 | # The backlog is only allocated once there is at least a slave connected. 329 | # 330 | # repl-backlog-size 1mb 331 | 332 | # After a master has no longer connected slaves for some time, the backlog 333 | # will be freed. The following option configures the amount of seconds that 334 | # need to elapse, starting from the time the last slave disconnected, for 335 | # the backlog buffer to be freed. 336 | # 337 | # A value of 0 means to never release the backlog. 338 | # 339 | # repl-backlog-ttl 3600 340 | 341 | # The slave priority is an integer number published by Redis in the INFO output. 342 | # It is used by Redis Sentinel in order to select a slave to promote into a 343 | # master if the master is no longer working correctly. 344 | # 345 | # A slave with a low priority number is considered better for promotion, so 346 | # for instance if there are three slaves with priority 10, 100, 25 Sentinel will 347 | # pick the one with priority 10, that is the lowest. 348 | # 349 | # However a special priority of 0 marks the slave as not able to perform the 350 | # role of master, so a slave with priority of 0 will never be selected by 351 | # Redis Sentinel for promotion. 352 | # 353 | # By default the priority is 100. 354 | slave-priority 100 355 | 356 | # It is possible for a master to stop accepting writes if there are less than 357 | # N slaves connected, having a lag less or equal than M seconds. 358 | # 359 | # The N slaves need to be in "online" state. 360 | # 361 | # The lag in seconds, that must be <= the specified value, is calculated from 362 | # the last ping received from the slave, that is usually sent every second. 363 | # 364 | # This option does not GUARANTEE that N replicas will accept the write, but 365 | # will limit the window of exposure for lost writes in case not enough slaves 366 | # are available, to the specified number of seconds. 367 | # 368 | # For example to require at least 3 slaves with a lag <= 10 seconds use: 369 | # 370 | # min-slaves-to-write 3 371 | # min-slaves-max-lag 10 372 | # 373 | # Setting one or the other to 0 disables the feature. 374 | # 375 | # By default min-slaves-to-write is set to 0 (feature disabled) and 376 | # min-slaves-max-lag is set to 10. 377 | 378 | ################################## SECURITY ################################### 379 | 380 | # Require clients to issue AUTH before processing any other 381 | # commands. This might be useful in environments in which you do not trust 382 | # others with access to the host running redis-server. 
383 | # 384 | # This should stay commented out for backward compatibility and because most 385 | # people do not need auth (e.g. they run their own servers). 386 | # 387 | # Warning: since Redis is pretty fast an outside user can try up to 388 | # 150k passwords per second against a good box. This means that you should 389 | # use a very strong password otherwise it will be very easy to break. 390 | # 391 | # requirepass foobared 392 | 393 | # Command renaming. 394 | # 395 | # It is possible to change the name of dangerous commands in a shared 396 | # environment. For instance the CONFIG command may be renamed into something 397 | # hard to guess so that it will still be available for internal-use tools 398 | # but not available for general clients. 399 | # 400 | # Example: 401 | # 402 | # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 403 | # 404 | # It is also possible to completely kill a command by renaming it into 405 | # an empty string: 406 | # 407 | # rename-command CONFIG "" 408 | # 409 | # Please note that changing the name of commands that are logged into the 410 | # AOF file or transmitted to slaves may cause problems. 411 | 412 | ################################### LIMITS #################################### 413 | 414 | # Set the max number of connected clients at the same time. By default 415 | # this limit is set to 10000 clients, however if the Redis server is not 416 | # able to configure the process file limit to allow for the specified limit 417 | # the max number of allowed clients is set to the current file limit 418 | # minus 32 (as Redis reserves a few file descriptors for internal uses). 419 | # 420 | # Once the limit is reached Redis will close all the new connections sending 421 | # an error 'max number of clients reached'. 422 | # 423 | # maxclients 10000 424 | 425 | # Don't use more memory than the specified amount of bytes. 426 | # When the memory limit is reached Redis will try to remove keys 427 | # according to the eviction policy selected (see maxmemory-policy). 428 | # 429 | # If Redis can't remove keys according to the policy, or if the policy is 430 | # set to 'noeviction', Redis will start to reply with errors to commands 431 | # that would use more memory, like SET, LPUSH, and so on, and will continue 432 | # to reply to read-only commands like GET. 433 | # 434 | # This option is usually useful when using Redis as an LRU cache, or to set 435 | # a hard memory limit for an instance (using the 'noeviction' policy). 436 | # 437 | # WARNING: If you have slaves attached to an instance with maxmemory on, 438 | # the size of the output buffers needed to feed the slaves are subtracted 439 | # from the used memory count, so that network problems / resyncs will 440 | # not trigger a loop where keys are evicted, and in turn the output 441 | # buffer of slaves is full with DELs of keys evicted triggering the deletion 442 | # of more keys, and so forth until the database is completely emptied. 443 | # 444 | # In short... if you have slaves attached it is suggested that you set a lower 445 | # limit for maxmemory so that there is some free RAM on the system for slave 446 | # output buffers (but this is not needed if the policy is 'noeviction'). 447 | # 448 | # maxmemory 449 | 450 | # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory 451 | # is reached. 
You can select among five behaviors: 452 | # 453 | # volatile-lru -> remove the key with an expire set using an LRU algorithm 454 | # allkeys-lru -> remove any key according to the LRU algorithm 455 | # volatile-random -> remove a random key with an expire set 456 | # allkeys-random -> remove a random key, any key 457 | # volatile-ttl -> remove the key with the nearest expire time (minor TTL) 458 | # noeviction -> don't expire at all, just return an error on write operations 459 | # 460 | # Note: with any of the above policies, Redis will return an error on write 461 | # operations, when there are no suitable keys for eviction. 462 | # 463 | # At the date of writing these commands are: set setnx setex append 464 | # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd 465 | # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby 466 | # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby 467 | # getset mset msetnx exec sort 468 | # 469 | # The default is: 470 | # 471 | maxmemory-policy volatile-lru 472 | 473 | # LRU and minimal TTL algorithms are not precise algorithms but approximated 474 | # algorithms (in order to save memory), so you can tune it for speed or 475 | # accuracy. For default Redis will check five keys and pick the one that was 476 | # used less recently, you can change the sample size using the following 477 | # configuration directive. 478 | # 479 | # The default of 5 produces good enough results. 10 Approximates very closely 480 | # true LRU but costs a bit more CPU. 3 is very fast but not very accurate. 481 | # 482 | # maxmemory-samples 5 483 | 484 | ############################## APPEND ONLY MODE ############################### 485 | 486 | # By default Redis asynchronously dumps the dataset on disk. This mode is 487 | # good enough in many applications, but an issue with the Redis process or 488 | # a power outage may result into a few minutes of writes lost (depending on 489 | # the configured save points). 490 | # 491 | # The Append Only File is an alternative persistence mode that provides 492 | # much better durability. For instance using the default data fsync policy 493 | # (see later in the config file) Redis can lose just one second of writes in a 494 | # dramatic event like a server power outage, or a single write if something 495 | # wrong with the Redis process itself happens, but the operating system is 496 | # still running correctly. 497 | # 498 | # AOF and RDB persistence can be enabled at the same time without problems. 499 | # If the AOF is enabled on startup Redis will load the AOF, that is the file 500 | # with the better durability guarantees. 501 | # 502 | # Please check http://redis.io/topics/persistence for more information. 503 | 504 | appendonly no 505 | 506 | # The name of the append only file (default: "appendonly.aof") 507 | 508 | appendfilename "appendonly.aof" 509 | 510 | # The fsync() call tells the Operating System to actually write data on disk 511 | # instead of waiting for more data in the output buffer. Some OS will really flush 512 | # data on disk, some other OS will just try to do it ASAP. 513 | # 514 | # Redis supports three different modes: 515 | # 516 | # no: don't fsync, just let the OS flush the data when it wants. Faster. 517 | # always: fsync after every write to the append only log. Slow, Safest. 518 | # everysec: fsync only one time every second. Compromise. 519 | # 520 | # The default is "everysec", as that's usually the right compromise between 521 | # speed and data safety. 
It's up to you to understand if you can relax this to 522 | # "no" that will let the operating system flush the output buffer when 523 | # it wants, for better performances (but if you can live with the idea of 524 | # some data loss consider the default persistence mode that's snapshotting), 525 | # or on the contrary, use "always" that's very slow but a bit safer than 526 | # everysec. 527 | # 528 | # More details please check the following article: 529 | # http://antirez.com/post/redis-persistence-demystified.html 530 | # 531 | # If unsure, use "everysec". 532 | 533 | # appendfsync always 534 | appendfsync everysec 535 | # appendfsync no 536 | 537 | # When the AOF fsync policy is set to always or everysec, and a background 538 | # saving process (a background save or AOF log background rewriting) is 539 | # performing a lot of I/O against the disk, in some Linux configurations 540 | # Redis may block too long on the fsync() call. Note that there is no fix for 541 | # this currently, as even performing fsync in a different thread will block 542 | # our synchronous write(2) call. 543 | # 544 | # In order to mitigate this problem it's possible to use the following option 545 | # that will prevent fsync() from being called in the main process while a 546 | # BGSAVE or BGREWRITEAOF is in progress. 547 | # 548 | # This means that while another child is saving, the durability of Redis is 549 | # the same as "appendfsync none". In practical terms, this means that it is 550 | # possible to lose up to 30 seconds of log in the worst scenario (with the 551 | # default Linux settings). 552 | # 553 | # If you have latency problems turn this to "yes". Otherwise leave it as 554 | # "no" that is the safest pick from the point of view of durability. 555 | 556 | no-appendfsync-on-rewrite no 557 | 558 | # Automatic rewrite of the append only file. 559 | # Redis is able to automatically rewrite the log file implicitly calling 560 | # BGREWRITEAOF when the AOF log size grows by the specified percentage. 561 | # 562 | # This is how it works: Redis remembers the size of the AOF file after the 563 | # latest rewrite (if no rewrite has happened since the restart, the size of 564 | # the AOF at startup is used). 565 | # 566 | # This base size is compared to the current size. If the current size is 567 | # bigger than the specified percentage, the rewrite is triggered. Also 568 | # you need to specify a minimal size for the AOF file to be rewritten, this 569 | # is useful to avoid rewriting the AOF file even if the percentage increase 570 | # is reached but it is still pretty small. 571 | # 572 | # Specify a percentage of zero in order to disable the automatic AOF 573 | # rewrite feature. 574 | 575 | auto-aof-rewrite-percentage 100 576 | auto-aof-rewrite-min-size 64mb 577 | 578 | # An AOF file may be found to be truncated at the end during the Redis 579 | # startup process, when the AOF data gets loaded back into memory. 580 | # This may happen when the system where Redis is running 581 | # crashes, especially when an ext4 filesystem is mounted without the 582 | # data=ordered option (however this can't happen when Redis itself 583 | # crashes or aborts but the operating system still works correctly). 584 | # 585 | # Redis can either exit with an error when this happens, or load as much 586 | # data as possible (the default now) and start if the AOF file is found 587 | # to be truncated at the end. The following option controls this behavior. 
588 | # 589 | # If aof-load-truncated is set to yes, a truncated AOF file is loaded and 590 | # the Redis server starts emitting a log to inform the user of the event. 591 | # Otherwise if the option is set to no, the server aborts with an error 592 | # and refuses to start. When the option is set to no, the user requires 593 | # to fix the AOF file using the "redis-check-aof" utility before to restart 594 | # the server. 595 | # 596 | # Note that if the AOF file will be found to be corrupted in the middle 597 | # the server will still exit with an error. This option only applies when 598 | # Redis will try to read more data from the AOF file but not enough bytes 599 | # will be found. 600 | aof-load-truncated yes 601 | 602 | ################################ LUA SCRIPTING ############################### 603 | 604 | # Max execution time of a Lua script in milliseconds. 605 | # 606 | # If the maximum execution time is reached Redis will log that a script is 607 | # still in execution after the maximum allowed time and will start to 608 | # reply to queries with an error. 609 | # 610 | # When a long running script exceeds the maximum execution time only the 611 | # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be 612 | # used to stop a script that did not yet called write commands. The second 613 | # is the only way to shut down the server in the case a write command was 614 | # already issued by the script but the user doesn't want to wait for the natural 615 | # termination of the script. 616 | # 617 | # Set it to 0 or a negative value for unlimited execution without warnings. 618 | lua-time-limit 5000 619 | 620 | ################################ REDIS CLUSTER ############################### 621 | # 622 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 623 | # WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however 624 | # in order to mark it as "mature" we need to wait for a non trivial percentage 625 | # of users to deploy it in production. 626 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 627 | # 628 | # Normal Redis instances can't be part of a Redis Cluster; only nodes that are 629 | # started as cluster nodes can. In order to start a Redis instance as a 630 | # cluster node enable the cluster support uncommenting the following: 631 | # 632 | # cluster-enabled yes 633 | 634 | # Every cluster node has a cluster configuration file. This file is not 635 | # intended to be edited by hand. It is created and updated by Redis nodes. 636 | # Every Redis Cluster node requires a different cluster configuration file. 637 | # Make sure that instances running in the same system do not have 638 | # overlapping cluster configuration file names. 639 | # 640 | # cluster-config-file nodes-6379.conf 641 | 642 | # Cluster node timeout is the amount of milliseconds a node must be unreachable 643 | # for it to be considered in failure state. 644 | # Most other internal time limits are multiple of the node timeout. 645 | # 646 | # cluster-node-timeout 15000 647 | 648 | # A slave of a failing master will avoid to start a failover if its data 649 | # looks too old. 
650 | # 651 | # There is no simple way for a slave to actually have a exact measure of 652 | # its "data age", so the following two checks are performed: 653 | # 654 | # 1) If there are multiple slaves able to failover, they exchange messages 655 | # in order to try to give an advantage to the slave with the best 656 | # replication offset (more data from the master processed). 657 | # Slaves will try to get their rank by offset, and apply to the start 658 | # of the failover a delay proportional to their rank. 659 | # 660 | # 2) Every single slave computes the time of the last interaction with 661 | # its master. This can be the last ping or command received (if the master 662 | # is still in the "connected" state), or the time that elapsed since the 663 | # disconnection with the master (if the replication link is currently down). 664 | # If the last interaction is too old, the slave will not try to failover 665 | # at all. 666 | # 667 | # The point "2" can be tuned by user. Specifically a slave will not perform 668 | # the failover if, since the last interaction with the master, the time 669 | # elapsed is greater than: 670 | # 671 | # (node-timeout * slave-validity-factor) + repl-ping-slave-period 672 | # 673 | # So for example if node-timeout is 30 seconds, and the slave-validity-factor 674 | # is 10, and assuming a default repl-ping-slave-period of 10 seconds, the 675 | # slave will not try to failover if it was not able to talk with the master 676 | # for longer than 310 seconds. 677 | # 678 | # A large slave-validity-factor may allow slaves with too old data to failover 679 | # a master, while a too small value may prevent the cluster from being able to 680 | # elect a slave at all. 681 | # 682 | # For maximum availability, it is possible to set the slave-validity-factor 683 | # to a value of 0, which means, that slaves will always try to failover the 684 | # master regardless of the last time they interacted with the master. 685 | # (However they'll always try to apply a delay proportional to their 686 | # offset rank). 687 | # 688 | # Zero is the only value able to guarantee that when all the partitions heal 689 | # the cluster will always be able to continue. 690 | # 691 | # cluster-slave-validity-factor 10 692 | 693 | # Cluster slaves are able to migrate to orphaned masters, that are masters 694 | # that are left without working slaves. This improves the cluster ability 695 | # to resist to failures as otherwise an orphaned master can't be failed over 696 | # in case of failure if it has no working slaves. 697 | # 698 | # Slaves migrate to orphaned masters only if there are still at least a 699 | # given number of other working slaves for their old master. This number 700 | # is the "migration barrier". A migration barrier of 1 means that a slave 701 | # will migrate only if there is at least 1 other working slave for its master 702 | # and so forth. It usually reflects the number of slaves you want for every 703 | # master in your cluster. 704 | # 705 | # Default is 1 (slaves migrate only if their masters remain with at least 706 | # one slave). To disable migration just set it to a very large value. 707 | # A value of 0 can be set but is useful only for debugging and dangerous 708 | # in production. 709 | # 710 | # cluster-migration-barrier 1 711 | 712 | # By default Redis Cluster nodes stop accepting queries if they detect there 713 | # is at least an hash slot uncovered (no available node is serving it). 
714 | # This way if the cluster is partially down (for example a range of hash slots 715 | # are no longer covered) all the cluster becomes, eventually, unavailable. 716 | # It automatically returns available as soon as all the slots are covered again. 717 | # 718 | # However sometimes you want the subset of the cluster which is working, 719 | # to continue to accept queries for the part of the key space that is still 720 | # covered. In order to do so, just set the cluster-require-full-coverage 721 | # option to no. 722 | # 723 | # cluster-require-full-coverage yes 724 | 725 | # In order to setup your cluster make sure to read the documentation 726 | # available at http://redis.io web site. 727 | 728 | ################################## SLOW LOG ################################### 729 | 730 | # The Redis Slow Log is a system to log queries that exceeded a specified 731 | # execution time. The execution time does not include the I/O operations 732 | # like talking with the client, sending the reply and so forth, 733 | # but just the time needed to actually execute the command (this is the only 734 | # stage of command execution where the thread is blocked and can not serve 735 | # other requests in the meantime). 736 | # 737 | # You can configure the slow log with two parameters: one tells Redis 738 | # what is the execution time, in microseconds, to exceed in order for the 739 | # command to get logged, and the other parameter is the length of the 740 | # slow log. When a new command is logged the oldest one is removed from the 741 | # queue of logged commands. 742 | 743 | # The following time is expressed in microseconds, so 1000000 is equivalent 744 | # to one second. Note that a negative number disables the slow log, while 745 | # a value of zero forces the logging of every command. 746 | slowlog-log-slower-than 10000 747 | 748 | # There is no limit to this length. Just be aware that it will consume memory. 749 | # You can reclaim memory used by the slow log with SLOWLOG RESET. 750 | slowlog-max-len 128 751 | 752 | ################################ LATENCY MONITOR ############################## 753 | 754 | # The Redis latency monitoring subsystem samples different operations 755 | # at runtime in order to collect data related to possible sources of 756 | # latency of a Redis instance. 757 | # 758 | # Via the LATENCY command this information is available to the user that can 759 | # print graphs and obtain reports. 760 | # 761 | # The system only logs operations that were performed in a time equal or 762 | # greater than the amount of milliseconds specified via the 763 | # latency-monitor-threshold configuration directive. When its value is set 764 | # to zero, the latency monitor is turned off. 765 | # 766 | # By default latency monitoring is disabled since it is mostly not needed 767 | # if you don't have latency issues, and collecting data has a performance 768 | # impact, that while very small, can be measured under big load. Latency 769 | # monitoring can easily be enabled at runtime using the command 770 | # "CONFIG SET latency-monitor-threshold " if needed. 771 | latency-monitor-threshold 0 772 | 773 | ############################# EVENT NOTIFICATION ############################## 774 | 775 | # Redis can notify Pub/Sub clients about events happening in the key space. 
776 | # This feature is documented at http://redis.io/topics/notifications 777 | # 778 | # For instance if keyspace events notification is enabled, and a client 779 | # performs a DEL operation on key "foo" stored in the Database 0, two 780 | # messages will be published via Pub/Sub: 781 | # 782 | # PUBLISH __keyspace@0__:foo del 783 | # PUBLISH __keyevent@0__:del foo 784 | # 785 | # It is possible to select the events that Redis will notify among a set 786 | # of classes. Every class is identified by a single character: 787 | # 788 | # K Keyspace events, published with __keyspace@__ prefix. 789 | # E Keyevent events, published with __keyevent@__ prefix. 790 | # g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 791 | # $ String commands 792 | # l List commands 793 | # s Set commands 794 | # h Hash commands 795 | # z Sorted set commands 796 | # x Expired events (events generated every time a key expires) 797 | # e Evicted events (events generated when a key is evicted for maxmemory) 798 | # A Alias for g$lshzxe, so that the "AKE" string means all the events. 799 | # 800 | # The "notify-keyspace-events" takes as argument a string that is composed 801 | # of zero or multiple characters. The empty string means that notifications 802 | # are disabled. 803 | # 804 | # Example: to enable list and generic events, from the point of view of the 805 | # event name, use: 806 | # 807 | # notify-keyspace-events Elg 808 | # 809 | # Example 2: to get the stream of the expired keys subscribing to channel 810 | # name __keyevent@0__:expired use: 811 | # 812 | # notify-keyspace-events Ex 813 | # 814 | # By default all notifications are disabled because most users don't need 815 | # this feature and the feature has some overhead. Note that if you don't 816 | # specify at least one of K or E, no events will be delivered. 817 | notify-keyspace-events "" 818 | 819 | ############################### ADVANCED CONFIG ############################### 820 | 821 | # Hashes are encoded using a memory efficient data structure when they have a 822 | # small number of entries, and the biggest entry does not exceed a given 823 | # threshold. These thresholds can be configured using the following directives. 824 | hash-max-ziplist-entries 512 825 | hash-max-ziplist-value 64 826 | 827 | # Similarly to hashes, small lists are also encoded in a special way in order 828 | # to save a lot of space. The special representation is only used when 829 | # you are under the following limits: 830 | list-max-ziplist-entries 512 831 | list-max-ziplist-value 64 832 | 833 | # Sets have a special encoding in just one case: when a set is composed 834 | # of just strings that happen to be integers in radix 10 in the range 835 | # of 64 bit signed integers. 836 | # The following configuration setting sets the limit in the size of the 837 | # set in order to use this special memory saving encoding. 838 | set-max-intset-entries 512 839 | 840 | # Similarly to hashes and lists, sorted sets are also specially encoded in 841 | # order to save a lot of space. This encoding is only used when the length and 842 | # elements of a sorted set are below the following limits: 843 | zset-max-ziplist-entries 128 844 | zset-max-ziplist-value 64 845 | 846 | # HyperLogLog sparse representation bytes limit. The limit includes the 847 | # 16 bytes header. When an HyperLogLog using the sparse representation crosses 848 | # this limit, it is converted into the dense representation. 
849 | # 850 | # A value greater than 16000 is totally useless, since at that point the 851 | # dense representation is more memory efficient. 852 | # 853 | # The suggested value is ~ 3000 in order to have the benefits of 854 | # the space efficient encoding without slowing down too much PFADD, 855 | # which is O(N) with the sparse encoding. The value can be raised to 856 | # ~ 10000 when CPU is not a concern, but space is, and the data set is 857 | # composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 858 | hll-sparse-max-bytes 3000 859 | 860 | # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in 861 | # order to help rehashing the main Redis hash table (the one mapping top-level 862 | # keys to values). The hash table implementation Redis uses (see dict.c) 863 | # performs a lazy rehashing: the more operation you run into a hash table 864 | # that is rehashing, the more rehashing "steps" are performed, so if the 865 | # server is idle the rehashing is never complete and some more memory is used 866 | # by the hash table. 867 | # 868 | # The default is to use this millisecond 10 times every second in order to 869 | # actively rehash the main dictionaries, freeing memory when possible. 870 | # 871 | # If unsure: 872 | # use "activerehashing no" if you have hard latency requirements and it is 873 | # not a good thing in your environment that Redis can reply from time to time 874 | # to queries with 2 milliseconds delay. 875 | # 876 | # use "activerehashing yes" if you don't have such hard requirements but 877 | # want to free memory asap when possible. 878 | activerehashing yes 879 | 880 | # The client output buffer limits can be used to force disconnection of clients 881 | # that are not reading data from the server fast enough for some reason (a 882 | # common reason is that a Pub/Sub client can't consume messages as fast as the 883 | # publisher can produce them). 884 | # 885 | # The limit can be set differently for the three different classes of clients: 886 | # 887 | # normal -> normal clients including MONITOR clients 888 | # slave -> slave clients 889 | # pubsub -> clients subscribed to at least one pubsub channel or pattern 890 | # 891 | # The syntax of every client-output-buffer-limit directive is the following: 892 | # 893 | # client-output-buffer-limit 894 | # 895 | # A client is immediately disconnected once the hard limit is reached, or if 896 | # the soft limit is reached and remains reached for the specified number of 897 | # seconds (continuously). 898 | # So for instance if the hard limit is 32 megabytes and the soft limit is 899 | # 16 megabytes / 10 seconds, the client will get disconnected immediately 900 | # if the size of the output buffers reach 32 megabytes, but will also get 901 | # disconnected if the client reaches 16 megabytes and continuously overcomes 902 | # the limit for 10 seconds. 903 | # 904 | # By default normal clients are not limited because they don't receive data 905 | # without asking (in a push way), but just after a request, so only 906 | # asynchronous clients may create a scenario where data is requested faster 907 | # than it can read. 908 | # 909 | # Instead there is a default limit for pubsub and slave clients, since 910 | # subscribers and slaves receive data in a push fashion. 911 | # 912 | # Both the hard or the soft limit can be disabled by setting them to zero. 
913 | client-output-buffer-limit normal 0 0 0 914 | client-output-buffer-limit slave 256mb 64mb 60 915 | client-output-buffer-limit pubsub 32mb 8mb 60 916 | 917 | # Redis calls an internal function to perform many background tasks, like 918 | # closing connections of clients in timeout, purging expired keys that are 919 | # never requested, and so forth. 920 | # 921 | # Not all tasks are performed with the same frequency, but Redis checks for 922 | # tasks to perform according to the specified "hz" value. 923 | # 924 | # By default "hz" is set to 10. Raising the value will use more CPU when 925 | # Redis is idle, but at the same time will make Redis more responsive when 926 | # there are many keys expiring at the same time, and timeouts may be 927 | # handled with more precision. 928 | # 929 | # The range is between 1 and 500, however a value over 100 is usually not 930 | # a good idea. Most users should use the default of 10 and raise this up to 931 | # 100 only in environments where very low latency is required. 932 | hz 10 933 | 934 | # When a child rewrites the AOF file, if the following option is enabled 935 | # the file will be fsync-ed every 32 MB of data generated. This is useful 936 | # in order to commit the file to the disk more incrementally and avoid 937 | # big latency spikes. 938 | aof-rewrite-incremental-fsync yes 939 | -------------------------------------------------------------------------------- /lib/triple_pattern_search.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | triple_pattern_search. 3 | 4 | This script is licensed under the GNU Affero version 3. Copyrighted 5 | 2015 by Jeremy Nelson 6 | --]] 7 | --[[if redis.pcall("exists", KEYS[1]) then 8 | local subject, predicate, object = string.match("^(%a+):(%a+):(%a+)") 9 | output 10 | --]] 11 | local output = {} 12 | local cursor = ARGV[1] 13 | if not cursor then 14 | cursor = 0 15 | end 16 | cursor, result = redis.pcall("scan", cursor, "match="..KEYS[1], "count=100") 17 | for i,key_digest in ipairs(result) do 18 | --[[Should preprocess result from get call to support namespaces --]] 19 | output[i] = redis.pcall("get", key_digest) 20 | end 21 | return output 22 | 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aioredis 2 | aiohttp 3 | falcon 4 | flask 5 | rdflib 6 | redis 7 | requests 8 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo" 2 | 3 | import argparse 4 | import asyncio 5 | from aiohttp import web 6 | import cache.aio as cache 7 | import json 8 | import rdflib 9 | import shlex 10 | 11 | try: 12 | from config import config 13 | except ImportError: 14 | config = {"debug": True, 15 | "cache": "Cache", 16 | "host": "0.0.0.0", 17 | "port": 7000, 18 | "redis": {"host": "localhost", 19 | "port": 6379, 20 | "ttl": 604800}, 21 | # Blazegraph SPARQL Endpoint 22 | "triplestore": {"host": "localhost", 23 | "port": 8080, 24 | "path": "bigdata"}, 25 | 26 | 27 | } 28 | 29 | 30 | @asyncio.coroutine 31 | def check_add(resource): 32 | """Coroutine attempts to retrieve an URL or Literal 33 | value from cache, 34 | if not present in cache, attempts to retrieve the sha1 35 | hashed value from the cache, otherwise adds the subject 36 | to the cache with the 
serialized value.
37 | 
38 |     Args:
39 |         resource -- URI/URL or Literal value to add or retrieve
40 |     """
41 |     rHash = yield from cache.get_digest(resource)  # cache.aio's get_digest returns the value's SHA1 digest
42 |     return rHash
43 | 
44 | @asyncio.coroutine
45 | def handle_triple(request):
46 |     if request.method.startswith('POST'):
47 |         data = request.POST
48 |     elif request.method.startswith('GET'):
49 |         data = request.GET
50 |     else:
51 |         data = {}
52 |     subject_key = yield from cache.get_digest(data.get('s'))
53 |     predicate_key = yield from cache.get_digest(data.get('p'))
54 |     object_key = yield from cache.get_digest(data.get('o'))
55 |     result = yield from cache.get_triple(subject_key, predicate_key, object_key)
56 |     output = {"subject": data.get('s'),
57 |               "predicate-objects": []}
58 | 
59 |     for triple_key in result:
60 |         triples = triple_key.decode().split(":")
61 |         predicate = yield from cache.get_value(triples[1])
62 |         object_ = yield from cache.get_value(triples[-1])
63 |         output["predicate-objects"].append(
64 |             {"p": predicate,
65 |              "o": object_})
66 |     return web.Response(body=json.dumps(output).encode(),
67 |                         content_type="application/json")
68 | 
69 | 
70 | @asyncio.coroutine
71 | def init_http_server(loop):
72 |     app = web.Application(loop=loop)
73 |     app.router.add_route('GET', '/', handle_triple)
74 |     server = yield from loop.create_server(app.make_handler(),
75 |                                            config.get('host'),
76 |                                            config.get('port'))
77 |     if config.get('debug'):
78 |         print("Running HTTP Server at {} {}".format(config.get('host'),
79 |                                                     config.get('port')))
80 |     return server
81 | 
82 | 
83 | @asyncio.coroutine
84 | def init_socket_server(loop):
85 |     server = yield from loop.create_server(LinkedDataFragmentsServer,
86 |                                            config.get('host'),
87 |                                            config.get('port'))
88 |     if config.get('debug'):
89 |         print("Running Socket Server at {} {}".format(config.get('host'),
90 |                                                       config.get('port')))
91 |     return server
92 | 
93 | 
94 | @asyncio.coroutine
95 | def sparql_subject(value):
96 |     return "Need SPARQL query"
97 | 
98 | class LinkedDataFragmentsServer(asyncio.Protocol):
99 | 
100 |     def connection_made(self, transport):
101 |         """Called when a client connects
102 |         Args:
103 |             transport -- asyncio transport for the new connection
104 |         """
105 |         self.transport = transport
106 |         #print("transport type={} methods={}".format(type(self.transport), dir(self.transport)))
107 | 
108 |     def data_received(self, data):
109 |         """Method receives incoming HTTP request data
110 | 
111 |         Args:
112 |             data -- raw bytes received from the client
113 |         """
114 |         print(data, type(data))
115 |         self.transport.write("{}".format("Response").encode())
116 |         self.transport.close()
117 | 
118 | 
119 | if __name__ == '__main__':
120 |     parser = argparse.ArgumentParser()
121 |     parser.add_argument(
122 |         'action',
123 |         nargs='?', choices=['socket', 'http'],
124 |         default='http',
125 |         help='Run server as either: socket, http, default is http')
126 |     args = parser.parse_args()
127 |     loop = asyncio.get_event_loop()
128 |     if args.action.lower().startswith('socket'):
129 |         server = loop.run_until_complete(init_socket_server(loop))
130 |     elif args.action.lower().startswith('http'):
131 |         server = loop.run_until_complete(init_http_server(loop))
132 |     try:
133 |         loop.run_forever()
134 |     finally:
135 |         server.close()
136 |         loop.close()
137 | 
138 | 
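[Note: a sketch of how the HTTP endpoint above can be exercised; illustrative only, not part of the repository. It assumes "python server.py http" is running with the fallback config (port 7000) and a reachable Redis; the subject IRI is made up. The requests library is already listed in requirements.txt.]

    import requests

    resp = requests.get("http://localhost:7000/",
                        params={"s": "http://example.org/book/1"})
    # handle_triple responds with JSON shaped like:
    # {"subject": "...", "predicate-objects": [{"p": "...", "o": "..."}]}
    print(resp.json())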
113 | """ 114 | print(data, type(data)) 115 | self.transport.write("{}".format("Response").encode()) 116 | self.transport.close() 117 | 118 | 119 | if __name__ == '__main__': 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument( 122 | 'action', 123 | choices=['socket', 'http'], 124 | default='http', 125 | help='Run server as either: socket, http, default is http') 126 | args = parser.parse_args() 127 | loop = asyncio.get_event_loop() 128 | if args.action.lower().startswith('socket'): 129 | server = loop.run_until_complete(init_socket_server(loop)) 130 | elif args.action.lower().startswith('http'): 131 | server = loop.run_until_complete(init_http_server(loop)) 132 | try: 133 | loop.run_forever() 134 | finally: 135 | server.close() 136 | loop.close() 137 | 138 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson" 2 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo" 2 | __license__ = "GPL Affero" 3 | 4 | import unittest 5 | import server 6 | 7 | if __name__ == '__main__': 8 | unittest.main() 9 | --------------------------------------------------------------------------------