├── .gitignore ├── Dockerfile ├── DockerfileServer ├── LICENSE ├── README.md ├── api.py ├── cache ├── __init__.py ├── aio.py ├── cluster.py └── twemproxy.py ├── digests.py ├── docker-compose-api.yml ├── docker-compose-server.yml ├── docker-compose.yml ├── docker_config.py ├── lib ├── add_get_triple.lua ├── get_triple.lua ├── redis.conf └── triple_pattern_search.lua ├── requirements.txt ├── server.py ├── setup.py └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.rdb 4 | *DS_STORE 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | #Dockerfile for Linked Data Fragments Base 2 | FROM python:3.5.1 3 | MAINTAINER Jeremy Nelson 4 | 5 | # Set environmental variables 6 | ENV LDFS_HOME /opt/ldfs 7 | 8 | # Update Ubuntu and install Python 3 setuptools, git and other 9 | # packages 10 | RUN apt-get update && apt-get install -y && \ 11 | apt-get install -y python3-setuptools &&\ 12 | apt-get install -y git &&\ 13 | apt-get install -y python3-pip 14 | 15 | 16 | # Retrieve latest development branch of Linked Data Fragments project on 17 | # github.com 18 | RUN git clone https://github.com/jermnelson/linked-data-fragments.git $LDFS_HOME \ 19 | && cd $LDFS_HOME \ 20 | && git checkout -b development \ 21 | && git pull origin development \ 22 | && pip3 install -r requirements.txt \ 23 | && touch __init__.py 24 | 25 | WORKDIR $LDFS_HOME 26 | CMD ["nohup", "python", "server.py", "&"] 27 | -------------------------------------------------------------------------------- /DockerfileServer: -------------------------------------------------------------------------------- 1 | # This Dockerfile run the asynco Linked Data Fragments Server 2 | FROM jermnelson/ldfs-base 3 | MAINTAINER Jeremy Nelson 4 | 5 | EXPOSE 7000 6 | WORKDIR $LDFS_HOME 7 | #CMD ['python server.py run'] 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 
26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 
151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. 
This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 
332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 
396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. 
For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 
508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. 
It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | 663 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # linked-data-fragments 2 | Python Linked Data Fragment server using asyncio and Redis; See 3 | https://docs.google.com/presentation/d/1oCbMKO0iwulkiqgDarfcto9naYVzl6rcCsxukGLVDLk/edit?usp=sharing 4 | for more information. 
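A quick way to exercise the service once api.py (next section) is running: its falcon app registers a single route at "/" whose GET handler reads optional s, p, and o query parameters through the triple_key hook. The sketch below is illustrative only and assumes the fallback configuration hard-coded in api.py (REST API on localhost:18150); the subject, predicate, and object values are made-up placeholders, and the exact JSON returned depends on what is already in the Redis cache.

    # Minimal client sketch against the triple-pattern endpoint (assumptions above).
    import requests

    params = {
        "s": "http://example.org/resource/1",               # hypothetical subject URI
        "p": "http://www.w3.org/2000/01/rdf-schema#label",  # predicate URI
        "o": "Example label",                               # object literal
    }
    response = requests.get("http://localhost:18150/", params=params)
    print(response.status_code, response.json())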
5 | 
--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | 
4 | import digests
5 | import falcon
6 | import hashlib
7 | import json
8 | import os
9 | import rdflib
10 | import requests
11 | from cache import TRIPLE_SPARQL
12 | try:
13 |     from config import config
14 | except ImportError:
15 |     config = {"debug": True,
16 |               "cache": "Cache",
17 |               "redis": {"host": "localhost",
18 |                         "port": 6379,
19 |                         "ttl": 604800},
20 |               "rest_api": {"host": "localhost",
21 |                            "port": 18150},
22 |               # Blazegraph SPARQL Endpoint
23 |               "triplestore": {"host": "localhost",
24 |                               "port": 8080,
25 |                               "path": "bigdata"},
26 | 
27 |     }
28 | 
29 | if config['cache'].startswith("TwemproxyCache"):
30 |     from cache.twemproxy import TwemproxyCache
31 |     CACHE = TwemproxyCache(**config)
32 | elif config['cache'].startswith("ClusterCache"):
33 |     from cache.cluster import ClusterCache
34 |     CACHE = ClusterCache(**config)
35 | else:
36 |     from cache import Cache
37 |     CACHE = Cache(**config)
38 | print("CACHE is {}".format(CACHE))
39 | 
40 | 
41 | rest = falcon.API()
42 | 
43 | 
44 | # Hooks
45 | def triple_key(req, resp, params):
46 |     if len(params) < 1:
47 |         params = req.params
48 |     subj = params.get('s', None)
49 |     pred = params.get('p', None)
50 |     obj = params.get('o', None)
51 |     triple_str, resp.body = None, None
52 |     print("In triple key {} {} {}".format(subj, pred, obj))
53 |     if subj and pred and obj:
54 |         triple_str = CACHE.datastore.evalsha(
55 |             CACHE.add_get_triple,
56 |             3,
57 |             subj,
58 |             pred,
59 |             obj)
60 |         if triple_str and CACHE.datastore.exists(triple_str):
61 |             triple_key = triple_str.decode()
62 |             triple_digests = triple_key.split(":")
63 |             resp.body = json.dumps(
64 |                 {"key": triple_str.decode(),
65 |                  "subject_sha1": triple_digests[0],
66 |                  "predicate_sha1": triple_digests[1],
67 |                  "object_sha1": triple_digests[2]}
68 |             )
69 |         elif triple_str:
70 |             resp.body = json.dumps(
71 |                 {"missing-triple-key": triple_str.decode()}
72 |             )
73 |         else:
74 |             raise falcon.HTTPNotFound()
75 |     output = {"metadata": {"p": "void:triples",
76 |                            "o": 0 },
77 |               "data": []}
78 |     # Subject search
79 |     if subj and (pred is None or obj is None):
80 |         print("Before subject key")
81 |         subject_key = "{}:pred-obj".format(hashlib.sha1(str(subj).encode()).hexdigest())
82 |         if not pred and not obj:
83 |             # Retrieve the entire set
84 |             results = CACHE.datastore.smembers(subject_key)
85 |         else:
86 |             if not pred:
87 |                 pattern = "*:{}".format(hashlib.sha1(str(obj).encode()).hexdigest())
88 |             else:
89 |                 pattern = "{}:*".format(hashlib.sha1(str(pred).encode()).hexdigest())
90 |             cursor, results = CACHE.datastore.sscan(subject_key, 0, match=pattern)
91 |             while cursor:
92 |                 cursor, shard_results = CACHE.datastore.sscan(
93 |                     subject_key,
94 |                     cursor,
95 |                     match=pattern)
96 |                 results.extend(shard_results)
97 |                 if len(results) >= 100:
98 |                     output["metadata"]["cursor"] = cursor
99 |                     break
100 |         output["metadata"]["o"] = len(results)
101 |         for triple_key in results:
102 |             triples = triple_key.decode().split(":")
103 |             output["data"].append({"p": CACHE.datastore.get(triples[0]).decode(),
104 |                                    "o": CACHE.datastore.get(triples[-1]).decode(),
105 |                                    "s": subj})
106 | 
107 |     if pred and (subj is None or obj is None) and len(output["data"]) < 1:
108 |         predicate_key = "{}:subj-obj".format(
109 |             hashlib.sha1(str(pred).encode()).hexdigest())
110 |         if not obj and not subj:
111 |             results = CACHE.datastore.smembers(predicate_key)
112 |         else:
113 |             if not obj:
114 |                 pattern = "{}:*".format(
115 |                     hashlib.sha1(str(subj).encode()).hexdigest())
116 |             else:
117 |                 pattern = "*:{}".format(
118 |                     hashlib.sha1(str(obj).encode()).hexdigest())
119 |             cursor, results = CACHE.datastore.sscan(
120 |                 predicate_key,
121 |                 0,
122 |                 match=pattern)
123 |             while cursor:
124 |                 cursor, shard_results = CACHE.datastore.sscan(
125 |                     predicate_key,
126 |                     cursor,
127 |                     match=pattern)
128 |                 results.extend(shard_results)
129 |                 if len(results) >= 100:
130 |                     output["metadata"]["cursor"] = cursor
131 |                     break
132 |         for triple_key in results:
133 |             triples = triple_key.decode().split(":")
134 |             output["data"].append({"p": pred,
135 |                                    "o": CACHE.datastore.get(triples[-1]).decode(),
136 |                                    "s": CACHE.datastore.get(triples[0]).decode()})
137 |     if obj and (subj is None or pred is None) and len(output["data"]) < 1:
138 |         obj_key = "{}:subj-pred".format(
139 |             hashlib.sha1(str(obj).encode()).hexdigest())
140 |         if not subj and not pred:
141 |             results = CACHE.datastore.smembers(obj_key)
142 |         else:
143 |             if not subj:
144 |                 pattern = "*:{}".format(
145 |                     hashlib.sha1(str(pred).encode()).hexdigest())
146 |             else:
147 |                 pattern = "{}:*".format(
148 |                     hashlib.sha1(str(subj).encode()).hexdigest())
149 |             cursor, results = CACHE.datastore.sscan(
150 |                 obj_key,
151 |                 0,
152 |                 match=pattern)
153 |             while cursor:
154 |                 cursor, shard_results = CACHE.datastore.sscan(
155 |                     obj_key,
156 |                     cursor,
157 |                     match=pattern)
158 |                 results.extend(shard_results)
159 |                 if len(results) >= 100:
160 |                     output["metadata"]["cursor"] = cursor
161 |                     break
162 |         for triple_key in results:
163 |             triples = triple_key.decode().split(":")
164 |             output["data"].append({"p": CACHE.datastore.get(triples[-1]).decode(),
165 |                                    "o": obj,
166 |                                    "s": CACHE.datastore.get(triples[0]).decode()})
167 |     resp.body = json.dumps(output)
168 | 
169 | def get_triples(pattern):
170 |     cursor = -1
171 |     output = []
172 |     iterations = 0
173 |     while 1:
174 |         iterations += 1
175 |         if cursor == 0:
176 |             break
177 |         elif cursor < 0:
178 |             cursor = 0
179 |         cursor, resources = CACHE.datastore.scan(
180 |             cursor=cursor,
181 |             match=pattern,
182 |             count=1000)
183 |         cursor = int(cursor)
184 |         if len(resources) > 0:
185 |             output.extend(resources)
186 |     return output
187 | 
188 | 
189 | def get_types(type_uri):
190 |     """Function takes a type uri and returns all triple keys that
191 |     match the RDF type for that uri.
192 | 
193 |     Args:
194 |         type_uri -- URI to search for
195 | 
196 |     Returns:
197 |         A list of all triples that match the RDF type of the
198 |         type_uri
199 |     """
200 |     pattern = "*:{}:{}".format(digests.RDF.get(str(rdflib.RDF.type)),
201 |                                digests.get_sha1_digest(type_uri))
202 |     return get_triples(pattern)
203 | 
204 | 
205 | def get_graph(pattern):
206 |     graph = rdflib.Graph()
207 |     transaction = CACHE.datastore.pipeline(transaction=True)
208 |     for key in get_triples(pattern):
209 |         transaction.get(key)
210 |     json_triples = transaction.execute()
211 | 
212 | 
213 | def get_subject_graph(subject):
214 |     """Function takes a subject URI and scans through cache for all
215 |     triples matching the subject
216 | 
217 |     Args:
218 |         subject -- subject URI
219 | 
220 |     Returns:
221 |         rdflib.Graph made up of all triples
222 |     """
223 | 
224 |     pattern = "{}:*:*".format(digests.get_sha1_digest(subject))
225 |     transaction = CACHE.datastore.pipeline
226 |     for key in get_triples(pattern):
227 |         pass
228 | 
229 | class Triple:
230 | 
231 |     def __init__(self, **kwargs):
232 |         self.triplestore_url = kwargs.get("triplestore_url", None)
233 |         if not self.triplestore_url:
234 |             self.triplestore_url = "http://{}:{}/{}".format(
235 |                 config.get('triplestore').get('host'),
236 |                 config.get('triplestore').get('port'),
237 |                 config.get('triplestore').get('path'))
238 | 
239 |     @falcon.before(triple_key)
240 |     def on_get(self, req, resp):
241 |         if not resp.body:
242 |             # Should search SPARQL endpoint and add to cache
243 |             # if found
244 |             result = requests.post(self.triplestore_url,
245 |                 data={"query": TRIPLE_SPARQL.format(req.params.get('s'),
246 |                                                     req.params.get('p'),
247 |                                                     req.params.get('o')),
248 |                       "format": "json"})
249 |             if result.status_code < 399:
250 |                 bindings = result.json().get('results').get('bindings')
251 |                 if len(bindings) > 0:
252 |                     for binding in bindings:
253 |                         print(binding)
254 | 
255 |             else:
256 |                 raise falcon.HTTPNotFound()
257 |         resp.status = falcon.HTTP_200
258 | 
259 | 
260 |         # raise falcon.HTTPInternalServerError(
261 |         #     "Failed to retrieve triple key",
262 |         #     "Subject={} Predicate={} Object={}".format(req.args.get('s'),
263 |         #                                                req.args.get('p'),
264 |         #                                                req.args.get('o')))
265 | 
266 |     @falcon.before(triple_key)
267 |     def on_post(self, req, resp):
268 |         if resp.body:
269 |             if 'missing-triple-key' in resp.body:
270 |                 print(resp.body)
271 |         else:
272 |             raise falcon.HTTPInternalServerError("Error with server", "Could not set triple")
273 |         resp.status = falcon.HTTP_201
274 | 
275 | triple = Triple()
276 | rest.add_route("/", triple)
277 | 
278 | if __name__ == '__main__':
279 |     if config.get('debug'):
280 |         from werkzeug.serving import run_simple
281 |         run_simple(
282 |             config.get('rest_api').get('host'),
283 |             config.get('rest_api').get('port'),
284 |             rest,
285 |             use_reloader=True)
286 |     else:
287 |         print("Production mode not supported")
288 | 
289 | 
--------------------------------------------------------------------------------
/cache/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import json
4 | import hashlib
5 | import os
6 | import redis
7 | 
8 | # Different strategies for storing triple information in
9 | # Redis data structures;
10 | def hash_pattern(transaction,
11 |                  subject_sha1,
12 |                  predicate_sha1,
13 |                  object_sha1):
14 |     pass
15 | 
16 | def string_pattern(transaction,
17 |                    subject_sha1,
18 |                    predicate_sha1,
19 |                    object_sha1):
20 |     """The string pattern is the simplest to implement
21 |     but slow O(n) performance with KEYS and SCAN"""
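    # Illustrative layout, given SHA1 digests s1, p1, o1 for one triple:
    #   string pattern: SET "s1:p1:o1" 1  -- pattern queries must KEYS/SCAN the whole keyspace
    #   set pattern:    SADD "s1:pred-obj" "p1:o1" (plus "p1:subj-obj" and "o1:subj-pred")
    # so set_pattern below trades extra writes for direct per-term membership lookups.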
22 |     transaction.set("{}:{}:{}".format(
23 |         subject_sha1,
24 |         predicate_sha1,
25 |         object_sha1),
26 |         1)
27 | 
28 | def set_pattern(transaction,
29 |                 subject_sha1,
30 |                 predicate_sha1,
31 |                 object_sha1):
32 |     transaction.sadd("{}:pred-obj".format(subject_sha1),
33 |                      "{}:{}".format(predicate_sha1,
34 |                                     object_sha1))
35 |     transaction.sadd("{}:subj-obj".format(predicate_sha1),
36 |                      "{}:{}".format(subject_sha1,
37 |                                     object_sha1))
38 |     transaction.sadd("{}:subj-pred".format(object_sha1),
39 |                      "{}:{}".format(subject_sha1,
40 |                                     predicate_sha1))
41 | 
42 | 
43 | def add_triple(datastore, subject, predicate, object_, pattern="string"):
44 |     subject_sha1 = hashlib.sha1(subject.encode()).hexdigest()
45 |     predicate_sha1 = hashlib.sha1(predicate.encode()).hexdigest()
46 |     object_sha1 = hashlib.sha1(object_.encode()).hexdigest()
47 |     transaction = datastore.pipeline(transaction=True)
48 |     transaction.set(subject_sha1, subject)
49 |     transaction.set(predicate_sha1, predicate)
50 |     transaction.set(object_sha1, object_)
51 |     if pattern.startswith("string"):
52 |         strategy = string_pattern
53 |     elif pattern.startswith("hash"):
54 |         strategy = hash_pattern
55 |     elif pattern.startswith("set"):
56 |         strategy = set_pattern
57 |     strategy(transaction,
58 |              subject_sha1,
59 |              predicate_sha1,
60 |              object_sha1)
61 |     transaction.execute()
62 | 
63 | def remove_expired(**kwargs):
64 |     datastore = kwargs.get("datastore", redis.StrictRedis())
65 |     strategy = kwargs.get("strategy", "string")
66 |     database = kwargs.get('db', 0)
67 |     if strategy.startswith('string'):
68 |         return
69 |     expired_key_notification = "__keyevent@{}__:expired"
70 |     expired_pubsub = datastore.pubsub()
71 |     expired_pubsub.subscribe(expired_key_notification.format(database))
72 |     for item in expired_pubsub.listen():
73 |         sha1 = item.get("data")
74 |         transaction = datastore.pipeline(transaction=True)
75 |         remove_subject(sha1, transaction, datastore)
76 |         remove_predicate(sha1, transaction, datastore)
77 |         remove_object(sha1, transaction, datastore)
78 |         transaction.execute()
79 | 
80 | def remove_object(digest, transaction, datastore=redis.StrictRedis()):
81 |     object_key = "{}:subj-pred".format(digest)
82 |     if not datastore.exists(object_key):
83 |         return
84 |     for row in datastore.smembers(object_key):
85 |         subject_digest, predicate_digest = row.decode().split(":")
86 |         subj_pred_obj = "{}:pred-obj".format(subject_digest)
87 |         if datastore.exists(subj_pred_obj):
88 |             transaction.srem(
89 |                 subj_pred_obj,
90 |                 "{}:{}".format(predicate_digest, digest))
91 |         pred_subj_obj = "{}:subj-obj".format(predicate_digest)
92 |         if datastore.exists(pred_subj_obj):
93 |             transaction.srem(
94 |                 pred_subj_obj,
95 |                 "{}:{}".format(subject_digest, digest))
96 |     transaction.delete(object_key)
97 | 
98 | 
99 | def remove_predicate(digest, transaction, datastore=redis.StrictRedis()):
100 |     predicate_key = "{}:subj-obj".format(digest)
101 |     if not datastore.exists(predicate_key):
102 |         return
103 |     for row in datastore.smembers(predicate_key):
104 |         subject_digest, object_digest = row.decode().split(":")
105 |         subj_pred_obj = "{}:pred-obj".format(subject_digest)
106 |         if datastore.exists(subj_pred_obj):
107 |             transaction.srem(
108 |                 subj_pred_obj,
109 |                 "{}:{}".format(digest, object_digest))
110 |         obj_subj_pred = "{}:subj-pred".format(object_digest)
111 |         if datastore.exists(obj_subj_pred):
112 |             transaction.srem(
113 |                 obj_subj_pred,
114 |                 "{}:{}".format(subject_digest, digest))
115 |     transaction.delete(predicate_key)
116 | 
117 | 
118 | def remove_subject(digest, transaction, datastore=redis.StrictRedis()):
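    """Removes the set-pattern index entries for an expired subject digest:
    for each member of the subject's pred-obj set, drops the matching rows from
    the predicate's subj-obj set and the object's subj-pred set, then deletes
    the subject's own pred-obj set."""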
119 |     subject_key = "{}:pred-obj".format(digest)
120 |     if not datastore.exists(subject_key):
121 |         return
122 |     for row in datastore.smembers(subject_key):
123 |         predicate, object_ = row.decode().split(":")
124 |         pred_subj_obj = "{}:subj-obj".format(predicate)
125 |         if datastore.exists(pred_subj_obj):
126 |             transaction.srem(pred_subj_obj,
127 |                              "{}:{}".format(digest, object_))
128 |         obj_subj_pred = "{}:subj-pred".format(object_)
129 |         if datastore.exists(obj_subj_pred):
130 |             transaction.srem(
131 |                 obj_subj_pred,
132 |                 "{}:{}".format(digest, predicate))
133 |     transaction.delete(subject_key)
134 | 
135 | # SPARQL statements
136 | TRIPLE_SPARQL = """SELECT DISTINCT *
137 | WHERE {{{{
138 |    {} {} {} .
139 | }}}}"""
140 | 
141 | class Cache(object):
142 | 
143 |     def __init__(self, **kwargs):
144 |         self.lua_scripts = dict()
145 |         redis_config = kwargs.get('redis', None)
146 |         if redis_config:
147 |             self.datastore = redis.StrictRedis(
148 |                 host=redis_config.get('host'),
149 |                 port=redis_config.get('port'))
150 |         else:
151 |             self.datastore = redis.StrictRedis()
152 |         lua_location = kwargs.get('lua_location', None)
153 |         if not lua_location:
154 |             base_dir = os.path.dirname(os.path.abspath(__name__))
155 |             lua_location = os.path.join(base_dir, "lib")
156 |         for name in ["get_triple",
157 |                      "add_get_triple",
158 |                      "triple_pattern_search"]:
159 |             filepath = os.path.join(
160 |                 lua_location, "{}.lua".format(name))
161 |             with open(filepath) as fo:
162 |                 lua_script = fo.read()
163 |             sha1 = self.datastore.script_load(lua_script)
164 |             setattr(self, name, sha1)
165 | 
166 |     def __get_sha1__(self, entity):
167 |         return hashlib.sha1(entity.encode()).hexdigest()
168 | 
169 |     def triple_search(self, subject=None, predicate=None, object_=None):
170 |         triple_str = self.datastore.evalsha(
171 |             self.add_get_triple,
172 |             3,
173 |             subject,
174 |             predicate,
175 |             object_)
176 |         if triple_str:
177 |             if self.datastore.exists(triple_str):
178 |                 return json.dumps(self.datastore.get(triple_str).decode())
179 |             else:
180 |                 return {"result": "Missing Triple Key {}".format(triple_str)}
181 | 
182 | 
183 | 
--------------------------------------------------------------------------------
/cache/aio.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo"
2 | 
3 | import asyncio
4 | import aioredis
5 | import hashlib
6 | import os
7 | import redis
8 | try:
9 |     from config import config
10 | except ImportError:
11 |     config = {"redis": {"host": "localhost",
12 |                         "port": 6379,
13 |                         "ttl": 604800
14 |                         }}
15 | 
16 | LUA_SCRIPTS = {}
17 | BASE_DIR = os.path.dirname(os.path.abspath(__name__))
18 | LUA_LOCATION = os.path.join(BASE_DIR, "lib")
19 | DATASTORE = redis.StrictRedis(host=config.get("redis")["host"],
20 |                               port=config.get("redis")["port"])
21 | for name in ["add_get_triple",
22 |              "get_triple",
23 |              "triple_pattern_search"]:
24 |     filepath = os.path.join(
25 |         LUA_LOCATION, "{}.lua".format(name))
26 |     with open(filepath) as fo:
27 |         lua_script = fo.read()
28 |     sha1 = DATASTORE.script_load(lua_script)
29 |     LUA_SCRIPTS[name] = sha1
30 | 
31 | @asyncio.coroutine
32 | def get_digest(value):
33 |     """Get digest takes either a URI/URL or a Literal value and
34 |     calls the SHA1 for the add_get_hash.lua script.
35 | 
36 |     Args:
37 |         value -- URI/URL or Literal value
38 |     """
39 |     if not value:
40 |         return None
41 |     connection = yield from aioredis.create_connection(
42 |         (config.get("redis")["host"],
43 |          config.get("redis")["port"]),
44 |         encoding='utf-8')
45 |     sha1_digest = yield from connection.execute(
46 |         b'EVALSHA',
47 |         LUA_SCRIPTS['add_get_hash'],  # NOTE: lib/ only ships the three scripts loaded above; add_get_hash.lua is not among them
48 |         1,
49 |         value,
50 |         config.get("redis").get('ttl'))
51 |     connection.close()
52 |     return sha1_digest
53 | 
54 | 
55 | @asyncio.coroutine
56 | def get_value(digest):
57 |     connection = yield from aioredis.create_redis(
58 |         (config.get("redis")["host"],
59 |          config.get("redis")["port"]),
60 |         encoding='utf-8')
61 |     value = yield from connection.get(digest)
62 |     connection.close()
63 |     return value
64 | 
65 | @asyncio.coroutine
66 | def get_triple(subject_key, predicate_key, object_key):
67 |     connection = yield from aioredis.create_redis(
68 |         (config.get("redis")["host"],
69 |          config.get("redis")["port"]))
70 | 
71 |     pattern = str()
72 |     for key in [subject_key, predicate_key, object_key]:
73 |         if key is None:
74 |             pattern += "*:"
75 |         else:
76 |             pattern += "{}:".format(key)
77 |     pattern = pattern[:-1]
78 |     cur = b'0'
79 |     # Scan incrementally with SCAN so large keyspaces do not block the server
80 |     results = []
81 |     while cur:
82 |         cur, keys = yield from connection.scan(cur,
83 |                                                match=pattern,
84 |                                                count=1000)
85 |         if len(keys) > 0:
86 |             results.extend(keys)
87 |     connection.close()
88 |     return results
89 | 
--------------------------------------------------------------------------------
/cache/cluster.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import hashlib
4 | from rediscluster import StrictRedisCluster
5 | from . import Cache
6 | 
7 | class ClusterCache(Cache):
8 | 
9 |     def __init__(self, **kwargs):
10 |         startup_nodes = kwargs.get("startup_nodes")
11 |         if not startup_nodes:
12 |             startup_nodes = [{"port": 30001, "host": "0.0.0.0"},
13 |                              {"port": 30002, "host": "0.0.0.0"}]
14 |         self.cache = StrictRedisCluster(startup_nodes=startup_nodes)
15 | 
16 | 
17 |     def __get_sha1__(self, value):
18 |         return hashlib.sha1(value.encode()).hexdigest()
19 | 
20 |     def triple_search(self, subject, predicate, object_):
21 |         triple_str = "{}:{}:{}".format(
22 |             self.__get_sha1__(subject),
23 |             self.__get_sha1__(predicate),
24 |             self.__get_sha1__(object_))
25 | 
--------------------------------------------------------------------------------
/cache/twemproxy.py:
--------------------------------------------------------------------------------
1 | __author__ = "Jeremy Nelson"
2 | 
3 | import redis
4 | import socket
5 | from . import Cache
import Cache 6 | 7 | class TwemproxyCache(Cache): 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def triple_search(self, 13 | subject=None, 14 | predicate=None, 15 | object_=None): 16 | pass 17 | -------------------------------------------------------------------------------- /digests.py: -------------------------------------------------------------------------------- 1 | """Module contains commonly used SHA1 digests in the Linked Data Fragments 2 | Data Store and helper functions""" 3 | 4 | __author__ = "Jeremy Nelson" 5 | 6 | import hashlib 7 | import rdflib 8 | 9 | def get_sha1_digest(value): 10 | """Function takes a unicode string and returns it's sha1 digest 11 | 12 | Args: 13 | value -- Unicode string 14 | 15 | Returns: 16 | sha1 of value 17 | """ 18 | return hashlib.sha1(value.encode()).hexdigest() 19 | 20 | OWL = { 21 | "http://www.w3.org/2002/07/owl#sameAs": "7bffe77e6f9af628763e215707119bc2dbc9b927" 22 | } 23 | 24 | RDF = { 25 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "3c197cb1f6842dc41aa48dc8b9032284bcf39a27" 26 | } 27 | 28 | RDFS = { 29 | "http://www.w3.org/2000/01/rdf-schema#label": "9ac796fdb3c1f82ad26a447b600262114a19983b" 30 | } 31 | 32 | -------------------------------------------------------------------------------- /docker-compose-api.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jermnelson/linked-data-fragments/74fed07cdbfd7af17bdc21c90d4928ead4116687/docker-compose-api.yml -------------------------------------------------------------------------------- /docker-compose-server.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jermnelson/linked-data-fragments/74fed07cdbfd7af17bdc21c90d4928ead4116687/docker-compose-server.yml -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | ldfs: 2 | build: . 3 | ports: 4 | - 7000:7000 5 | links: 6 | - redis 7 | - semanticServer 8 | redis: 9 | image: redis 10 | ports: 11 | - 6379:6379 12 | volumes: 13 | - /opt/ldfs/data:/data 14 | semanticServer: 15 | image: jermnelson/semantic-server-core:dev 16 | ports: 17 | - 8080:8080 18 | - 9999:9999 19 | volumes: 20 | - /opt/bibcat_repository/fedora-data:/usr/share/fedora-data 21 | - /opt/bibcat_triplestore:/usr/share/blazegraph 22 | 23 | 24 | -------------------------------------------------------------------------------- /docker_config.py: -------------------------------------------------------------------------------- 1 | config = {"debug": True, 2 | "cache": "Cache", 3 | "redis": {"host": "redis", 4 | "port": 6379, 5 | "ttl": 604800}, 6 | "rest_api": {"host": "localhost", 7 | "port": 18150}, 8 | # Blazegraph SPARQL Endpoint 9 | "triplestore": {"host": "semantic_server", 10 | "port": 8080, 11 | "path": "bigdata"} 12 | } 13 | -------------------------------------------------------------------------------- /lib/add_get_triple.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | add_get_triple 3 | 4 | This script is licensed under the GNU Affero version 3. 
5 | Copyrighted 2015 by Jeremy Nelson 6 | --]] 7 | local function add(value) 8 | local digest = redis.sha1hex(value) 9 | redis.pcall('setnx', digest, value) 10 | return digest 11 | end 12 | 13 | local function add_string(subject_digest, predicate_digest, object_digest) 14 | local triple_key = subject_digest..":"..predicate_digest..":"..object_digest 15 | local triple_body = 1 16 | if ARGV[1] then 17 | triple_body = ARGV[1] 18 | end 19 | redis.pcall('setnx', triple_key, triple_body) 20 | end 21 | 22 | local function add_hash(subject_digest, predicate_digest, object_digest) 23 | local subject_key = subject_digest..":pred-obj" 24 | redis.pcall('hset', subject_key, predicate_digest..":"..object_digest, 1) 25 | local predicate_key = predicate_digest..":subj-obj" 26 | redis.pcall('hset', predicate_key, subject_digest..":"..object_digest, 1) 27 | local object_key = object_digest..":subj-pred" 28 | redis.pcall('hset', object_key, subject_digest..":"..predicate_digest, 1) 29 | end 30 | 31 | local function add_set(subject_digest, predicate_digest, object_digest) 32 | local subject_key = subject_digest..":pred-obj" 33 | redis.pcall('sadd', subject_key, predicate_digest..":"..object_digest) 34 | local predicate_key = predicate_digest..":subj-obj" 35 | redis.pcall('sadd', predicate_key, subject_digest..":"..object_digest) 36 | local object_key = object_digest..":subj-pred" 37 | redis.pcall('sadd', object_key, subject_digest..":"..predicate_digest) 38 | end 39 | 40 | local subject_sha1 = add(KEYS[1]) 41 | local predicate_sha1 = add(KEYS[2]) 42 | local object_sha1 = add(KEYS[3]) 43 | if KEYS[4] then 44 | if KEYS[4] == "hash" then 45 | add_hash(subject_sha1, predicate_sha1, object_sha1) 46 | elseif KEYS[4] == "set" then 47 | add_set(subject_sha1, predicate_sha1, object_sha1) 48 | else 49 | add_string(subject_sha1, predicate_sha1, object_sha1) 50 | end 51 | else 52 | add_set(subject_sha1, predicate_sha1, object_sha1) 53 | end 54 | return true 55 | -------------------------------------------------------------------------------- /lib/get_triple.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | get_triple_search 3 | 4 | This script is licensed under the GNU Affero version 3. Copyrighted 5 | 2015 by Jeremy Nelson 6 | --]] 7 | local subject_sha1, predicate_sha1, object_sha1 = split(KEYS[1], ":") 8 | local output = '[{"@id": "' 9 | output = output..redis.pcall('get', subject_sha1_)..'",' 10 | output = output..redis.pcall('get', predicate_sha1)..'":[{' 11 | local object = redis.pcall('get', object_sha1) 12 | if string.sub(object,1,string.len("http")) == 'http' then 13 | output = output..'"@id": "' 14 | else 15 | output = output..'"@value": "' 16 | end 17 | output = output..'"'..object..'"}]}]' 18 | return output 19 | -------------------------------------------------------------------------------- /lib/redis.conf: -------------------------------------------------------------------------------- 1 | # Redis configuration file example 2 | 3 | # Note on units: when memory size is needed, it is possible to specify 4 | # it in the usual form of 1k 5GB 4M and so forth: 5 | # 6 | # 1k => 1000 bytes 7 | # 1kb => 1024 bytes 8 | # 1m => 1000000 bytes 9 | # 1mb => 1024*1024 bytes 10 | # 1g => 1000000000 bytes 11 | # 1gb => 1024*1024*1024 bytes 12 | # 13 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 14 | 15 | ################################## INCLUDES ################################### 16 | 17 | # Include one or more other config files here. 
This is useful if you 18 | # have a standard template that goes to all Redis servers but also need 19 | # to customize a few per-server settings. Include files can include 20 | # other files, so use this wisely. 21 | # 22 | # Notice option "include" won't be rewritten by command "CONFIG REWRITE" 23 | # from admin or Redis Sentinel. Since Redis always uses the last processed 24 | # line as value of a configuration directive, you'd better put includes 25 | # at the beginning of this file to avoid overwriting config change at runtime. 26 | # 27 | # If instead you are interested in using includes to override configuration 28 | # options, it is better to use include as the last line. 29 | # 30 | # include /path/to/local.conf 31 | # include /path/to/other.conf 32 | 33 | ################################ GENERAL ##################################### 34 | 35 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 36 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 37 | daemonize no 38 | 39 | # When running daemonized, Redis writes a pid file in /var/run/redis.pid by 40 | # default. You can specify a custom pid file location here. 41 | pidfile /var/run/redis.pid 42 | 43 | # Accept connections on the specified port, default is 6379. 44 | # If port 0 is specified Redis will not listen on a TCP socket. 45 | port 6379 46 | 47 | # TCP listen() backlog. 48 | # 49 | # In high requests-per-second environments you need an high backlog in order 50 | # to avoid slow clients connections issues. Note that the Linux kernel 51 | # will silently truncate it to the value of /proc/sys/net/core/somaxconn so 52 | # make sure to raise both the value of somaxconn and tcp_max_syn_backlog 53 | # in order to get the desired effect. 54 | tcp-backlog 511 55 | 56 | # By default Redis listens for connections from all the network interfaces 57 | # available on the server. It is possible to listen to just one or multiple 58 | # interfaces using the "bind" configuration directive, followed by one or 59 | # more IP addresses. 60 | # 61 | # Examples: 62 | # 63 | # bind 192.168.1.100 10.0.0.1 64 | # bind 127.0.0.1 65 | 66 | # Specify the path for the Unix socket that will be used to listen for 67 | # incoming connections. There is no default, so Redis will not listen 68 | # on a unix socket when not specified. 69 | # 70 | # unixsocket /tmp/redis.sock 71 | # unixsocketperm 700 72 | 73 | # Close the connection after a client is idle for N seconds (0 to disable) 74 | timeout 0 75 | 76 | # TCP keepalive. 77 | # 78 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 79 | # of communication. This is useful for two reasons: 80 | # 81 | # 1) Detect dead peers. 82 | # 2) Take the connection alive from the point of view of network 83 | # equipment in the middle. 84 | # 85 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 86 | # Note that to close the connection the double of the time is needed. 87 | # On other kernels the period depends on the kernel configuration. 88 | # 89 | # A reasonable value for this option is 60 seconds. 90 | tcp-keepalive 0 91 | 92 | # Specify the server verbosity level. 
93 | # This can be one of: 94 | # debug (a lot of information, useful for development/testing) 95 | # verbose (many rarely useful info, but not a mess like the debug level) 96 | # notice (moderately verbose, what you want in production probably) 97 | # warning (only very important / critical messages are logged) 98 | loglevel notice 99 | 100 | # Specify the log file name. Also the empty string can be used to force 101 | # Redis to log on the standard output. Note that if you use standard 102 | # output for logging but daemonize, logs will be sent to /dev/null 103 | logfile "" 104 | 105 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 106 | # and optionally update the other syslog parameters to suit your needs. 107 | # syslog-enabled no 108 | 109 | # Specify the syslog identity. 110 | # syslog-ident redis 111 | 112 | # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. 113 | # syslog-facility local0 114 | 115 | # Set the number of databases. The default database is DB 0, you can select 116 | # a different one on a per-connection basis using SELECT where 117 | # dbid is a number between 0 and 'databases'-1 118 | databases 16 119 | 120 | ################################ SNAPSHOTTING ################################ 121 | # 122 | # Save the DB on disk: 123 | # 124 | # save 125 | # 126 | # Will save the DB if both the given number of seconds and the given 127 | # number of write operations against the DB occurred. 128 | # 129 | # In the example below the behaviour will be to save: 130 | # after 900 sec (15 min) if at least 1 key changed 131 | # after 300 sec (5 min) if at least 10 keys changed 132 | # after 60 sec if at least 10000 keys changed 133 | # 134 | # Note: you can disable saving completely by commenting out all "save" lines. 135 | # 136 | # It is also possible to remove all the previously configured save 137 | # points by adding a save directive with a single empty string argument 138 | # like in the following example: 139 | # 140 | # save "" 141 | 142 | save 900 1 143 | save 300 10 144 | save 60 10000 145 | 146 | # By default Redis will stop accepting writes if RDB snapshots are enabled 147 | # (at least one save point) and the latest background save failed. 148 | # This will make the user aware (in a hard way) that data is not persisting 149 | # on disk properly, otherwise chances are that no one will notice and some 150 | # disaster will happen. 151 | # 152 | # If the background saving process will start working again Redis will 153 | # automatically allow writes again. 154 | # 155 | # However if you have setup your proper monitoring of the Redis server 156 | # and persistence, you may want to disable this feature so that Redis will 157 | # continue to work as usual even if there are problems with disk, 158 | # permissions, and so forth. 159 | stop-writes-on-bgsave-error yes 160 | 161 | # Compress string objects using LZF when dump .rdb databases? 162 | # For default that's set to 'yes' as it's almost always a win. 163 | # If you want to save some CPU in the saving child set it to 'no' but 164 | # the dataset will likely be bigger if you have compressible values or keys. 165 | rdbcompression yes 166 | 167 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 168 | # This makes the format more resistant to corruption but there is a performance 169 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 170 | # for maximum performances. 
171 | # 172 | # RDB files created with checksum disabled have a checksum of zero that will 173 | # tell the loading code to skip the check. 174 | rdbchecksum yes 175 | 176 | # The filename where to dump the DB 177 | dbfilename cache.rdb 178 | 179 | # The working directory. 180 | # 181 | # The DB will be written inside this directory, with the filename specified 182 | # above using the 'dbfilename' configuration directive. 183 | # 184 | # The Append Only File will also be created inside this directory. 185 | # 186 | # Note that you must specify a directory here, not a file name. 187 | dir ./ 188 | 189 | ################################# REPLICATION ################################# 190 | 191 | # Master-Slave replication. Use slaveof to make a Redis instance a copy of 192 | # another Redis server. A few things to understand ASAP about Redis replication. 193 | # 194 | # 1) Redis replication is asynchronous, but you can configure a master to 195 | # stop accepting writes if it appears to be not connected with at least 196 | # a given number of slaves. 197 | # 2) Redis slaves are able to perform a partial resynchronization with the 198 | # master if the replication link is lost for a relatively small amount of 199 | # time. You may want to configure the replication backlog size (see the next 200 | # sections of this file) with a sensible value depending on your needs. 201 | # 3) Replication is automatic and does not need user intervention. After a 202 | # network partition slaves automatically try to reconnect to masters 203 | # and resynchronize with them. 204 | # 205 | # slaveof 206 | 207 | # If the master is password protected (using the "requirepass" configuration 208 | # directive below) it is possible to tell the slave to authenticate before 209 | # starting the replication synchronization process, otherwise the master will 210 | # refuse the slave request. 211 | # 212 | # masterauth 213 | 214 | # When a slave loses its connection with the master, or when the replication 215 | # is still in progress, the slave can act in two different ways: 216 | # 217 | # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will 218 | # still reply to client requests, possibly with out of date data, or the 219 | # data set may just be empty if this is the first synchronization. 220 | # 221 | # 2) if slave-serve-stale-data is set to 'no' the slave will reply with 222 | # an error "SYNC with master in progress" to all the kind of commands 223 | # but to INFO and SLAVEOF. 224 | # 225 | slave-serve-stale-data yes 226 | 227 | # You can configure a slave instance to accept writes or not. Writing against 228 | # a slave instance may be useful to store some ephemeral data (because data 229 | # written on a slave will be easily deleted after resync with the master) but 230 | # may also cause problems if clients are writing to it because of a 231 | # misconfiguration. 232 | # 233 | # Since Redis 2.6 by default slaves are read-only. 234 | # 235 | # Note: read only slaves are not designed to be exposed to untrusted clients 236 | # on the internet. It's just a protection layer against misuse of the instance. 237 | # Still a read only slave exports by default all the administrative commands 238 | # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve 239 | # security of read only slaves using 'rename-command' to shadow all the 240 | # administrative / dangerous commands. 241 | slave-read-only yes 242 | 243 | # Replication SYNC strategy: disk or socket. 
244 | # 245 | # ------------------------------------------------------- 246 | # WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY 247 | # ------------------------------------------------------- 248 | # 249 | # New slaves and reconnecting slaves that are not able to continue the replication 250 | # process just receiving differences, need to do what is called a "full 251 | # synchronization". An RDB file is transmitted from the master to the slaves. 252 | # The transmission can happen in two different ways: 253 | # 254 | # 1) Disk-backed: The Redis master creates a new process that writes the RDB 255 | # file on disk. Later the file is transferred by the parent 256 | # process to the slaves incrementally. 257 | # 2) Diskless: The Redis master creates a new process that directly writes the 258 | # RDB file to slave sockets, without touching the disk at all. 259 | # 260 | # With disk-backed replication, while the RDB file is generated, more slaves 261 | # can be queued and served with the RDB file as soon as the current child producing 262 | # the RDB file finishes its work. With diskless replication instead once 263 | # the transfer starts, new slaves arriving will be queued and a new transfer 264 | # will start when the current one terminates. 265 | # 266 | # When diskless replication is used, the master waits a configurable amount of 267 | # time (in seconds) before starting the transfer in the hope that multiple slaves 268 | # will arrive and the transfer can be parallelized. 269 | # 270 | # With slow disks and fast (large bandwidth) networks, diskless replication 271 | # works better. 272 | repl-diskless-sync no 273 | 274 | # When diskless replication is enabled, it is possible to configure the delay 275 | # the server waits in order to spawn the child that transfers the RDB via socket 276 | # to the slaves. 277 | # 278 | # This is important since once the transfer starts, it is not possible to serve 279 | # new slaves arriving, that will be queued for the next RDB transfer, so the server 280 | # waits a delay in order to let more slaves arrive. 281 | # 282 | # The delay is specified in seconds, and by default is 5 seconds. To disable 283 | # it entirely just set it to 0 seconds and the transfer will start ASAP. 284 | repl-diskless-sync-delay 5 285 | 286 | # Slaves send PINGs to server in a predefined interval. It's possible to change 287 | # this interval with the repl_ping_slave_period option. The default value is 10 288 | # seconds. 289 | # 290 | # repl-ping-slave-period 10 291 | 292 | # The following option sets the replication timeout for: 293 | # 294 | # 1) Bulk transfer I/O during SYNC, from the point of view of slave. 295 | # 2) Master timeout from the point of view of slaves (data, pings). 296 | # 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). 297 | # 298 | # It is important to make sure that this value is greater than the value 299 | # specified for repl-ping-slave-period otherwise a timeout will be detected 300 | # every time there is low traffic between the master and the slave. 301 | # 302 | # repl-timeout 60 303 | 304 | # Disable TCP_NODELAY on the slave socket after SYNC? 305 | # 306 | # If you select "yes" Redis will use a smaller number of TCP packets and 307 | # less bandwidth to send data to slaves. But this can add a delay for 308 | # the data to appear on the slave side, up to 40 milliseconds with 309 | # Linux kernels using a default configuration. 
310 | # 311 | # If you select "no" the delay for data to appear on the slave side will 312 | # be reduced but more bandwidth will be used for replication. 313 | # 314 | # By default we optimize for low latency, but in very high traffic conditions 315 | # or when the master and slaves are many hops away, turning this to "yes" may 316 | # be a good idea. 317 | repl-disable-tcp-nodelay no 318 | 319 | # Set the replication backlog size. The backlog is a buffer that accumulates 320 | # slave data when slaves are disconnected for some time, so that when a slave 321 | # wants to reconnect again, often a full resync is not needed, but a partial 322 | # resync is enough, just passing the portion of data the slave missed while 323 | # disconnected. 324 | # 325 | # The bigger the replication backlog, the longer the time the slave can be 326 | # disconnected and later be able to perform a partial resynchronization. 327 | # 328 | # The backlog is only allocated once there is at least a slave connected. 329 | # 330 | # repl-backlog-size 1mb 331 | 332 | # After a master has no longer connected slaves for some time, the backlog 333 | # will be freed. The following option configures the amount of seconds that 334 | # need to elapse, starting from the time the last slave disconnected, for 335 | # the backlog buffer to be freed. 336 | # 337 | # A value of 0 means to never release the backlog. 338 | # 339 | # repl-backlog-ttl 3600 340 | 341 | # The slave priority is an integer number published by Redis in the INFO output. 342 | # It is used by Redis Sentinel in order to select a slave to promote into a 343 | # master if the master is no longer working correctly. 344 | # 345 | # A slave with a low priority number is considered better for promotion, so 346 | # for instance if there are three slaves with priority 10, 100, 25 Sentinel will 347 | # pick the one with priority 10, that is the lowest. 348 | # 349 | # However a special priority of 0 marks the slave as not able to perform the 350 | # role of master, so a slave with priority of 0 will never be selected by 351 | # Redis Sentinel for promotion. 352 | # 353 | # By default the priority is 100. 354 | slave-priority 100 355 | 356 | # It is possible for a master to stop accepting writes if there are less than 357 | # N slaves connected, having a lag less or equal than M seconds. 358 | # 359 | # The N slaves need to be in "online" state. 360 | # 361 | # The lag in seconds, that must be <= the specified value, is calculated from 362 | # the last ping received from the slave, that is usually sent every second. 363 | # 364 | # This option does not GUARANTEE that N replicas will accept the write, but 365 | # will limit the window of exposure for lost writes in case not enough slaves 366 | # are available, to the specified number of seconds. 367 | # 368 | # For example to require at least 3 slaves with a lag <= 10 seconds use: 369 | # 370 | # min-slaves-to-write 3 371 | # min-slaves-max-lag 10 372 | # 373 | # Setting one or the other to 0 disables the feature. 374 | # 375 | # By default min-slaves-to-write is set to 0 (feature disabled) and 376 | # min-slaves-max-lag is set to 10. 377 | 378 | ################################## SECURITY ################################### 379 | 380 | # Require clients to issue AUTH before processing any other 381 | # commands. This might be useful in environments in which you do not trust 382 | # others with access to the host running redis-server. 
383 | # 384 | # This should stay commented out for backward compatibility and because most 385 | # people do not need auth (e.g. they run their own servers). 386 | # 387 | # Warning: since Redis is pretty fast an outside user can try up to 388 | # 150k passwords per second against a good box. This means that you should 389 | # use a very strong password otherwise it will be very easy to break. 390 | # 391 | # requirepass foobared 392 | 393 | # Command renaming. 394 | # 395 | # It is possible to change the name of dangerous commands in a shared 396 | # environment. For instance the CONFIG command may be renamed into something 397 | # hard to guess so that it will still be available for internal-use tools 398 | # but not available for general clients. 399 | # 400 | # Example: 401 | # 402 | # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 403 | # 404 | # It is also possible to completely kill a command by renaming it into 405 | # an empty string: 406 | # 407 | # rename-command CONFIG "" 408 | # 409 | # Please note that changing the name of commands that are logged into the 410 | # AOF file or transmitted to slaves may cause problems. 411 | 412 | ################################### LIMITS #################################### 413 | 414 | # Set the max number of connected clients at the same time. By default 415 | # this limit is set to 10000 clients, however if the Redis server is not 416 | # able to configure the process file limit to allow for the specified limit 417 | # the max number of allowed clients is set to the current file limit 418 | # minus 32 (as Redis reserves a few file descriptors for internal uses). 419 | # 420 | # Once the limit is reached Redis will close all the new connections sending 421 | # an error 'max number of clients reached'. 422 | # 423 | # maxclients 10000 424 | 425 | # Don't use more memory than the specified amount of bytes. 426 | # When the memory limit is reached Redis will try to remove keys 427 | # according to the eviction policy selected (see maxmemory-policy). 428 | # 429 | # If Redis can't remove keys according to the policy, or if the policy is 430 | # set to 'noeviction', Redis will start to reply with errors to commands 431 | # that would use more memory, like SET, LPUSH, and so on, and will continue 432 | # to reply to read-only commands like GET. 433 | # 434 | # This option is usually useful when using Redis as an LRU cache, or to set 435 | # a hard memory limit for an instance (using the 'noeviction' policy). 436 | # 437 | # WARNING: If you have slaves attached to an instance with maxmemory on, 438 | # the size of the output buffers needed to feed the slaves are subtracted 439 | # from the used memory count, so that network problems / resyncs will 440 | # not trigger a loop where keys are evicted, and in turn the output 441 | # buffer of slaves is full with DELs of keys evicted triggering the deletion 442 | # of more keys, and so forth until the database is completely emptied. 443 | # 444 | # In short... if you have slaves attached it is suggested that you set a lower 445 | # limit for maxmemory so that there is some free RAM on the system for slave 446 | # output buffers (but this is not needed if the policy is 'noeviction'). 447 | # 448 | # maxmemory 449 | 450 | # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory 451 | # is reached. 
You can select among five behaviors: 452 | # 453 | # volatile-lru -> remove the key with an expire set using an LRU algorithm 454 | # allkeys-lru -> remove any key according to the LRU algorithm 455 | # volatile-random -> remove a random key with an expire set 456 | # allkeys-random -> remove a random key, any key 457 | # volatile-ttl -> remove the key with the nearest expire time (minor TTL) 458 | # noeviction -> don't expire at all, just return an error on write operations 459 | # 460 | # Note: with any of the above policies, Redis will return an error on write 461 | # operations, when there are no suitable keys for eviction. 462 | # 463 | # At the date of writing these commands are: set setnx setex append 464 | # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd 465 | # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby 466 | # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby 467 | # getset mset msetnx exec sort 468 | # 469 | # The default is: 470 | # 471 | maxmemory-policy volatile-lru 472 | 473 | # LRU and minimal TTL algorithms are not precise algorithms but approximated 474 | # algorithms (in order to save memory), so you can tune it for speed or 475 | # accuracy. For default Redis will check five keys and pick the one that was 476 | # used less recently, you can change the sample size using the following 477 | # configuration directive. 478 | # 479 | # The default of 5 produces good enough results. 10 Approximates very closely 480 | # true LRU but costs a bit more CPU. 3 is very fast but not very accurate. 481 | # 482 | # maxmemory-samples 5 483 | 484 | ############################## APPEND ONLY MODE ############################### 485 | 486 | # By default Redis asynchronously dumps the dataset on disk. This mode is 487 | # good enough in many applications, but an issue with the Redis process or 488 | # a power outage may result into a few minutes of writes lost (depending on 489 | # the configured save points). 490 | # 491 | # The Append Only File is an alternative persistence mode that provides 492 | # much better durability. For instance using the default data fsync policy 493 | # (see later in the config file) Redis can lose just one second of writes in a 494 | # dramatic event like a server power outage, or a single write if something 495 | # wrong with the Redis process itself happens, but the operating system is 496 | # still running correctly. 497 | # 498 | # AOF and RDB persistence can be enabled at the same time without problems. 499 | # If the AOF is enabled on startup Redis will load the AOF, that is the file 500 | # with the better durability guarantees. 501 | # 502 | # Please check http://redis.io/topics/persistence for more information. 503 | 504 | appendonly no 505 | 506 | # The name of the append only file (default: "appendonly.aof") 507 | 508 | appendfilename "appendonly.aof" 509 | 510 | # The fsync() call tells the Operating System to actually write data on disk 511 | # instead of waiting for more data in the output buffer. Some OS will really flush 512 | # data on disk, some other OS will just try to do it ASAP. 513 | # 514 | # Redis supports three different modes: 515 | # 516 | # no: don't fsync, just let the OS flush the data when it wants. Faster. 517 | # always: fsync after every write to the append only log. Slow, Safest. 518 | # everysec: fsync only one time every second. Compromise. 519 | # 520 | # The default is "everysec", as that's usually the right compromise between 521 | # speed and data safety. 
It's up to you to understand if you can relax this to 522 | # "no" that will let the operating system flush the output buffer when 523 | # it wants, for better performances (but if you can live with the idea of 524 | # some data loss consider the default persistence mode that's snapshotting), 525 | # or on the contrary, use "always" that's very slow but a bit safer than 526 | # everysec. 527 | # 528 | # More details please check the following article: 529 | # http://antirez.com/post/redis-persistence-demystified.html 530 | # 531 | # If unsure, use "everysec". 532 | 533 | # appendfsync always 534 | appendfsync everysec 535 | # appendfsync no 536 | 537 | # When the AOF fsync policy is set to always or everysec, and a background 538 | # saving process (a background save or AOF log background rewriting) is 539 | # performing a lot of I/O against the disk, in some Linux configurations 540 | # Redis may block too long on the fsync() call. Note that there is no fix for 541 | # this currently, as even performing fsync in a different thread will block 542 | # our synchronous write(2) call. 543 | # 544 | # In order to mitigate this problem it's possible to use the following option 545 | # that will prevent fsync() from being called in the main process while a 546 | # BGSAVE or BGREWRITEAOF is in progress. 547 | # 548 | # This means that while another child is saving, the durability of Redis is 549 | # the same as "appendfsync none". In practical terms, this means that it is 550 | # possible to lose up to 30 seconds of log in the worst scenario (with the 551 | # default Linux settings). 552 | # 553 | # If you have latency problems turn this to "yes". Otherwise leave it as 554 | # "no" that is the safest pick from the point of view of durability. 555 | 556 | no-appendfsync-on-rewrite no 557 | 558 | # Automatic rewrite of the append only file. 559 | # Redis is able to automatically rewrite the log file implicitly calling 560 | # BGREWRITEAOF when the AOF log size grows by the specified percentage. 561 | # 562 | # This is how it works: Redis remembers the size of the AOF file after the 563 | # latest rewrite (if no rewrite has happened since the restart, the size of 564 | # the AOF at startup is used). 565 | # 566 | # This base size is compared to the current size. If the current size is 567 | # bigger than the specified percentage, the rewrite is triggered. Also 568 | # you need to specify a minimal size for the AOF file to be rewritten, this 569 | # is useful to avoid rewriting the AOF file even if the percentage increase 570 | # is reached but it is still pretty small. 571 | # 572 | # Specify a percentage of zero in order to disable the automatic AOF 573 | # rewrite feature. 574 | 575 | auto-aof-rewrite-percentage 100 576 | auto-aof-rewrite-min-size 64mb 577 | 578 | # An AOF file may be found to be truncated at the end during the Redis 579 | # startup process, when the AOF data gets loaded back into memory. 580 | # This may happen when the system where Redis is running 581 | # crashes, especially when an ext4 filesystem is mounted without the 582 | # data=ordered option (however this can't happen when Redis itself 583 | # crashes or aborts but the operating system still works correctly). 584 | # 585 | # Redis can either exit with an error when this happens, or load as much 586 | # data as possible (the default now) and start if the AOF file is found 587 | # to be truncated at the end. The following option controls this behavior. 
588 | # 589 | # If aof-load-truncated is set to yes, a truncated AOF file is loaded and 590 | # the Redis server starts emitting a log to inform the user of the event. 591 | # Otherwise if the option is set to no, the server aborts with an error 592 | # and refuses to start. When the option is set to no, the user requires 593 | # to fix the AOF file using the "redis-check-aof" utility before to restart 594 | # the server. 595 | # 596 | # Note that if the AOF file will be found to be corrupted in the middle 597 | # the server will still exit with an error. This option only applies when 598 | # Redis will try to read more data from the AOF file but not enough bytes 599 | # will be found. 600 | aof-load-truncated yes 601 | 602 | ################################ LUA SCRIPTING ############################### 603 | 604 | # Max execution time of a Lua script in milliseconds. 605 | # 606 | # If the maximum execution time is reached Redis will log that a script is 607 | # still in execution after the maximum allowed time and will start to 608 | # reply to queries with an error. 609 | # 610 | # When a long running script exceeds the maximum execution time only the 611 | # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be 612 | # used to stop a script that did not yet called write commands. The second 613 | # is the only way to shut down the server in the case a write command was 614 | # already issued by the script but the user doesn't want to wait for the natural 615 | # termination of the script. 616 | # 617 | # Set it to 0 or a negative value for unlimited execution without warnings. 618 | lua-time-limit 5000 619 | 620 | ################################ REDIS CLUSTER ############################### 621 | # 622 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 623 | # WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however 624 | # in order to mark it as "mature" we need to wait for a non trivial percentage 625 | # of users to deploy it in production. 626 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 627 | # 628 | # Normal Redis instances can't be part of a Redis Cluster; only nodes that are 629 | # started as cluster nodes can. In order to start a Redis instance as a 630 | # cluster node enable the cluster support uncommenting the following: 631 | # 632 | # cluster-enabled yes 633 | 634 | # Every cluster node has a cluster configuration file. This file is not 635 | # intended to be edited by hand. It is created and updated by Redis nodes. 636 | # Every Redis Cluster node requires a different cluster configuration file. 637 | # Make sure that instances running in the same system do not have 638 | # overlapping cluster configuration file names. 639 | # 640 | # cluster-config-file nodes-6379.conf 641 | 642 | # Cluster node timeout is the amount of milliseconds a node must be unreachable 643 | # for it to be considered in failure state. 644 | # Most other internal time limits are multiple of the node timeout. 645 | # 646 | # cluster-node-timeout 15000 647 | 648 | # A slave of a failing master will avoid to start a failover if its data 649 | # looks too old. 
650 | # 651 | # There is no simple way for a slave to actually have a exact measure of 652 | # its "data age", so the following two checks are performed: 653 | # 654 | # 1) If there are multiple slaves able to failover, they exchange messages 655 | # in order to try to give an advantage to the slave with the best 656 | # replication offset (more data from the master processed). 657 | # Slaves will try to get their rank by offset, and apply to the start 658 | # of the failover a delay proportional to their rank. 659 | # 660 | # 2) Every single slave computes the time of the last interaction with 661 | # its master. This can be the last ping or command received (if the master 662 | # is still in the "connected" state), or the time that elapsed since the 663 | # disconnection with the master (if the replication link is currently down). 664 | # If the last interaction is too old, the slave will not try to failover 665 | # at all. 666 | # 667 | # The point "2" can be tuned by user. Specifically a slave will not perform 668 | # the failover if, since the last interaction with the master, the time 669 | # elapsed is greater than: 670 | # 671 | # (node-timeout * slave-validity-factor) + repl-ping-slave-period 672 | # 673 | # So for example if node-timeout is 30 seconds, and the slave-validity-factor 674 | # is 10, and assuming a default repl-ping-slave-period of 10 seconds, the 675 | # slave will not try to failover if it was not able to talk with the master 676 | # for longer than 310 seconds. 677 | # 678 | # A large slave-validity-factor may allow slaves with too old data to failover 679 | # a master, while a too small value may prevent the cluster from being able to 680 | # elect a slave at all. 681 | # 682 | # For maximum availability, it is possible to set the slave-validity-factor 683 | # to a value of 0, which means, that slaves will always try to failover the 684 | # master regardless of the last time they interacted with the master. 685 | # (However they'll always try to apply a delay proportional to their 686 | # offset rank). 687 | # 688 | # Zero is the only value able to guarantee that when all the partitions heal 689 | # the cluster will always be able to continue. 690 | # 691 | # cluster-slave-validity-factor 10 692 | 693 | # Cluster slaves are able to migrate to orphaned masters, that are masters 694 | # that are left without working slaves. This improves the cluster ability 695 | # to resist to failures as otherwise an orphaned master can't be failed over 696 | # in case of failure if it has no working slaves. 697 | # 698 | # Slaves migrate to orphaned masters only if there are still at least a 699 | # given number of other working slaves for their old master. This number 700 | # is the "migration barrier". A migration barrier of 1 means that a slave 701 | # will migrate only if there is at least 1 other working slave for its master 702 | # and so forth. It usually reflects the number of slaves you want for every 703 | # master in your cluster. 704 | # 705 | # Default is 1 (slaves migrate only if their masters remain with at least 706 | # one slave). To disable migration just set it to a very large value. 707 | # A value of 0 can be set but is useful only for debugging and dangerous 708 | # in production. 709 | # 710 | # cluster-migration-barrier 1 711 | 712 | # By default Redis Cluster nodes stop accepting queries if they detect there 713 | # is at least an hash slot uncovered (no available node is serving it). 
714 | # This way if the cluster is partially down (for example a range of hash slots 715 | # are no longer covered) all the cluster becomes, eventually, unavailable. 716 | # It automatically returns available as soon as all the slots are covered again. 717 | # 718 | # However sometimes you want the subset of the cluster which is working, 719 | # to continue to accept queries for the part of the key space that is still 720 | # covered. In order to do so, just set the cluster-require-full-coverage 721 | # option to no. 722 | # 723 | # cluster-require-full-coverage yes 724 | 725 | # In order to setup your cluster make sure to read the documentation 726 | # available at http://redis.io web site. 727 | 728 | ################################## SLOW LOG ################################### 729 | 730 | # The Redis Slow Log is a system to log queries that exceeded a specified 731 | # execution time. The execution time does not include the I/O operations 732 | # like talking with the client, sending the reply and so forth, 733 | # but just the time needed to actually execute the command (this is the only 734 | # stage of command execution where the thread is blocked and can not serve 735 | # other requests in the meantime). 736 | # 737 | # You can configure the slow log with two parameters: one tells Redis 738 | # what is the execution time, in microseconds, to exceed in order for the 739 | # command to get logged, and the other parameter is the length of the 740 | # slow log. When a new command is logged the oldest one is removed from the 741 | # queue of logged commands. 742 | 743 | # The following time is expressed in microseconds, so 1000000 is equivalent 744 | # to one second. Note that a negative number disables the slow log, while 745 | # a value of zero forces the logging of every command. 746 | slowlog-log-slower-than 10000 747 | 748 | # There is no limit to this length. Just be aware that it will consume memory. 749 | # You can reclaim memory used by the slow log with SLOWLOG RESET. 750 | slowlog-max-len 128 751 | 752 | ################################ LATENCY MONITOR ############################## 753 | 754 | # The Redis latency monitoring subsystem samples different operations 755 | # at runtime in order to collect data related to possible sources of 756 | # latency of a Redis instance. 757 | # 758 | # Via the LATENCY command this information is available to the user that can 759 | # print graphs and obtain reports. 760 | # 761 | # The system only logs operations that were performed in a time equal or 762 | # greater than the amount of milliseconds specified via the 763 | # latency-monitor-threshold configuration directive. When its value is set 764 | # to zero, the latency monitor is turned off. 765 | # 766 | # By default latency monitoring is disabled since it is mostly not needed 767 | # if you don't have latency issues, and collecting data has a performance 768 | # impact, that while very small, can be measured under big load. Latency 769 | # monitoring can easily be enabled at runtime using the command 770 | # "CONFIG SET latency-monitor-threshold " if needed. 771 | latency-monitor-threshold 0 772 | 773 | ############################# EVENT NOTIFICATION ############################## 774 | 775 | # Redis can notify Pub/Sub clients about events happening in the key space. 
776 | # This feature is documented at http://redis.io/topics/notifications 777 | # 778 | # For instance if keyspace events notification is enabled, and a client 779 | # performs a DEL operation on key "foo" stored in the Database 0, two 780 | # messages will be published via Pub/Sub: 781 | # 782 | # PUBLISH __keyspace@0__:foo del 783 | # PUBLISH __keyevent@0__:del foo 784 | # 785 | # It is possible to select the events that Redis will notify among a set 786 | # of classes. Every class is identified by a single character: 787 | # 788 | # K Keyspace events, published with __keyspace@__ prefix. 789 | # E Keyevent events, published with __keyevent@__ prefix. 790 | # g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 791 | # $ String commands 792 | # l List commands 793 | # s Set commands 794 | # h Hash commands 795 | # z Sorted set commands 796 | # x Expired events (events generated every time a key expires) 797 | # e Evicted events (events generated when a key is evicted for maxmemory) 798 | # A Alias for g$lshzxe, so that the "AKE" string means all the events. 799 | # 800 | # The "notify-keyspace-events" takes as argument a string that is composed 801 | # of zero or multiple characters. The empty string means that notifications 802 | # are disabled. 803 | # 804 | # Example: to enable list and generic events, from the point of view of the 805 | # event name, use: 806 | # 807 | # notify-keyspace-events Elg 808 | # 809 | # Example 2: to get the stream of the expired keys subscribing to channel 810 | # name __keyevent@0__:expired use: 811 | # 812 | # notify-keyspace-events Ex 813 | # 814 | # By default all notifications are disabled because most users don't need 815 | # this feature and the feature has some overhead. Note that if you don't 816 | # specify at least one of K or E, no events will be delivered. 817 | notify-keyspace-events "" 818 | 819 | ############################### ADVANCED CONFIG ############################### 820 | 821 | # Hashes are encoded using a memory efficient data structure when they have a 822 | # small number of entries, and the biggest entry does not exceed a given 823 | # threshold. These thresholds can be configured using the following directives. 824 | hash-max-ziplist-entries 512 825 | hash-max-ziplist-value 64 826 | 827 | # Similarly to hashes, small lists are also encoded in a special way in order 828 | # to save a lot of space. The special representation is only used when 829 | # you are under the following limits: 830 | list-max-ziplist-entries 512 831 | list-max-ziplist-value 64 832 | 833 | # Sets have a special encoding in just one case: when a set is composed 834 | # of just strings that happen to be integers in radix 10 in the range 835 | # of 64 bit signed integers. 836 | # The following configuration setting sets the limit in the size of the 837 | # set in order to use this special memory saving encoding. 838 | set-max-intset-entries 512 839 | 840 | # Similarly to hashes and lists, sorted sets are also specially encoded in 841 | # order to save a lot of space. This encoding is only used when the length and 842 | # elements of a sorted set are below the following limits: 843 | zset-max-ziplist-entries 128 844 | zset-max-ziplist-value 64 845 | 846 | # HyperLogLog sparse representation bytes limit. The limit includes the 847 | # 16 bytes header. When an HyperLogLog using the sparse representation crosses 848 | # this limit, it is converted into the dense representation. 
849 | # 850 | # A value greater than 16000 is totally useless, since at that point the 851 | # dense representation is more memory efficient. 852 | # 853 | # The suggested value is ~ 3000 in order to have the benefits of 854 | # the space efficient encoding without slowing down too much PFADD, 855 | # which is O(N) with the sparse encoding. The value can be raised to 856 | # ~ 10000 when CPU is not a concern, but space is, and the data set is 857 | # composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 858 | hll-sparse-max-bytes 3000 859 | 860 | # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in 861 | # order to help rehashing the main Redis hash table (the one mapping top-level 862 | # keys to values). The hash table implementation Redis uses (see dict.c) 863 | # performs a lazy rehashing: the more operation you run into a hash table 864 | # that is rehashing, the more rehashing "steps" are performed, so if the 865 | # server is idle the rehashing is never complete and some more memory is used 866 | # by the hash table. 867 | # 868 | # The default is to use this millisecond 10 times every second in order to 869 | # actively rehash the main dictionaries, freeing memory when possible. 870 | # 871 | # If unsure: 872 | # use "activerehashing no" if you have hard latency requirements and it is 873 | # not a good thing in your environment that Redis can reply from time to time 874 | # to queries with 2 milliseconds delay. 875 | # 876 | # use "activerehashing yes" if you don't have such hard requirements but 877 | # want to free memory asap when possible. 878 | activerehashing yes 879 | 880 | # The client output buffer limits can be used to force disconnection of clients 881 | # that are not reading data from the server fast enough for some reason (a 882 | # common reason is that a Pub/Sub client can't consume messages as fast as the 883 | # publisher can produce them). 884 | # 885 | # The limit can be set differently for the three different classes of clients: 886 | # 887 | # normal -> normal clients including MONITOR clients 888 | # slave -> slave clients 889 | # pubsub -> clients subscribed to at least one pubsub channel or pattern 890 | # 891 | # The syntax of every client-output-buffer-limit directive is the following: 892 | # 893 | # client-output-buffer-limit 894 | # 895 | # A client is immediately disconnected once the hard limit is reached, or if 896 | # the soft limit is reached and remains reached for the specified number of 897 | # seconds (continuously). 898 | # So for instance if the hard limit is 32 megabytes and the soft limit is 899 | # 16 megabytes / 10 seconds, the client will get disconnected immediately 900 | # if the size of the output buffers reach 32 megabytes, but will also get 901 | # disconnected if the client reaches 16 megabytes and continuously overcomes 902 | # the limit for 10 seconds. 903 | # 904 | # By default normal clients are not limited because they don't receive data 905 | # without asking (in a push way), but just after a request, so only 906 | # asynchronous clients may create a scenario where data is requested faster 907 | # than it can read. 908 | # 909 | # Instead there is a default limit for pubsub and slave clients, since 910 | # subscribers and slaves receive data in a push fashion. 911 | # 912 | # Both the hard or the soft limit can be disabled by setting them to zero. 
913 | client-output-buffer-limit normal 0 0 0 914 | client-output-buffer-limit slave 256mb 64mb 60 915 | client-output-buffer-limit pubsub 32mb 8mb 60 916 | 917 | # Redis calls an internal function to perform many background tasks, like 918 | # closing connections of clients in timeout, purging expired keys that are 919 | # never requested, and so forth. 920 | # 921 | # Not all tasks are performed with the same frequency, but Redis checks for 922 | # tasks to perform according to the specified "hz" value. 923 | # 924 | # By default "hz" is set to 10. Raising the value will use more CPU when 925 | # Redis is idle, but at the same time will make Redis more responsive when 926 | # there are many keys expiring at the same time, and timeouts may be 927 | # handled with more precision. 928 | # 929 | # The range is between 1 and 500, however a value over 100 is usually not 930 | # a good idea. Most users should use the default of 10 and raise this up to 931 | # 100 only in environments where very low latency is required. 932 | hz 10 933 | 934 | # When a child rewrites the AOF file, if the following option is enabled 935 | # the file will be fsync-ed every 32 MB of data generated. This is useful 936 | # in order to commit the file to the disk more incrementally and avoid 937 | # big latency spikes. 938 | aof-rewrite-incremental-fsync yes 939 | -------------------------------------------------------------------------------- /lib/triple_pattern_search.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | triple_pattern_search. 3 | 4 | This script is licensed under the GNU Affero version 3. Copyrighted 5 | 2015 by Jeremy Nelson 6 | --]] 7 | --[[if redis.pcall("exists", KEYS[1]) then 8 | local subject, predicate, object = string.match("^(%a+):(%a+):(%a+)") 9 | output 10 | --]] 11 | local output = {} 12 | local cursor = ARGV[1] 13 | if not cursor then 14 | cursor = 0 15 | end 16 | cursor, result = redis.pcall("scan", cursor, "match="..KEYS[1], "count=100") 17 | for i,key_digest in ipairs(result) do 18 | --[[Should preprocess result from get call to support namespaces --]] 19 | output[i] = redis.pcall("get", key_digest) 20 | end 21 | return output 22 | 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aioredis 2 | aiohttp 3 | falcon 4 | flask 5 | rdflib 6 | redis 7 | requests 8 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo" 2 | 3 | import argparse 4 | import asyncio 5 | from aiohttp import web 6 | import cache.aio as cache 7 | import json 8 | import rdflib 9 | import shlex 10 | 11 | try: 12 | from config import config 13 | except ImportError: 14 | config = {"debug": True, 15 | "cache": "Cache", 16 | "host": "0.0.0.0", 17 | "port": 7000, 18 | "redis": {"host": "localhost", 19 | "port": 6379, 20 | "ttl": 604800}, 21 | # Blazegraph SPARQL Endpoint 22 | "triplestore": {"host": "localhost", 23 | "port": 8080, 24 | "path": "bigdata"}, 25 | 26 | 27 | } 28 | 29 | 30 | @asyncio.coroutine 31 | def check_add(resource): 32 | """Coroutine attempts to retrieve an URL or Literal 33 | value from cache, 34 | if not present in cache, attempts to retrieve the sha1 35 | hashed value from the cache, otherwise adds the subject 36 | to the cache with the 
serialized value.
37 | 
38 |     Args:
39 |         resource -- URI/URL or Literal value to add or retrieve
40 |     """
41 |     rHash = yield from cache.get_digest(resource)  # cache.aio's get_digest returns the value's SHA1 digest
42 |     return rHash
43 | 
44 | @asyncio.coroutine
45 | def handle_triple(request):
46 |     if request.method.startswith('POST'):
47 |         data = request.POST
48 |     elif request.method.startswith('GET'):
49 |         data = request.GET
50 |     else:
51 |         data = {}
52 |     subject_key = yield from cache.get_digest(data.get('s'))
53 |     predicate_key = yield from cache.get_digest(data.get('p'))
54 |     object_key = yield from cache.get_digest(data.get('o'))
55 |     result = yield from cache.get_triple(subject_key, predicate_key, object_key)
56 |     output = {"subject": data.get('s'),
57 |               "predicate-objects": []}
58 | 
59 |     for triple_key in result:
60 |         triples = triple_key.decode().split(":")
61 |         predicate = yield from cache.get_value(triples[1])
62 |         object_ = yield from cache.get_value(triples[-1])
63 |         output["predicate-objects"].append(
64 |             {"p": predicate,
65 |              "o": object_})
66 |     return web.Response(body=json.dumps(output).encode(),
67 |                         content_type="application/json")
68 | 
69 | 
70 | @asyncio.coroutine
71 | def init_http_server(loop):
72 |     app = web.Application(loop=loop)
73 |     app.router.add_route('GET', '/', handle_triple)
74 |     server = yield from loop.create_server(app.make_handler(),
75 |                                            config.get('host'),
76 |                                            config.get('port'))
77 |     if config.get('debug'):
78 |         print("Running HTTP Server at {} {}".format(config.get('host'),
79 |                                                     config.get('port')))
80 |     return server
81 | 
82 | 
83 | @asyncio.coroutine
84 | def init_socket_server(loop):
85 |     server = yield from loop.create_server(LinkedDataFragmentsServer,
86 |                                            config.get('host'),
87 |                                            config.get('port'))
88 |     if config.get('debug'):
89 |         print("Running Socket Server at {} {}".format(config.get('host'),
90 |                                                       config.get('port')))
91 |     return server
92 | 
93 | 
94 | @asyncio.coroutine
95 | def sparql_subject(value):
96 |     return "Need SPARQL query"
97 | 
98 | class LinkedDataFragmentsServer(asyncio.Protocol):
99 | 
100 |     def connection_made(self, transport):
101 |         """Called when a client connects
102 |         Args:
103 |             transport -- asyncio transport for the new connection
104 |         """
105 |         self.transport = transport
106 |         #print("transport type={} methods={}".format(type(self.transport), dir(self.transport)))
107 | 
108 |     def data_received(self, data):
109 |         """Method receives incoming HTTP request data
110 | 
111 |         Args:
112 |             data -- raw bytes received from the client
113 |         """
114 |         print(data, type(data))
115 |         self.transport.write("{}".format("Response").encode())
116 |         self.transport.close()
117 | 
118 | 
119 | if __name__ == '__main__':
120 |     parser = argparse.ArgumentParser()
121 |     parser.add_argument(
122 |         'action',
123 |         nargs='?', choices=['socket', 'http'],
124 |         default='http',
125 |         help='Run server as either: socket, http, default is http')
126 |     args = parser.parse_args()
127 |     loop = asyncio.get_event_loop()
128 |     if args.action.lower().startswith('socket'):
129 |         server = loop.run_until_complete(init_socket_server(loop))
130 |     elif args.action.lower().startswith('http'):
131 |         server = loop.run_until_complete(init_http_server(loop))
132 |     try:
133 |         loop.run_forever()
134 |     finally:
135 |         server.close()
136 |         loop.close()
137 | 
138 | 
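[Note: a sketch of how the HTTP endpoint above can be exercised; illustrative only, not part of the repository. It assumes "python server.py http" is running with the fallback config (port 7000) and a reachable Redis; the subject IRI is made up. The requests library is already listed in requirements.txt.]

    import requests

    resp = requests.get("http://localhost:7000/",
                        params={"s": "http://example.org/book/1"})
    # handle_triple responds with JSON shaped like:
    # {"subject": "...", "predicate-objects": [{"p": "...", "o": "..."}]}
    print(resp.json())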
113 | """ 114 | print(data, type(data)) 115 | self.transport.write("{}".format("Response").encode()) 116 | self.transport.close() 117 | 118 | 119 | if __name__ == '__main__': 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument( 122 | 'action', 123 | choices=['socket', 'http'], 124 | default='http', 125 | help='Run server as either: socket, http, default is http') 126 | args = parser.parse_args() 127 | loop = asyncio.get_event_loop() 128 | if args.action.lower().startswith('socket'): 129 | server = loop.run_until_complete(init_socket_server(loop)) 130 | elif args.action.lower().startswith('http'): 131 | server = loop.run_until_complete(init_http_server(loop)) 132 | try: 133 | loop.run_forever() 134 | finally: 135 | server.close() 136 | loop.close() 137 | 138 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson" 2 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jeremy Nelson, Aaron Coburn, Mark Matienzo" 2 | __license__ = "GPL Affero" 3 | 4 | import unittest 5 | import server 6 | 7 | if __name__ == '__main__': 8 | unittest.main() 9 | --------------------------------------------------------------------------------