├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── beer-sample.zip ├── cbdocloader ├── gamesim-sample.zip ├── generate-travel-sample.rb ├── travel-sample.zip └── wrapper └── wrapper /.gitignore: -------------------------------------------------------------------------------- 1 | # Keep the entries sorted to reduce the risk for a merge conflict 2 | *.pyc 3 | *.tar.gz 4 | *~ 5 | .DS_Store 6 | /CMakeFiles/ 7 | /CTestTestfile.cmake 8 | /Makefile 9 | /Makefile.in 10 | /TAGS 11 | /cmake_install.cmake 12 | /tags 13 | /wrapper/cbdocloader 14 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(CouchbasePythonClient) 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8) 3 | 4 | # Generate the python wrappers 5 | CONFIGURE_FILE (${CMAKE_CURRENT_SOURCE_DIR}/wrapper/wrapper 6 | ${CMAKE_CURRENT_BINARY_DIR}/wrapper/cbdocloader) 7 | 8 | INSTALL(PROGRAMS 9 | ${CMAKE_CURRENT_BINARY_DIR}/wrapper/cbdocloader 10 | DESTINATION bin) 11 | 12 | INSTALL(PROGRAMS 13 | cbdocloader 14 | DESTINATION lib/python) 15 | 16 | INSTALL(FILES 17 | beer-sample.zip 18 | gamesim-sample.zip 19 | travel-sample.zip 20 | DESTINATION samples) 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | For the gamesim sample: 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 
16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 
49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. 
Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. 
We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | 205 | 206 | For the beer sample: 207 | 208 | ## ODC Open Database License (ODbL) 209 | 210 | ### Preamble 211 | 212 | The Open Database License (ODbL) is a license agreement intended to 213 | allow users to freely share, modify, and use this Database while 214 | maintaining this same freedom for others. Many databases are covered by 215 | copyright, and therefore this document licenses these rights. Some 216 | jurisdictions, mainly in the European Union, have specific rights that 217 | cover databases, and so the ODbL addresses these rights, too. Finally, 218 | the ODbL is also an agreement in contract for users of this Database to 219 | act in certain ways in return for accessing this Database. 220 | 221 | Databases can contain a wide variety of types of content (images, 222 | audiovisual material, and sounds all in the same database, for example), 223 | and so the ODbL only governs the rights over the Database, and not the 224 | contents of the Database individually. 
Licensors should use the ODbL 225 | together with another license for the contents, if the contents have a 226 | single set of rights that uniformly covers all of the contents. If the 227 | contents have multiple sets of different rights, Licensors should 228 | describe what rights govern what contents together in the individual 229 | record or in some other way that clarifies what rights apply. 230 | 231 | Sometimes the contents of a database, or the database itself, can be 232 | covered by other rights not addressed here (such as private contracts, 233 | trade mark over the name, or privacy rights / data protection rights 234 | over information in the contents), and so you are advised that you may 235 | have to consult other documents or clear other rights before doing 236 | activities not covered by this License. 237 | 238 | ------ 239 | 240 | The Licensor (as defined below) 241 | 242 | and 243 | 244 | You (as defined below) 245 | 246 | agree as follows: 247 | 248 | ### 1.0 Definitions of Capitalised Words 249 | 250 | "Collective Database" – Means this Database in unmodified form as part 251 | of a collection of independent databases in themselves that together are 252 | assembled into a collective whole. A work that constitutes a Collective 253 | Database will not be considered a Derivative Database. 254 | 255 | "Convey" – As a verb, means Using the Database, a Derivative Database, 256 | or the Database as part of a Collective Database in any way that enables 257 | a Person to make or receive copies of the Database or a Derivative 258 | Database. Conveying does not include interaction with a user through a 259 | computer network, or creating and Using a Produced Work, where no 260 | transfer of a copy of the Database or a Derivative Database occurs. 261 | "Contents" – The contents of this Database, which includes the 262 | information, independent works, or other material collected into the 263 | Database. 
For example, the contents of the Database could be factual 264 | data or works such as images, audiovisual material, text, or sounds. 265 | 266 | "Database" – A collection of material (the Contents) arranged in a 267 | systematic or methodical way and individually accessible by electronic 268 | or other means offered under the terms of this License. 269 | 270 | "Database Directive" – Means Directive 96/9/EC of the European 271 | Parliament and of the Council of 11 March 1996 on the legal protection 272 | of databases, as amended or succeeded. 273 | 274 | "Database Right" – Means rights resulting from the Chapter III ("sui 275 | generis") rights in the Database Directive (as amended and as transposed 276 | by member states), which includes the Extraction and Re-utilisation of 277 | the whole or a Substantial part of the Contents, as well as any similar 278 | rights available in the relevant jurisdiction under Section 10.4. 279 | 280 | "Derivative Database" – Means a database based upon the Database, and 281 | includes any translation, adaptation, arrangement, modification, or any 282 | other alteration of the Database or of a Substantial part of the 283 | Contents. This includes, but is not limited to, Extracting or 284 | Re-utilising the whole or a Substantial part of the Contents in a new 285 | Database. 286 | 287 | "Extraction" – Means the permanent or temporary transfer of all or a 288 | Substantial part of the Contents to another medium by any means or in 289 | any form. 290 | 291 | "License" – Means this license agreement and is both a license of rights 292 | such as copyright and Database Rights and an agreement in contract. 293 | 294 | "Licensor" – Means the Person that offers the Database under the terms 295 | of this License. 296 | 297 | "Person" – Means a natural or legal person or a body of persons 298 | corporate or incorporate. 
299 | 300 | "Produced Work" – a work (such as an image, audiovisual material, text, 301 | or sounds) resulting from using the whole or a Substantial part of the 302 | Contents (via a search or other query) from this Database, a Derivative 303 | Database, or this Database as part of a Collective Database. 304 | 305 | "Publicly" – means to Persons other than You or under Your control by 306 | either more than 50% ownership or by the power to direct their 307 | activities (such as contracting with an independent consultant). 308 | 309 | "Re-utilisation" – means any form of making available to the public all 310 | or a Substantial part of the Contents by the distribution of copies, by 311 | renting, by online or other forms of transmission. 312 | 313 | "Substantial" – Means substantial in terms of quantity or quality or a 314 | combination of both. The repeated and systematic Extraction or 315 | Re-utilisation of insubstantial parts of the Contents may amount to the 316 | Extraction or Re-utilisation of a Substantial part of the Contents. 317 | 318 | "Use" – As a verb, means doing any act that is restricted by copyright 319 | or Database Rights whether in the original medium or any other; and 320 | includes without limitation distributing, copying, publicly performing, 321 | publicly displaying, and preparing derivative works of the Database, as 322 | well as modifying the Database as may be technically necessary to use it 323 | in a different mode or format. 324 | 325 | "You" – Means a Person exercising rights under this License who has not 326 | previously violated the terms of this License with respect to the 327 | Database, or who has received express permission from the Licensor to 328 | exercise rights under this License despite a previous violation. 329 | 330 | Words in the singular include the plural and vice versa. 331 | 332 | ### 2.0 What this License covers 333 | 334 | 2.1. Legal effect of this document. This License is: 335 | 336 | a. 
A license of applicable copyright and neighbouring rights; 337 | 338 | b. A license of the Database Right; and 339 | 340 | c. An agreement in contract between You and the Licensor. 341 | 342 | 2.2 Legal rights covered. This License covers the legal rights in the 343 | Database, including: 344 | 345 | a. Copyright. Any copyright or neighbouring rights in the Database. 346 | The copyright licensed includes any individual elements of the 347 | Database, but does not cover the copyright over the Contents 348 | independent of this Database. See Section 2.4 for details. Copyright 349 | law varies between jurisdictions, but is likely to cover: the Database 350 | model or schema, which is the structure, arrangement, and organisation 351 | of the Database, and can also include the Database tables and table 352 | indexes; the data entry and output sheets; and the Field names of 353 | Contents stored in the Database; 354 | 355 | b. Database Rights. Database Rights only extend to the Extraction and 356 | Re-utilisation of the whole or a Substantial part of the Contents. 357 | Database Rights can apply even when there is no copyright over the 358 | Database. Database Rights can also apply when the Contents are removed 359 | from the Database and are selected and arranged in a way that would 360 | not infringe any applicable copyright; and 361 | 362 | c. Contract. This is an agreement between You and the Licensor for 363 | access to the Database. In return you agree to certain conditions of 364 | use on this access as outlined in this License. 365 | 366 | 2.3 Rights not covered. 367 | 368 | a. This License does not apply to computer programs used in the making 369 | or operation of the Database; 370 | 371 | b. This License does not cover any patents over the Contents or the 372 | Database; and 373 | 374 | c. This License does not cover any trademarks associated with the 375 | Database. 376 | 377 | 2.4 Relationship to Contents in the Database. 
The individual items of 378 | the Contents contained in this Database may be covered by other rights, 379 | including copyright, patent, data protection, privacy, or personality 380 | rights, and this License does not cover any rights (other than Database 381 | Rights or in contract) in individual Contents contained in the Database. 382 | For example, if used on a Database of images (the Contents), this 383 | License would not apply to copyright over individual images, which could 384 | have their own separate licenses, or one single license covering all of 385 | the rights over the images. 386 | 387 | ### 3.0 Rights granted 388 | 389 | 3.1 Subject to the terms and conditions of this License, the Licensor 390 | grants to You a worldwide, royalty-free, non-exclusive, terminable (but 391 | only under Section 9) license to Use the Database for the duration of 392 | any applicable copyright and Database Rights. These rights explicitly 393 | include commercial use, and do not exclude any field of endeavour. To 394 | the extent possible in the relevant jurisdiction, these rights may be 395 | exercised in all media and formats whether now known or created in the 396 | future. 397 | 398 | The rights granted cover, for example: 399 | 400 | a. Extraction and Re-utilisation of the whole or a Substantial part of 401 | the Contents; 402 | 403 | b. Creation of Derivative Databases; 404 | 405 | c. Creation of Collective Databases; 406 | 407 | d. Creation of temporary or permanent reproductions by any means and 408 | in any form, in whole or in part, including of any Derivative 409 | Databases or as a part of Collective Databases; and 410 | 411 | e. Distribution, communication, display, lending, making available, or 412 | performance to the public by any means and in any form, in whole or in 413 | part, including of any Derivative Database or as a part of Collective 414 | Databases. 415 | 416 | 3.2 Compulsory license schemes. For the avoidance of doubt: 417 | 418 | a. 
Non-waivable compulsory license schemes. In those jurisdictions in 419 | which the right to collect royalties through any statutory or 420 | compulsory licensing scheme cannot be waived, the Licensor reserves 421 | the exclusive right to collect such royalties for any exercise by You 422 | of the rights granted under this License; 423 | 424 | b. Waivable compulsory license schemes. In those jurisdictions in 425 | which the right to collect royalties through any statutory or 426 | compulsory licensing scheme can be waived, the Licensor waives the 427 | exclusive right to collect such royalties for any exercise by You of 428 | the rights granted under this License; and, 429 | 430 | c. Voluntary license schemes. The Licensor waives the right to collect 431 | royalties, whether individually or, in the event that the Licensor is 432 | a member of a collecting society that administers voluntary licensing 433 | schemes, via that society, from any exercise by You of the rights 434 | granted under this License. 435 | 436 | 3.3 The right to release the Database under different terms, or to stop 437 | distributing or making available the Database, is reserved. Note that 438 | this Database may be multiple-licensed, and so You may have the choice 439 | of using alternative licenses for this Database. Subject to Section 440 | 10.4, all other rights not expressly granted by Licensor are reserved. 441 | 442 | ### 4.0 Conditions of Use 443 | 444 | 4.1 The rights granted in Section 3 above are expressly made subject to 445 | Your complying with the following conditions of use. These are important 446 | conditions of this License, and if You fail to follow them, You will be 447 | in material breach of its terms. 448 | 449 | 4.2 Notices. If You Publicly Convey this Database, any Derivative 450 | Database, or the Database as part of a Collective Database, then You 451 | must: 452 | 453 | a. 
Do so only under the terms of this License or another license 454 | permitted under Section 4.4; 455 | 456 | b. Include a copy of this License (or, as applicable, a license 457 | permitted under Section 4.4) or its Uniform Resource Identifier (URI) 458 | with the Database or Derivative Database, including both in the 459 | Database or Derivative Database and in any relevant documentation; and 460 | 461 | c. Keep intact any copyright or Database Right notices and notices 462 | that refer to this License. 463 | 464 | d. If it is not possible to put the required notices in a particular 465 | file due to its structure, then You must include the notices in a 466 | location (such as a relevant directory) where users would be likely to 467 | look for it. 468 | 469 | 4.3 Notice for using output (Contents). Creating and Using a Produced 470 | Work does not require the notice in Section 4.2. However, if you 471 | Publicly Use a Produced Work, You must include a notice associated with 472 | the Produced Work reasonably calculated to make any Person that uses, 473 | views, accesses, interacts with, or is otherwise exposed to the Produced 474 | Work aware that Content was obtained from the Database, Derivative 475 | Database, or the Database as part of a Collective Database, and that it 476 | is available under this License. 477 | 478 | a. Example notice. The following text will satisfy notice under 479 | Section 4.3: 480 | 481 | Contains information from DATABASE NAME, which is made available 482 | here under the Open Database License (ODbL). 483 | 484 | DATABASE NAME should be replaced with the name of the Database and a 485 | hyperlink to the URI of the Database. "Open Database License" should 486 | contain a hyperlink to the URI of the text of this License. If 487 | hyperlinks are not possible, You should include the plain text of the 488 | required URI's with the above notice. 489 | 490 | 4.4 Share alike. 491 | 492 | a. 
Any Derivative Database that You Publicly Use must be only under 493 | the terms of: 494 | 495 | i. This License; 496 | 497 | ii. A later version of this License similar in spirit to this 498 | License; or 499 | 500 | iii. A compatible license. 501 | 502 | If You license the Derivative Database under one of the licenses 503 | mentioned in (iii), You must comply with the terms of that license. 504 | 505 | b. For the avoidance of doubt, Extraction or Re-utilisation of the 506 | whole or a Substantial part of the Contents into a new database is a 507 | Derivative Database and must comply with Section 4.4. 508 | 509 | c. Derivative Databases and Produced Works. A Derivative Database is 510 | Publicly Used and so must comply with Section 4.4. if a Produced Work 511 | created from the Derivative Database is Publicly Used. 512 | 513 | d. Share Alike and additional Contents. For the avoidance of doubt, 514 | You must not add Contents to Derivative Databases under Section 4.4 a 515 | that are incompatible with the rights granted under this License. 516 | 517 | e. Compatible licenses. Licensors may authorise a proxy to determine 518 | compatible licenses under Section 4.4 a iii. If they do so, the 519 | authorised proxy's public statement of acceptance of a compatible 520 | license grants You permission to use the compatible license. 521 | 522 | 523 | 4.5 Limits of Share Alike. The requirements of Section 4.4 do not apply 524 | in the following: 525 | 526 | a. For the avoidance of doubt, You are not required to license 527 | Collective Databases under this License if You incorporate this 528 | Database or a Derivative Database in the collection, but this License 529 | still applies to this Database or a Derivative Database as a part of 530 | the Collective Database; 531 | 532 | b. 
Using this Database, a Derivative Database, or this Database as 533 | part of a Collective Database to create a Produced Work does not 534 | create a Derivative Database for purposes of Section 4.4; and 535 | 536 | c. Use of a Derivative Database internally within an organisation is 537 | not to the public and therefore does not fall under the requirements 538 | of Section 4.4. 539 | 540 | 4.6 Access to Derivative Databases. If You Publicly Use a Derivative 541 | Database or a Produced Work from a Derivative Database, You must also 542 | offer to recipients of the Derivative Database or Produced Work a copy 543 | in a machine readable form of: 544 | 545 | a. The entire Derivative Database; or 546 | 547 | b. A file containing all of the alterations made to the Database or 548 | the method of making the alterations to the Database (such as an 549 | algorithm), including any additional Contents, that make up all the 550 | differences between the Database and the Derivative Database. 551 | 552 | The Derivative Database (under a.) or alteration file (under b.) must be 553 | available at no more than a reasonable production cost for physical 554 | distributions and free of charge if distributed over the internet. 555 | 556 | 4.7 Technological measures and additional terms 557 | 558 | a. This License does not allow You to impose (except subject to 559 | Section 4.7 b.) any terms or any technological measures on the 560 | Database, a Derivative Database, or the whole or a Substantial part of 561 | the Contents that alter or restrict the terms of this License, or any 562 | rights granted under it, or have the effect or intent of restricting 563 | the ability of any person to exercise those rights. 564 | 565 | b. Parallel distribution. You may impose terms or technological 566 | measures on the Database, a Derivative Database, or the whole or a 567 | Substantial part of the Contents (a "Restricted Database") in 568 | contravention of Section 4.7 a. 
only if You also make a copy of the 569 | Database or a Derivative Database available to the recipient of the 570 | Restricted Database: 571 | 572 | i. That is available without additional fee; 573 | 574 | ii. That is available in a medium that does not alter or restrict 575 | the terms of this License, or any rights granted under it, or have 576 | the effect or intent of restricting the ability of any person to 577 | exercise those rights (an "Unrestricted Database"); and 578 | 579 | iii. The Unrestricted Database is at least as accessible to the 580 | recipient as a practical matter as the Restricted Database. 581 | 582 | c. For the avoidance of doubt, You may place this Database or a 583 | Derivative Database in an authenticated environment, behind a 584 | password, or within a similar access control scheme provided that You 585 | do not alter or restrict the terms of this License or any rights 586 | granted under it or have the effect or intent of restricting the 587 | ability of any person to exercise those rights. 588 | 589 | 4.8 Licensing of others. You may not sublicense the Database. Each time 590 | You communicate the Database, the whole or Substantial part of the 591 | Contents, or any Derivative Database to anyone else in any way, the 592 | Licensor offers to the recipient a license to the Database on the same 593 | terms and conditions as this License. You are not responsible for 594 | enforcing compliance by third parties with this License, but You may 595 | enforce any rights that You have over a Derivative Database. You are 596 | solely responsible for any modifications of a Derivative Database made 597 | by You or another Person at Your direction. You may not impose any 598 | further restrictions on the exercise of the rights granted or affirmed 599 | under this License. 600 | 601 | ### 5.0 Moral rights 602 | 603 | 5.1 Moral rights. 
This section covers moral rights, including any rights 604 | to be identified as the author of the Database or to object to treatment 605 | that would otherwise prejudice the author's honour and reputation, or 606 | any other derogatory treatment: 607 | 608 | a. For jurisdictions allowing waiver of moral rights, Licensor waives 609 | all moral rights that Licensor may have in the Database to the fullest 610 | extent possible by the law of the relevant jurisdiction under Section 611 | 10.4; 612 | 613 | b. If waiver of moral rights under Section 5.1 a in the relevant 614 | jurisdiction is not possible, Licensor agrees not to assert any moral 615 | rights over the Database and waives all claims in moral rights to the 616 | fullest extent possible by the law of the relevant jurisdiction under 617 | Section 10.4; and 618 | 619 | c. For jurisdictions not allowing waiver or an agreement not to assert 620 | moral rights under Section 5.1 a and b, the author may retain their 621 | moral rights over certain aspects of the Database. 622 | 623 | Please note that some jurisdictions do not allow for the waiver of moral 624 | rights, and so moral rights may still subsist over the Database in some 625 | jurisdictions. 626 | 627 | ### 6.0 Fair dealing, Database exceptions, and other rights not affected 628 | 629 | 6.1 This License does not affect any rights that You or anyone else may 630 | independently have under any applicable law to make any use of this 631 | Database, including without limitation: 632 | 633 | a. Exceptions to the Database Right including: Extraction of Contents 634 | from non-electronic Databases for private purposes, Extraction for 635 | purposes of illustration for teaching or scientific research, and 636 | Extraction or Re-utilisation for public security or an administrative 637 | or judicial procedure. 638 | 639 | b. Fair dealing, fair use, or any other legally recognised limitation 640 | or exception to infringement of copyright or other applicable laws. 
641 | 642 | 6.2 This License does not affect any rights of lawful users to Extract 643 | and Re-utilise insubstantial parts of the Contents, evaluated 644 | quantitatively or qualitatively, for any purposes whatsoever, including 645 | creating a Derivative Database (subject to other rights over the 646 | Contents, see Section 2.4). The repeated and systematic Extraction or 647 | Re-utilisation of insubstantial parts of the Contents may however amount 648 | to the Extraction or Re-utilisation of a Substantial part of the 649 | Contents. 650 | 651 | ### 7.0 Warranties and Disclaimer 652 | 653 | 7.1 The Database is licensed by the Licensor "as is" and without any 654 | warranty of any kind, either express, implied, or arising by statute, 655 | custom, course of dealing, or trade usage. Licensor specifically 656 | disclaims any and all implied warranties or conditions of title, 657 | non-infringement, accuracy or completeness, the presence or absence of 658 | errors, fitness for a particular purpose, merchantability, or otherwise. 659 | Some jurisdictions do not allow the exclusion of implied warranties, so 660 | this exclusion may not apply to You. 661 | 662 | ### 8.0 Limitation of liability 663 | 664 | 8.1 Subject to any liability that may not be excluded or limited by law, 665 | the Licensor is not liable for, and expressly excludes, all liability 666 | for loss or damage however and whenever caused to anyone by any use 667 | under this License, whether by You or by anyone else, and whether caused 668 | by any fault on the part of the Licensor or not. This exclusion of 669 | liability includes, but is not limited to, any special, incidental, 670 | consequential, punitive, or exemplary damages such as loss of revenue, 671 | data, anticipated profits, and lost business. This exclusion applies 672 | even if the Licensor has been advised of the possibility of such 673 | damages. 
674 | 675 | 8.2 If liability may not be excluded by law, it is limited to actual and 676 | direct financial loss to the extent it is caused by proved negligence on 677 | the part of the Licensor. 678 | 679 | ### 9.0 Termination of Your rights under this License 680 | 681 | 9.1 Any breach by You of the terms and conditions of this License 682 | automatically terminates this License with immediate effect and without 683 | notice to You. For the avoidance of doubt, Persons who have received the 684 | Database, the whole or a Substantial part of the Contents, Derivative 685 | Databases, or the Database as part of a Collective Database from You 686 | under this License will not have their licenses terminated provided 687 | their use is in full compliance with this License or a license granted 688 | under Section 4.8 of this License. Sections 1, 2, 7, 8, 9 and 10 will 689 | survive any termination of this License. 690 | 691 | 9.2 If You are not in breach of the terms of this License, the Licensor 692 | will not terminate Your rights under it. 693 | 694 | 9.3 Unless terminated under Section 9.1, this License is granted to You 695 | for the duration of applicable rights in the Database. 696 | 697 | 9.4 Reinstatement of rights. If you cease any breach of the terms and 698 | conditions of this License, then your full rights under this License 699 | will be reinstated: 700 | 701 | a. Provisionally and subject to permanent termination until the 60th 702 | day after cessation of breach; 703 | 704 | b. Permanently on the 60th day after cessation of breach unless 705 | otherwise reasonably notified by the Licensor; or 706 | 707 | c. Permanently if reasonably notified by the Licensor of the 708 | violation, this is the first time You have received notice of 709 | violation of this License from the Licensor, and You cure the 710 | violation prior to 30 days after your receipt of the notice. 
711 | 712 | Persons subject to permanent termination of rights are not eligible to 713 | be a recipient and receive a license under Section 4.8. 714 | 715 | 9.5 Notwithstanding the above, Licensor reserves the right to release 716 | the Database under different license terms or to stop distributing or 717 | making available the Database. Releasing the Database under different 718 | license terms or stopping the distribution of the Database will not 719 | withdraw this License (or any other license that has been, or is 720 | required to be, granted under the terms of this License), and this 721 | License will continue in full force and effect unless terminated as 722 | stated above. 723 | 724 | ### 10.0 General 725 | 726 | 10.1 If any provision of this License is held to be invalid or 727 | unenforceable, that must not affect the validity or enforceability of 728 | the remainder of the terms and conditions of this License and each 729 | remaining provision of this License shall be valid and enforced to the 730 | fullest extent permitted by law. 731 | 732 | 10.2 This License is the entire agreement between the parties with 733 | respect to the rights granted here over the Database. It replaces any 734 | earlier understandings, agreements or representations with respect to 735 | the Database. 736 | 737 | 10.3 If You are in breach of the terms of this License, You will not be 738 | entitled to rely on the terms of this License or to complain of any 739 | breach by the Licensor. 740 | 741 | 10.4 Choice of law. This License takes effect in and will be governed by 742 | the laws of the relevant jurisdiction in which the License terms are 743 | sought to be enforced. If the standard suite of rights granted under 744 | applicable copyright law and Database Rights in the relevant 745 | jurisdiction includes additional rights not granted under this License, 746 | these additional rights are granted in this License in order to meet the 747 | terms of this License. 
748 | 749 | 750 | For the beer sample: 751 | 752 | ## ODC Database Contents License 753 | 754 | The Licensor and You agree as follows: 755 | 756 | ### 1.0 Definitions of Capitalised Words 757 | 758 | The definitions of the Open Database License (ODbL) 1.0 are incorporated 759 | by reference into the Database Contents License. 760 | 761 | ### 2.0 Rights granted and Conditions of Use 762 | 763 | 2.1 Rights granted. The Licensor grants to You a worldwide, 764 | royalty-free, non-exclusive, perpetual, irrevocable copyright license to 765 | do any act that is restricted by copyright over anything within the 766 | Contents, whether in the original medium or any other. These rights 767 | explicitly include commercial use, and do not exclude any field of 768 | endeavour. These rights include, without limitation, the right to 769 | sublicense the work. 770 | 771 | 2.2 Conditions of Use. You must comply with the ODbL. 772 | 773 | 2.3 Relationship to Databases and ODbL. This license does not cover any 774 | Database Rights, Database copyright, or contract over the Contents as 775 | part of the Database. Please see the ODbL covering the Database for more 776 | details about Your rights and obligations. 777 | 778 | 2.4 Non-assertion of copyright over facts. The Licensor takes the 779 | position that factual information is not covered by copyright. The DbCL 780 | grants you permission for any information having copyright contained in 781 | the Contents. 782 | 783 | ### 3.0 Warranties, disclaimer, and limitation of liability 784 | 785 | 3.1 The Contents are licensed by the Licensor "as is" and without any 786 | warranty of any kind, either express or implied, whether of title, of 787 | accuracy, of the presence of absence of errors, of fitness for purpose, 788 | or otherwise. Some jurisdictions do not allow the exclusion of implied 789 | warranties, so this exclusion may not apply to You. 
790 | 791 | 3.2 Subject to any liability that may not be excluded or limited by law, 792 | the Licensor is not liable for, and expressly excludes, all liability 793 | for loss or damage however and whenever caused to anyone by any use 794 | under this License, whether by You or by anyone else, and whether caused 795 | by any fault on the part of the Licensor or not. This exclusion of 796 | liability includes, but is not limited to, any special, incidental, 797 | consequential, punitive, or exemplary damages. This exclusion applies 798 | even if the Licensor has been advised of the possibility of such 799 | damages. 800 | 801 | 3.3 If liability may not be excluded by law, it is limited to actual and 802 | direct financial loss to the extent it is caused by proved negligence on 803 | the part of the Licensor. 804 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Docloader 2 | ============= 3 | 4 | We use this tool to upload a bunch of json documents into Couchbase Server. 5 | 6 | Build 7 | ------- 8 | 9 | After you clone the project from `git@github.com:couchbase/couchbase-examples.git`, run the following command: 10 | 11 | config/autorun.sh 12 | 13 | To build the package, run 14 | 15 | make bdist 16 | 17 | Run command 18 | ------------ 19 | 20 | cbdocloader OPTIONS DOCUMENTS 21 | 22 | DOCUMENTS: 23 | 24 | The documents parameter can be either a directory name which contains all the json documents or a .zip file which archives the document directory. 25 | 26 | Generally speaking, the document directory should have the following layout: 27 | 28 | /design_docs which contains all the design docs for views. 29 | /docs which contains all the raw json data files. It can have other sub directories too. 30 | 31 | All json files should be well formatted. And no spaces allowed in file names. Design docs will be uploaded after all other data files. 
32 | 33 | OPTIONS: 34 | 35 | `-n HOST[:PORT]`, --node=HOST[:PORT] Default port is 8091 36 | 37 | `-u USERNAME`, --user=USERNAME REST username of the cluster. It can be specified in the environment variable BUCKET_USERNAME. 38 | 39 | `-p PASSWORD`, --password=PASSWORD REST password of the cluster. It can be specified in the environment variable BUCKET_PASSWORD. 40 | 41 | `-b BUCKETNAME`, --bucket=BUCKETNAME Name of the target bucket (required). The bucket will be created if it doesn't exist. 42 | 43 | `-s QUOTA`, RAM quota for the bucket. Unit is MB. Default is 100MB. 44 | 45 | `-h` --help Show this help message and exit 46 | 47 | Example 48 | ------- 49 | 50 | # Upload documents archived in zip file ../samples/gamesim-sample.zip. All data will be inserted in bucket mybucket 51 | # 52 | ./cbdocloader -n localhost:8091 -u Administrator -p password -b mybucket ../samples/gamesim-sample.zip 53 | 54 | Errors 55 | ------ 56 | 57 | Error cases to consider: 58 | 59 | * JSON files are not well formatted 60 | * Wrong REST username and password 61 | * Bucket cannot be created because the specified RAM quota is too large. 62 | 63 | Licenses 64 | -------- 65 | 66 | ### Beer sample 67 | 68 | To quote from the original [Open Beer Database](http://openbeerdb.com/): 69 | 70 | This Open Beer Database is made available under the Open Database License: 71 | http://opendatacommons.org/licenses/odbl/1.0/. Any rights in individual 72 | contents of the database are licensed under the Database Contents License: 73 | http://opendatacommons.org/licenses/dbcl/1.0/ 74 | 75 | The data was converted to JSON with the [scripts from Sergey Avseyev](https://github.com/avsej/beer-sample). 76 | 77 | ### Gamesim sample 78 | 79 | The gamesim sample is licensed under the Apache License 2.0. 80 | 81 | ### Travel sample 82 | 83 | Landmarks were converted to JSON from a CSV dump of the [English version of Wikivoyage][wikivoyage]. 84 | Licensed under [Creative Commons Attribution Share-Alike][cc-by-sa].
85 | 86 | [wikivoyage]: http://datahub.io/dataset/wikivoyage-listings-as-csv 87 | [cc-by-sa]: http://opendefinition.org/licenses/cc-by-sa/ 88 | -------------------------------------------------------------------------------- /beer-sample.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/couchbase/couchbase-examples/39ee1c2451ac2f2e86a6ff308e21e1201c49cafe/beer-sample.zip -------------------------------------------------------------------------------- /cbdocloader: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- python -*- 3 | 4 | import pump 5 | import pump_transfer 6 | import pump_json 7 | import util_cli as util 8 | 9 | import sys 10 | import time 11 | import os 12 | import os.path 13 | import shutil 14 | import json 15 | 16 | from optparse import OptionParser 17 | 18 | from cluster_manager import ClusterManager 19 | 20 | class DocLoader(pump_transfer.Transfer): 21 | 22 | def parse_args(self, argv): 23 | usage = "usage: %prog [options] |zipfile\n\n" + \ 24 | "Example: %prog -u Administrator -p password -n 127.0.0.1:8091 " + \ 25 | "-b mybucket -s 100 gamesim-sample.zip" 26 | 27 | parser = OptionParser(usage) 28 | 29 | username = os.environ.get('BUCKET_USERNAME', "") 30 | password = os.environ.get('BUCKET_PASSWORD', "") 31 | 32 | parser.add_option('-u', dest='username', default=username, 33 | help='Username', metavar='Administrator') 34 | parser.add_option('-p', dest='password', default=password, 35 | help='Password', metavar='password') 36 | parser.add_option('-b', dest='bucket', 37 | help='Bucket', metavar='mybucket') 38 | parser.add_option('-n', dest='node', default='127.0.0.1:8091', 39 | help='Node address', metavar='127.0.0.1:8091') 40 | parser.add_option('-s', dest='ram_quota', default=100, type='int', 41 | help='RAM quota in MB', metavar=100) 42 | parser.add_option("-v", dest='verbose', action="count") 43 | 44 | 
self.options, self.args = parser.parse_args(argv[1:]) 45 | if not self.args or not self.options.bucket: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | # check if the uploaded file exists 50 | if not os.path.exists(self.args[0]): 51 | sys.stderr.write("Invalid path: %s\n" % self.args[0]) 52 | sys.exit(1) 53 | 54 | def opt_construct(self, argv): 55 | sink_opts = {"node" : "http://"} 56 | common_opts = {"bucket" : ["-B", None], 57 | "username" : ["-u", None], 58 | "password" : ["-p", None], 59 | } 60 | count_opts = {"verbose" : ["-v", None]} 61 | 62 | # parse options and arguments 63 | self.parse_args(argv) 64 | 65 | gen_str = "json://" + self.args[0] 66 | sink_str = "" 67 | for key in sink_opts.iterkeys(): 68 | val = getattr(self.options, key, None) 69 | if val: 70 | sink_str += sink_opts[key] + val 71 | 72 | for key in common_opts.iterkeys(): 73 | val = getattr(self.options, key, None) 74 | if val: 75 | common_opts[key][1] = str(val) 76 | 77 | for key in count_opts.iterkeys(): 78 | val = getattr(self.options, key, None) 79 | if val: 80 | count_opts[key][1] = int(val) 81 | 82 | return gen_str, sink_str, common_opts, count_opts 83 | 84 | def init_bucket(self): 85 | host, port= util.hostport(self.options.node) 86 | server_info = {'ip': host, 87 | 'port': port, 88 | 'username': self.options.username, 89 | 'password': self.options.password} 90 | 91 | timeout_in_seconds = 120 92 | cm = ClusterManager(host, port, self.options.username, self.options.password) 93 | if self.options.password: 94 | uri = "http://%s:%s/nodes/self" % (server_info["ip"], server_info["port"]) 95 | content, errors = cm._get(uri) 96 | _exitIfErrors(errors) 97 | 98 | quotaUnused = -1 99 | try: 100 | json_parsed = json.loads(content) 101 | quotaTotal = json_parsed["storageTotals"]["ram"]["quotaTotal"] 102 | quotaUnused = quotaTotal - json_parsed["storageTotals"]["ram"]["quotaUsed"] 103 | except: 104 | pass 105 | quotaUnused = quotaUnused / 1024.0 106 | if quotaUnused > 0 and quotaUnused < 
self.options.ram_quota: 107 | sys.stderr.write("RAM quota specified is too large to be provisioned into this cluster\n") 108 | sys.stderr.write("Available RAM quota: %d, requested: %d\n" %\ 109 | (quotaUnused, self.options.ram_quota)) 110 | sys.exit(1) 111 | if not self.bucket_exists(self.options.bucket): 112 | _, errors = cm.create_bucket(self.options.bucket, self.options.ram_quota, 113 | "sasl", "", 1, 0, "membase") 114 | _exitIfErrors(errors) 115 | 116 | 117 | start = time.time() 118 | # Make sure the bucket exists before querying its status 119 | bucket_exist = False 120 | while (time.time() - start) <= timeout_in_seconds and not bucket_exist: 121 | bucket_exist = self.bucket_exists(self.options.bucket) 122 | if bucket_exist: 123 | break 124 | else: 125 | sys.stderr.write(".") 126 | time.sleep(2) 127 | 128 | if not bucket_exist: 129 | sys.stderr.write("Fail to create bucket '%s' within %s seconds\n" %\ 130 | (self.options.bucket, timeout_in_seconds)) 131 | sys.exit(1) 132 | 133 | #Query status for all bucket nodes 134 | uri = "http://%s:%s/pools/default/buckets/%s" % \ 135 | (server_info["ip"], server_info["port"], self.options.bucket) 136 | all_node_ready = False 137 | start = time.time() 138 | while (time.time() - start) <= timeout_in_seconds and not all_node_ready: 139 | content, errors = cm._get(uri) 140 | _exitIfErrors(errors) 141 | 142 | all_node_ready = True 143 | for node in content["nodes"]: 144 | if node["status"] != "healthy": 145 | all_node_ready = False 146 | break 147 | if not all_node_ready: 148 | sys.stderr.write(".") 149 | time.sleep(2) 150 | if not all_node_ready: 151 | sys.stderr.write("\nNode status is not ready after creating bucket '%s' within %s seconds\n" %\ 152 | (self.options.bucket, timeout_in_seconds)) 153 | sys.exit(1) 154 | else: 155 | print "bucket creation is successful" 156 | 157 | def bucket_exists(self, bucket): 158 | host, port= util.hostport(self.options.node) 159 | cm = ClusterManager(host, port, self.options.username, 
self.options.password) 160 | buckets, errors = cm.list_buckets() 161 | _exitIfErrors(errors) 162 | return bucket in buckets 163 | 164 | def find_handlers(self, opts, source, sink): 165 | return pump_json.JSONSource, pump.PumpingStation.find_handler(opts, sink, pump_transfer.SINKS) 166 | 167 | def main(self, argv): 168 | 169 | src, sink, common_opts, count_opts = self.opt_construct(argv) 170 | local_args = [argv[0]] 171 | local_args.append(src) 172 | local_args.append(sink) 173 | for v in common_opts.itervalues(): 174 | local_args.append(v[0]) 175 | local_args.append(v[1]) 176 | 177 | for v in count_opts.itervalues(): 178 | if v[1] is not None: 179 | for i in range(v[1]): 180 | local_args.append(v[0]) 181 | 182 | # create new bucket if it doesn't exist 183 | self.init_bucket() 184 | 185 | #use cbtransfer to upload documents 186 | pump_transfer.Transfer.main(self, local_args) 187 | 188 | def _exitIfErrors(errors, prefix=""): 189 | if errors: 190 | for error in errors: 191 | print prefix + error 192 | sys.exit(1) 193 | 194 | if __name__ == '__main__': 195 | if os.name == 'nt': 196 | mydir = os.path.dirname(sys.argv[0]) 197 | bin_dir = os.path.join(mydir, '..') 198 | path = [mydir, bin_dir, os.environ['PATH']] 199 | os.environ['PATH'] = ';'.join(path) 200 | 201 | pump_transfer.exit_handler(DocLoader().main(sys.argv)) 202 | -------------------------------------------------------------------------------- /gamesim-sample.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/couchbase/couchbase-examples/39ee1c2451ac2f2e86a6ff308e21e1201c49cafe/gamesim-sample.zip -------------------------------------------------------------------------------- /generate-travel-sample.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # 3 | # This script generates sample data for `travel-sample` bucket. 
#
# Dependencies:
#  * redis and rubygem 'redis' to cache responses for geocoder APIs
#  * rubygem geocoder to resolve country, city and state using geo coordinates
#
# To install dependencies, use the following commands:
#
#     sudo yum install redis
#     gem install geocoder redis
#
# IMPORTANT NOTE: the script is using random generator to generate route
# tables, therefore the output might be different.

# Initialize random generator. Without any arguments to the script, it uses
# zero as seed.
# NOTE(review): ARGV[0] is read again further down as the wikivoyage CSV file
# name, so a numeric seed doubles as a file name and a non-numeric file name
# seeds the generator with 0 -- confirm which meaning is intended.
srand(ARGV[0].to_i)

# This modification time will be assigned to all files in the archive
GLOBAL_MTIME = Time.utc(2015, 1, 1, 0, 0, 0)

start = Time.new
puts "== START: #{start}"
at_exit do
  finish = Time.now
  puts "== END: #{finish}"
  puts "== TOTAL: #{finish - start} seconds"
end

require 'rubygems'

# Activates the named gem, or aborts with an installation hint when it is
# not installed.
def activate_gem(name)
  gem name
rescue LoadError => ex
  abort "#{ex}.\nUse 'gem install #{name}' to install it"
end

activate_gem('redis')
require 'redis'
CACHE = Redis.new

activate_gem('geocoder')
require 'geocoder'
# https://github.com/alexreisner/geocoder#readme
# by default it is using yandex maps because it allows 25k requests
# per day, but for some landmarks it is necessary to switch to google,
# because they are missing on yandex maps
Geocoder.configure(cache: CACHE, timeout: 10, units: :km,
                   # http_proxy: '127.0.0.1:3128',
                   always_raise: [Geocoder::OverQueryLimitError])

activate_gem('nokogiri')
require 'nokogiri'

activate_gem('cld')
require 'cld'

activate_gem('ffaker')
require 'ffaker'

require 'yaml'
activate_gem('obscenity')
require 'obscenity'

require 'csv'
require 'json'
require 'fileutils'
require 'English'
require 'byebug'
require 'digest/sha1'
require 'shellwords'

include FileUtils

rm_rf('failed.geo.txt')

# True for nil and for strings that are empty or whitespace-only.
def blank?(value)
  value.nil? || (value.is_a?(String) && value.strip.empty?)
end

# Replaces every blank value in +doc+ with nil, descending into nested hashes.
def nullify_blank_keys(doc)
  doc.keys.each do |key|
    if doc[key].is_a?(Hash)
      nullify_blank_keys(doc[key])
    else
      doc[key] = nil if blank?(doc[key])
    end
  end
end

# Random Time between +from+ and +to+ (inclusive, one-second resolution).
def random_date(from = Time.new(2012, 1, 1), to = Time.new(2016, 1, 1))
  Time.at(rand(from.to_i..to.to_i))
end

reviews_url = 'http://times.cs.uiuc.edu/~wang296/Data/LARA/TripAdvisor/TripAdvisorJson.tar.bz2'
reviews_file = File.basename(reviews_url)
unless File.exist?(reviews_file)
  puts("downloading #{reviews_url} to #{reviews_file}...")
  system('curl', '-O', reviews_url)
end
unless File.directory?('json')
  puts("uncompressing #{reviews_file}...")
  system('tar', 'jxf', reviews_file)
end
# load first 200k reviews in english
reviews = []
if File.exist?('reviews-filtered.tar')
  system('tar xf reviews-filtered.tar')
  Dir['reviews-filtered/**/*.json'].sort.each do |review|
    reviews.push(JSON.load(File.read(review)))
  end
else
  Dir['json/*.json'].sort.each do |hotel|
    raw = JSON.load(File.read(hotel))
    if raw.key?('Reviews')
      raw['Reviews'].each do |r|
        lang = CLD.detect_language(r['Content'])
        next unless lang[:reliable] && lang[:code] == 'en'
        ratings = r['Ratings']
        ratings.each do |k, v|
          ratings[k] = v.to_f
        end
        reviews.push(
          content: Obscenity.sanitize(r['Content']),
          ratings: ratings,
          author: FFaker::Name.name,
          date: random_date
        )
      end
    end
    break unless reviews.size < 200_000
  end
end
# Shuffle in place; plain #shuffle returns a new array and leaves the
# receiver untouched.
reviews.shuffle!

wikivoyage_url = 'https://ckannet-storage.commondatastorage.googleapis.com/2015-01-06T06:01:38.068Z/enwikivoyage-20141226-pages-articles-xml.csv'
wikivoyage_file = ARGV[0] || 'enwikivoyage-20141226-pages-articles-xml.csv'
unless File.exist?(wikivoyage_file)
  puts("downloading #{wikivoyage_url} to #{wikivoyage_file}...")
  # -o FILE names the output file; -O takes no argument.
  system('curl', '-o', wikivoyage_file, wikivoyage_url)
end

print('Fixing XML double quotes where they conflict with CSV quotes... ')
fixed_lines = 0
temp_file = "#{wikivoyage_file}.tmp"
File.open(wikivoyage_file) do |input|
  File.open(temp_file, 'w+') do |output|
    input.each_line do |line|
      # The backslash must be doubled inside a double-quoted string, otherwise
      # "\1" is the byte 0x01 rather than a reference to the first group.
      fixed = line.gsub(/=\s*"([^"]*)"/, "='\\1'")
      fixed_lines += 1 if fixed != line
      output.puts(fixed)
    end
  end
end
if fixed_lines == 0
  rm(temp_file)
  puts('ok')
else
  mv(temp_file, wikivoyage_file)
  puts("fixed #{fixed_lines} lines")
end

rm_rf('travel/docs')
rm_rf('travel-sample/docs')
puts("converting #{wikivoyage_file} to JSON files into travel/docs/...")
mkdir_p('travel/docs')
mkdir_p('travel-sample/docs')
csv = CSV.open(wikivoyage_file, headers: true, col_sep: ';', header_converters: :downcase)
idx = 0
# ids of landmarks that are looked up with the google geocoder from the start
missing_on_yandex_maps = [
  9034, 15_484, 15_485, 15_486, 17_360, 17_361, 17_362, 17_363,
  17_364, 17_365, 17_366, 17_367, 17_368, 17_369, 40_385
]
# ids of landmarks whose lat/lon columns are exchanged before geocoding
swapped_coordinates = [
  634, 3495, 33_129
]
chosen_countries = ['United States', 'France', 'United Kingdom']
hotels = [] 201 | csv.each do |row| 202 | Geocoder.configure(lookup: :yandex) 203 | key = "landmark_#{idx}" 204 | doc = row.to_h 205 | lat = doc.delete('lat').to_f 206 | lon = doc.delete('lon').to_f 207 | next if lat == 0 || lon == 0 || blank?(doc['name']) || blank?(doc['content']) 208 | doc['geo'] = {lat: lat, lon: lon, accuracy: ['ROOFTOP', 'RANGE_INTERPOLATED', 'APPROXIMATE'].sample} 209 | doc['activity'] = doc.delete('type') 210 | doc['type'] = 'landmark' 211 | doc['id'] = idx 212 | doc['geo'] = {lat: lon, lon: lat} if swapped_coordinates.include?(doc['id']) 213 | if missing_on_yandex_maps.include?(doc['id']) 214 | Geocoder.configure(lookup: :google) 215 | end 216 | t = 0 217 | geo = begin 218 | Geocoder.search(doc['geo'].values_at(:lat, :lon).join(',')) 219 | rescue Geocoder::OverQueryLimitError 220 | sleep 2 + t 221 | STDERR.print '.' 222 | t += 1 223 | if t > 10 224 | STDERR.puts('timeout') 225 | File.open('failed.geo.txt', 'a+') do |f| 226 | f.puts("Geocoder.search(#{doc['geo'].values_at(:lat, :lon).join(',').inspect})") 227 | f.puts(doc.inspect) 228 | f.puts 229 | end 230 | else 231 | retry 232 | end 233 | end 234 | if geo.empty? 235 | Geocoder.configure(lookup: :google) 236 | t = 0 237 | begin 238 | geo = Geocoder.search(doc['geo'].values_at(:lat, :lon).join(',')) 239 | rescue Geocoder::OverQueryLimitError 240 | sleep 2 + t 241 | STDERR.print '.' 
242 | t += 1 243 | if t > 10 244 | STDERR.puts('timeout') 245 | File.open('failed.geo.txt', 'a+') do |f| 246 | f.puts("Geocoder.search(#{doc['geo'].values_at(:lat, :lon).join(',').inspect})") 247 | f.puts(doc.inspect) 248 | f.puts 249 | end 250 | else 251 | retry 252 | end 253 | end 254 | end 255 | if geo && geo = geo.first 256 | doc['country'] = geo.country 257 | doc['country'] = 'United Kingdom' if doc['country'] =~ /^United Kingdom/ 258 | doc['city'] = geo.city 259 | doc['state'] = geo.state 260 | else 261 | puts "\n#{doc['geo'].values_at(:lat, :lon).join(',')}\t#{doc['id']}\n" 262 | end 263 | nullify_blank_keys(doc) 264 | unless blank?(doc['image']) 265 | doc['image'] = "https://en.wikivoyage.org/wiki/File:#{doc['image']}" 266 | cache_key = "image:#{Digest::SHA1.hexdigest(doc['image'])}" 267 | if url = CACHE.get(cache_key) 268 | doc['image_direct_url'] = url 269 | else 270 | # try to resolve original image 271 | begin 272 | html = Nokogiri::HTML(`curl -sL #{doc['image'].shellescape}`) 273 | unless $CHILD_STATUS.success? 
274 | puts "\nERROR: curl -sL #{doc['image'].shellescape}\n" 275 | end 276 | links = html.css('div.fullMedia a') 277 | doc['image_direct_url'] = "https:#{links.first['href']}" if links && links.first 278 | CACHE.set(cache_key, doc['image_direct_url']) 279 | rescue => ex 280 | abort "#{doc['image']}: #{ex}" 281 | end 282 | end 283 | end 284 | if doc['activity'] == 'sleep' 285 | key = "hotel_#{idx}" 286 | doc['type'] = 'hotel' 287 | doc.delete('activity') 288 | doc['reviews'] = reviews.shift(rand(10)) 289 | doc['public_likes'] = Array.new(rand(10)) { FFaker::Name.name } 290 | doc['vacancy'] = [true, false].sample 291 | end 292 | File.write("travel/docs/#{key}.json", doc.to_json) 293 | if doc['country'] == 'United Kingdom' || doc['country'] == 'France' || 294 | (doc['country'] == 'United States' && doc['state'] == 'California') 295 | File.write("travel-sample/docs/#{key}.json", doc.to_json) 296 | end 297 | print("\r#{key}.json") 298 | STDOUT.flush 299 | idx += 1 300 | end 301 | puts 302 | 303 | air_url = 'https://github.com/ToddGreenstein/try-cb-nodejs/raw/1c6bea3f1ae56a4ad54d096c02e7c86e7f5632f8/model/raw/rawJsonAir.js' 304 | air_file = 'rawJsonAir.js' 305 | unless File.exist?(air_file) 306 | puts("downloading #{air_url} to #{air_file}...") 307 | system("curl -L -O#{air_file} #{air_url}") 308 | end 309 | 310 | puts("extracting airline data from #{air_file}... 
") 311 | def random_schedule(airline) 312 | schedule = [] 313 | # adds a schedule entry for at least every day 314 | 7.times do |day| 315 | rand(1..5).times do 316 | schedule.push( 317 | day: day, 318 | utc: format('%02d:%02d:00', rand(0..23), rand(0..59)), 319 | flight: format('%s%d%d%d', airline, rand(0..9), rand(0..9), rand(0..9)) 320 | ) 321 | end 322 | end 323 | schedule 324 | end 325 | 326 | old_sep, $INPUT_RECORD_SEPARATOR = $INPUT_RECORD_SEPARATOR, "\r" 327 | prev_type = nil 328 | inactive_airlines = [] 329 | chosen_airlines = [] 330 | chosen_airports = [] 331 | airports = {} 332 | File.open(air_file) do |input| 333 | loop do 334 | line = input.gets 335 | break unless line 336 | next unless line[0] == '{' 337 | line = line.sub(/"Peau Vava.*"/, '"Peau Vavaʻu"') 338 | doc = JSON.load(line.sub(/[\r,]*$/, '')) 339 | active = doc.delete('active') 340 | doc['id'] = doc['id'].to_i 341 | key = "#{doc['type']}_#{doc['id']}" 342 | if doc.key?('icao') && (blank?(doc['icao']) || doc['icao'] == 'N' || doc['icao'] == '...') 343 | doc['icao'] = nil 344 | end 345 | if active == 'N' || doc['id'] == 18_860 || 346 | ( # skip airports or airlines without sensible codes 347 | (doc.key?('faa') || doc.key?('icao')) && 348 | blank?(doc['faa']) && blank?(doc['icao']) 349 | ) 350 | inactive_airlines << key 351 | next 352 | end 353 | doc['stops'] = doc['stops'].to_i if doc.key?('stops') 354 | # doc['name'] = doc.delete('airportname') if doc.key?('airportname') 355 | geo = doc.delete('geo') 356 | if geo 357 | doc['geo'] = { 358 | lat: geo['latitude'].to_f, 359 | lon: geo['longitude'].to_f, 360 | alt: geo['altitude'].to_f 361 | } 362 | next if doc['geo'][:lat] == 0 || doc['geo'][:lon] == 0 363 | end 364 | doc.delete('keywords') 365 | doc.delete('gmtoffset') 366 | doc.delete('dst') 367 | prev_type ||= doc['type'] 368 | doc['schedule'] = random_schedule(doc['airline']) if doc['type'] == 'route' 369 | nullify_blank_keys(doc) 370 | if doc['type'] == 'airport' 371 | airports[doc['faa']] = 
doc 372 | airports[doc['icao']] = doc 373 | end 374 | File.write("travel/docs/#{key}.json", doc.to_json) 375 | if chosen_countries.include?(doc['country']) || doc['type'] == 'route' 376 | File.write("travel-sample/docs/#{key}.json", doc.to_json) 377 | chosen_airlines << key if doc['type'] == 'airline' 378 | chosen_airports << doc['icao'] << doc['faa'] if doc['type'] == 'airport' 379 | end 380 | if prev_type != doc['type'] 381 | prev_type = doc['type'] 382 | puts 383 | end 384 | print(" \r#{key}.json") 385 | STDOUT.flush 386 | end 387 | end 388 | $INPUT_RECORD_SEPARATOR = old_sep 389 | puts 390 | 391 | chosen_airports.compact! 392 | airports.delete(nil) 393 | unless inactive_airlines.empty? 394 | count = 0 395 | puts("removing routes from #{inactive_airlines.size} inactive airlines... ") 396 | Dir['travel-sample/docs/route_*.json'].sort_by { |name| name[/(\d+)/, 1].to_i }.each do |route_file| 397 | route = JSON.load(File.read(route_file)) 398 | if inactive_airlines.include?(route['airlineid']) || 399 | !airports.key?(route['sourceairport']) || 400 | !airports.key?(route['destinationairport']) || 401 | !(chosen_airlines.include?(route['airlineid']) || 402 | chosen_airports.include?(route['sourceairport']) || 403 | chosen_airports.include?(route['destinationairport'])) 404 | rm_rf(route_file) 405 | print(" \r#{File.basename(route_file)}") 406 | STDOUT.flush 407 | count += 1 408 | else 409 | from = airports[route['sourceairport']]['geo'] 410 | to = airports[route['destinationairport']]['geo'] 411 | route[:distance] = Geocoder::Calculations.distance_between([from[:lat], from[:lon]], 412 | [to[:lat], to[:lon]]) 413 | File.write(route_file, route.to_json) 414 | end 415 | end 416 | puts "\nremoved #{count} routes in reduced dataset" 417 | count = 0 418 | Dir['travel/docs/route_*.json'].sort_by { |name| name[/(\d+)/, 1].to_i }.each do |route_file| 419 | route = JSON.load(File.read(route_file)) 420 | if inactive_airlines.include?(route['airlineid']) || 421 | 
!airports.key?(route['sourceairport']) || 422 | !airports.key?(route['destinationairport']) 423 | rm_rf(route_file) 424 | print(" \r#{File.basename(route_file)}") 425 | STDOUT.flush 426 | count += 1 427 | else 428 | from = airports[route['sourceairport']]['geo'] 429 | to = airports[route['destinationairport']]['geo'] 430 | route[:distance] = Geocoder::Calculations.distance_between([from[:lat], from[:lon]], 431 | [to[:lat], to[:lon]], 432 | units: :km) 433 | File.write(route_file, route.to_json) 434 | end 435 | end 436 | puts "\nremoved #{count} routes" 437 | end 438 | 439 | design_docs = { 440 | spatial: 441 | { 442 | _id: '_design/spatial', 443 | language: 'javascript', 444 | spatial: { 445 | poi: <