├── .gitignore ├── README.md ├── Rakefile ├── clj.gemspec ├── ext └── clj │ ├── extconf.rb │ └── parser.c ├── lib ├── clj.rb └── clj │ ├── parser.rb │ └── types.rb └── test ├── bench ├── bench.py ├── bench.rb └── clj.py └── clj_spec.rb /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Like json, but with clojure sexps 2 | ================================= 3 | 4 | It gives `#to_clj` methods to various standard objects and has `Clojure.parse` and `Clojure.dump`, 5 | check out `test/clj_spec.rb` to see what it can do. 6 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env ruby 2 | require 'rake' 3 | 4 | task :default => :test 5 | 6 | task :test do 7 | Dir.chdir 'test' 8 | 9 | sh 'rspec clj_spec.rb --color --format doc' 10 | end 11 | 12 | task :bench do 13 | puts "Ruby: #{`test/bench/bench.rb`.strip}" 14 | puts "Python: #{`test/bench/bench.py`.strip}" 15 | end 16 | -------------------------------------------------------------------------------- /clj.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new {|s| 2 | s.name = 'clj' 3 | s.version = '0.0.8.2' 4 | s.author = 'meh.' 5 | s.email = 'meh@paranoici.org' 6 | s.homepage = 'http://github.com/meh/ruby-clj' 7 | s.platform = Gem::Platform::RUBY 8 | s.summary = 'Like json, but with clojure sexps.' 
9 | s.files = Dir['ext/**/*.{c,h,rb}'] + Dir['lib/**/*.rb'] 10 | s.extensions = 'ext/clj/extconf.rb' 11 | 12 | s.add_development_dependency 'rake' 13 | s.add_development_dependency 'rspec' 14 | } 15 | -------------------------------------------------------------------------------- /ext/clj/extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf' 2 | 3 | unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3') 4 | $CFLAGS << ' -O3' 5 | end 6 | 7 | if CONFIG['CC'] =~ /gcc/ 8 | $CFLAGS << ' -Wall' 9 | 10 | if $DEBUG && !$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb') 11 | $CFLAGS << ' -O0 -ggdb' 12 | end 13 | end 14 | 15 | create_makefile 'clj/parser_ext' 16 | -------------------------------------------------------------------------------- /ext/clj/parser.c: -------------------------------------------------------------------------------- 1 | /** 2 | * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 3 | * Version 2, December 2004 4 | * 5 | * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 6 | * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 7 | * 8 | * 0. You just DO WHAT THE FUCK YOU WANT TO. 
9 | **/ 10 | 11 | #include 12 | #include 13 | 14 | #include "ruby.h" 15 | 16 | static VALUE cClojure; 17 | static VALUE cParser; 18 | 19 | static VALUE UNICODE_REGEX; 20 | static VALUE OCTAL_REGEX; 21 | 22 | typedef enum { 23 | NODE_METADATA, 24 | NODE_NUMBER, 25 | NODE_BOOLEAN, 26 | NODE_NIL, 27 | NODE_CHAR, 28 | NODE_KEYWORD, 29 | NODE_STRING, 30 | NODE_MAP, 31 | NODE_LIST, 32 | NODE_VECTOR, 33 | NODE_INSTANT, 34 | NODE_SET, 35 | NODE_REGEXP, 36 | NODE_SYMBOL 37 | } NodeType; 38 | 39 | #define CALL(what) (what(self, string, position)) 40 | #define STATE VALUE self, char* string, size_t* position 41 | #define IS_EOF (string[*position] == '\0') 42 | #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0') 43 | #define CURRENT (string[*position]) 44 | #define CURRENT_PTR (&string[*position]) 45 | #define AFTER(n) (string[*position + (n)]) 46 | #define AFTER_PTR(n) (&string[*position + (n)]) 47 | #define BEFORE(n) (string[*position - (n)]) 48 | #define BEFORE_PTR(n) (&string[*position - (n)]) 49 | #define SEEK(n) (*position += (n)) 50 | #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n)) 51 | #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0) 52 | #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str)) 53 | #define IS_IGNORED(ch) (isspace(ch) || ch == ',') 54 | #define IS_SYMBOL(ch) (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '!' || ch == '-' || ch == '_' || ch == '?' || ch == '.' 
|| ch == ':' || ch == '/') 55 | #define IS_BOTH_SEPARATOR(ch) (ch == '\0' || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t') 56 | #define IS_KEYWORD_SEPARATOR(ch) (ch == '\0' || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t') 57 | 58 | static VALUE read_next (STATE); 59 | 60 | static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n) 61 | { 62 | size_t i; 63 | 64 | for (i = 0; i < n; i++) { 65 | if (IS_EOF_AFTER(i)) { 66 | return false; 67 | } 68 | } 69 | 70 | return true; 71 | } 72 | 73 | static void ignore (STATE) 74 | { 75 | while (!IS_EOF && IS_IGNORED(CURRENT)) { 76 | SEEK(1); 77 | } 78 | } 79 | 80 | static NodeType next_type (STATE) 81 | { 82 | if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') { 83 | return NODE_NUMBER; 84 | } 85 | 86 | switch (CURRENT) { 87 | case '^': return NODE_METADATA; 88 | case 't': case 'f': return NODE_BOOLEAN; 89 | case 'n': return NODE_NIL; 90 | case '\\': return NODE_CHAR; 91 | case ':': return NODE_KEYWORD; 92 | case '"': return NODE_STRING; 93 | case '{': return NODE_MAP; 94 | case '(': return NODE_LIST; 95 | case '[': return NODE_VECTOR; 96 | } 97 | 98 | if (CURRENT == '#') { 99 | if (IS_EOF_AFTER(1)) { 100 | rb_raise(rb_eSyntaxError, "unexpected EOF"); 101 | } 102 | 103 | switch (AFTER(1)) { 104 | case 'i': return NODE_INSTANT; 105 | case '{': return NODE_SET; 106 | case '"': return NODE_REGEXP; 107 | } 108 | } 109 | 110 | return NODE_SYMBOL; 111 | } 112 | 113 | static VALUE read_metadata (STATE) 114 | { 115 | VALUE result; 116 | VALUE metadatas = rb_ary_new(); 117 | size_t i; 118 | 119 | while (CURRENT == '^') { 120 | SEEK(1); 121 | 122 | 
rb_ary_push(metadatas, CALL(read_next)); 123 | } 124 | 125 | result = CALL(read_next); 126 | 127 | if (!rb_respond_to(result, rb_intern("metadata="))) { 128 | rb_raise(rb_eSyntaxError, "the object cannot hold metadata"); 129 | } 130 | 131 | for (i = 0; i < RARRAY_LEN(metadatas); i++) { 132 | rb_funcall(result, rb_intern("metadata="), 1, RARRAY_PTR(metadatas)[i]); 133 | } 134 | 135 | return result; 136 | } 137 | 138 | static VALUE read_symbol (STATE) 139 | { 140 | size_t length = 0; 141 | 142 | while (IS_SYMBOL(AFTER(length))) { 143 | length++; 144 | } 145 | 146 | SEEK(length); 147 | 148 | return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0), 149 | rb_intern("symbol!"), 0); 150 | } 151 | 152 | static VALUE read_nil (STATE) 153 | { 154 | if (!IS_NOT_EOF_UP_TO(3) || !IS_EQUAL_UP_TO("nil", 3) || !IS_BOTH_SEPARATOR(AFTER(3))) { 155 | return CALL(read_symbol); 156 | } 157 | 158 | SEEK(3); 159 | 160 | return Qnil; 161 | } 162 | 163 | static VALUE read_boolean (STATE) 164 | { 165 | if (CURRENT == 't') { 166 | if (!IS_NOT_EOF_UP_TO(4) || !IS_EQUAL_UP_TO("true", 4) || !IS_BOTH_SEPARATOR(AFTER(4))) { 167 | return CALL(read_symbol); 168 | } 169 | 170 | SEEK(4); 171 | 172 | return Qtrue; 173 | } 174 | else { 175 | if (!IS_NOT_EOF_UP_TO(5) || !IS_EQUAL_UP_TO("false", 5) || !IS_BOTH_SEPARATOR(AFTER(5))) { 176 | return CALL(read_symbol); 177 | } 178 | 179 | SEEK(5); 180 | 181 | return Qfalse; 182 | } 183 | } 184 | 185 | static VALUE read_number (STATE) 186 | { 187 | size_t length = 0; 188 | VALUE rbPiece; 189 | char* cPiece; 190 | char* tmp; 191 | 192 | while (!IS_EOF_AFTER(length) && !IS_BOTH_SEPARATOR(AFTER(length))) { 193 | length++; 194 | } 195 | 196 | SEEK(length); 197 | 198 | rbPiece = rb_str_new(BEFORE_PTR(length), length); 199 | cPiece = StringValueCStr(rbPiece); 200 | 201 | if (strchr(cPiece, '/')) { 202 | return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece); 203 | } 204 | else if ((tmp = strchr(cPiece, 'r')) || (tmp = 
strchr(cPiece, 'R'))) { 205 | return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1, 206 | rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0)); 207 | } 208 | else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') { 209 | if (cPiece[length - 1] == 'M') { 210 | return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece); 211 | } 212 | else { 213 | return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece); 214 | } 215 | } 216 | else { 217 | if (cPiece[length - 1] == 'N') { 218 | rb_str_set_len(rbPiece, length - 1); 219 | } 220 | 221 | return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece); 222 | } 223 | } 224 | 225 | static VALUE read_char (STATE) 226 | { 227 | SEEK(1); 228 | 229 | if (IS_EOF_AFTER(1) || IS_BOTH_SEPARATOR(AFTER(1))) { 230 | SEEK(1); return rb_str_new(BEFORE_PTR(1), 1); 231 | } 232 | else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && IS_BOTH_SEPARATOR(AFTER(7))) { 233 | SEEK(7); return rb_str_new2("\n"); 234 | } 235 | else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && IS_BOTH_SEPARATOR(AFTER(5))) { 236 | SEEK(5); return rb_str_new2(" "); 237 | } 238 | else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && IS_BOTH_SEPARATOR(AFTER(3))) { 239 | SEEK(3); return rb_str_new2("\t"); 240 | } 241 | else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && IS_BOTH_SEPARATOR(AFTER(9))) { 242 | SEEK(9); return rb_str_new2("\b"); 243 | } 244 | else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && IS_BOTH_SEPARATOR(AFTER(8))) { 245 | SEEK(8); return rb_str_new2("\f"); 246 | } 247 | else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && IS_BOTH_SEPARATOR(AFTER(6))) { 248 | SEEK(6); return rb_str_new2("\r"); 249 | } 250 | else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && IS_BOTH_SEPARATOR(AFTER(5))) { 251 | SEEK(5); return 
rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))), 252 | rb_intern("pack"), 1, rb_str_new2("U")); 253 | } 254 | else if (CURRENT == 'o') { 255 | size_t length = 1; 256 | size_t i; 257 | 258 | for (i = 1; i < 5; i++) { 259 | if (IS_BOTH_SEPARATOR(AFTER(i))) { 260 | break; 261 | } 262 | 263 | length++; 264 | } 265 | 266 | if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && IS_BOTH_SEPARATOR(AFTER(length))) { 267 | SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)), 268 | rb_intern("chr"), 0); 269 | } 270 | } 271 | 272 | rb_raise(rb_eSyntaxError, "unknown character type"); 273 | } 274 | 275 | static VALUE read_keyword (STATE) 276 | { 277 | size_t length = 0; 278 | 279 | SEEK(1); 280 | 281 | while (!IS_KEYWORD_SEPARATOR(AFTER(length))) { 282 | length++; 283 | } 284 | 285 | SEEK(length); 286 | 287 | return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0), 288 | rb_intern("keyword!"), 0); 289 | } 290 | 291 | static VALUE read_string (STATE) 292 | { 293 | size_t length = 0; 294 | 295 | SEEK(1); 296 | 297 | while (AFTER(length) != '"') { 298 | if (IS_EOF_AFTER(length)) { 299 | rb_raise(rb_eSyntaxError, "unexpected EOF"); 300 | } 301 | 302 | if (AFTER(length) == '\\') { 303 | length++; 304 | } 305 | 306 | length++; 307 | } 308 | 309 | SEEK(length + 1); 310 | 311 | return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length)); 312 | } 313 | 314 | static VALUE read_regexp (STATE) 315 | { 316 | size_t length = 0; 317 | VALUE args[] = { Qnil }; 318 | 319 | SEEK(2); 320 | 321 | while (AFTER(length) != '"') { 322 | if (IS_EOF_AFTER(length)) { 323 | rb_raise(rb_eSyntaxError, "unexpected EOF"); 324 | } 325 | 326 | if (AFTER(length) == '\\') { 327 | length++; 328 | } 329 | 330 | length++; 331 | } 332 | 333 | SEEK(length + 1); 
334 | 335 | args[0] = rb_str_new(BEFORE_PTR(length + 1), length); 336 | 337 | return rb_class_new_instance(1, args, rb_cRegexp); 338 | } 339 | 340 | static VALUE read_instant (STATE) 341 | { 342 | SEEK(1); 343 | 344 | if (!IS_NOT_EOF_UP_TO(4)) { 345 | rb_raise(rb_eSyntaxError, "unexpected EOF"); 346 | } 347 | 348 | if (!IS_EQUAL_UP_TO("inst", 4)) { 349 | rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3)); 350 | } 351 | 352 | SEEK(4); 353 | 354 | CALL(ignore); 355 | 356 | return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(read_string)); 357 | } 358 | 359 | static VALUE read_list (STATE) 360 | { 361 | VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class")); 362 | 363 | SEEK(1); CALL(ignore); 364 | 365 | while (CURRENT != ')') { 366 | rb_funcall(result, rb_intern("<<"), 1, CALL(read_next)); 367 | 368 | CALL(ignore); 369 | } 370 | 371 | SEEK(1); 372 | 373 | return result; 374 | } 375 | 376 | static VALUE read_vector (STATE) 377 | { 378 | VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class")); 379 | 380 | SEEK(1); CALL(ignore); 381 | 382 | while (CURRENT != ']') { 383 | rb_funcall(result, rb_intern("<<"), 1, CALL(read_next)); 384 | 385 | CALL(ignore); 386 | } 387 | 388 | SEEK(1); 389 | 390 | return result; 391 | } 392 | 393 | static VALUE read_set (STATE) 394 | { 395 | VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class")); 396 | 397 | SEEK(2); CALL(ignore); 398 | 399 | while (CURRENT != '}') { 400 | if (NIL_P(rb_funcall(result, rb_intern("add?"), 1, CALL(read_next)))) { 401 | rb_raise(rb_eSyntaxError, "the set contains non unique values"); 402 | } 403 | 404 | CALL(ignore); 405 | } 406 | 407 | SEEK(1); 408 | 409 | return result; 410 | } 411 | 412 | static VALUE read_map (STATE) 413 | { 414 | VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class")); 415 | VALUE key; 416 | VALUE value; 417 | 
418 | SEEK(1); CALL(ignore); 419 | 420 | while (CURRENT != '}') { 421 | key = CALL(read_next); 422 | CALL(ignore); 423 | value = CALL(read_next); 424 | CALL(ignore); 425 | 426 | rb_funcall(result, rb_intern("[]="), 2, key, value); 427 | } 428 | 429 | SEEK(1); 430 | 431 | return result; 432 | } 433 | 434 | static VALUE read_next (STATE) 435 | { 436 | CALL(ignore); 437 | 438 | if (IS_EOF) { 439 | rb_raise(rb_eSyntaxError, "unexpected EOF"); 440 | } 441 | 442 | switch (CALL(next_type)) { 443 | case NODE_METADATA: return CALL(read_metadata); 444 | case NODE_NUMBER: return CALL(read_number); 445 | case NODE_BOOLEAN: return CALL(read_boolean); 446 | case NODE_NIL: return CALL(read_nil); 447 | case NODE_CHAR: return CALL(read_char); 448 | case NODE_KEYWORD: return CALL(read_keyword); 449 | case NODE_STRING: return CALL(read_string); 450 | case NODE_MAP: return CALL(read_map); 451 | case NODE_LIST: return CALL(read_list); 452 | case NODE_VECTOR: return CALL(read_vector); 453 | case NODE_INSTANT: return CALL(read_instant); 454 | case NODE_SET: return CALL(read_set); 455 | case NODE_REGEXP: return CALL(read_regexp); 456 | case NODE_SYMBOL: return CALL(read_symbol); 457 | } 458 | } 459 | 460 | static VALUE t_init (int argc, VALUE* argv, VALUE self) 461 | { 462 | VALUE tmp; 463 | VALUE source; 464 | VALUE options; 465 | 466 | if (argc < 1) { 467 | rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)"); 468 | } 469 | else if (argc > 2) { 470 | rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); 471 | } 472 | 473 | if (!rb_obj_is_kind_of(argv[0], rb_cString) && !rb_obj_is_kind_of(argv[0], rb_cIO)) { 474 | rb_raise(rb_eArgError, "you have to pass a String or an IO"); 475 | } 476 | 477 | source = argv[0]; 478 | options = argc == 2 ? 
argv[1] : rb_hash_new(); 479 | 480 | rb_iv_set(self, "@source", source); 481 | rb_iv_set(self, "@options", options); 482 | 483 | if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("map_class")))) { 484 | rb_iv_set(self, "@map_class", tmp); 485 | } 486 | else { 487 | rb_iv_set(self, "@map_class", rb_const_get(cClojure, rb_intern("Map"))); 488 | } 489 | 490 | if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("vector_class")))) { 491 | rb_iv_set(self, "@vector_class", tmp); 492 | } 493 | else { 494 | rb_iv_set(self, "@vector_class", rb_const_get(cClojure, rb_intern("Vector"))); 495 | } 496 | 497 | if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("list_class")))) { 498 | rb_iv_set(self, "@list_class", tmp); 499 | } 500 | else { 501 | rb_iv_set(self, "@list_class", rb_const_get(cClojure, rb_intern("List"))); 502 | } 503 | 504 | if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("set_class")))) { 505 | rb_iv_set(self, "@set_class", tmp); 506 | } 507 | else { 508 | rb_iv_set(self, "@set_class", rb_const_get(cClojure, rb_intern("Set"))); 509 | } 510 | 511 | return self; 512 | } 513 | 514 | static VALUE t_parse (VALUE self) 515 | { 516 | char* string; 517 | size_t position = 0; 518 | VALUE source = rb_iv_get(self, "@source"); 519 | VALUE result; 520 | 521 | if (!rb_obj_is_kind_of(source, rb_cString)) { 522 | if (rb_obj_is_kind_of(source, rb_cIO)) { 523 | source = rb_funcall(source, rb_intern("read"), 0); 524 | } 525 | else { 526 | source = rb_funcall(source, rb_intern("to_str"), 0); 527 | } 528 | } 529 | 530 | string = StringValueCStr(source); 531 | result = read_next(self, string, &position); 532 | 533 | ignore(self, string, &position); 534 | 535 | if (string[position] != '\0') { 536 | rb_raise(rb_eSyntaxError, "there is some unconsumed input"); 537 | } 538 | 539 | return result; 540 | } 541 | 542 | void 543 | Init_parser_ext (void) 544 | { 545 | cClojure = rb_const_get(rb_cObject, rb_intern("Clojure")); 546 | cParser = rb_define_class_under(cClojure, "Parser", rb_cObject); 547 
| 548 | rb_define_method(cParser, "initialize", t_init, -1); 549 | rb_define_method(cParser, "parse", t_parse, 0); 550 | 551 | VALUE args[] = { Qnil }; 552 | 553 | args[0] = rb_str_new2("[0-9|a-f|A-F]{4}"); 554 | UNICODE_REGEX = rb_class_new_instance(1, args, rb_cRegexp); 555 | rb_define_const(cClojure, "UNICODE_REGEX", UNICODE_REGEX); 556 | 557 | args[0] = rb_str_new2("[0-3]?[0-7]?[0-7]"); 558 | OCTAL_REGEX = rb_class_new_instance(1, args, rb_cRegexp); 559 | rb_define_const(cClojure, "OCTAL_REGEX", OCTAL_REGEX); 560 | } 561 | -------------------------------------------------------------------------------- /lib/clj.rb: -------------------------------------------------------------------------------- 1 | #-- 2 | # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 3 | # Version 2, December 2004 4 | # 5 | # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 6 | # TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 7 | # 8 | # 0. You just DO WHAT THE FUCK YOU WANT TO. 9 | #++ 10 | 11 | module Clojure 12 | def self.parse (*args) 13 | Clojure::Parser.new(*args).parse 14 | end 15 | 16 | def self.dump (what, options = {}) 17 | raise ArgumentError, 'cannot convert the passed value to clojure' unless what.respond_to? :to_clj 18 | 19 | what.to_clj(options) 20 | end 21 | 22 | UNESCAPE_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n 23 | 24 | # Unescape characters in strings. 25 | UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } 26 | UNESCAPE_MAP.merge!( 27 | ?" => '"', 28 | ?\\ => '\\', 29 | ?/ => '/', 30 | ?b => "\b", 31 | ?f => "\f", 32 | ?n => "\n", 33 | ?r => "\r", 34 | ?t => "\t", 35 | ?u => nil 36 | ) 37 | 38 | EMPTY_8BIT_STRING = '' 39 | 40 | if EMPTY_8BIT_STRING.respond_to? 
:force_encoding 41 | EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT 42 | end 43 | 44 | def self.unescape (string) 45 | string.gsub(UNESCAPE_REGEX) {|escape| 46 | if u = UNESCAPE_MAP[$&[1]] 47 | next u 48 | end 49 | 50 | bytes = EMPTY_8BIT_STRING.dup 51 | 52 | i = 0 53 | while escape[6 * i] == ?\\ && escape[6 * i + 1] == ?u 54 | bytes << escape[6 * i + 2, 2].to_i(16) << escape[6 * i + 4, 2].to_i(16) 55 | 56 | i += 1 57 | end 58 | 59 | if bytes.respond_to? :force_encoding 60 | bytes.force_encoding 'UTF-16be' 61 | bytes.encode 'UTF-8' 62 | else 63 | bytes 64 | end 65 | } 66 | end 67 | end 68 | 69 | require 'clj/types' 70 | 71 | begin 72 | raise LoadError if RUBY_ENGINE == 'jruby' || ENV['CLJ_NO_C_EXT'] 73 | 74 | require 'clj/parser_ext' 75 | rescue LoadError 76 | require 'clj/parser' 77 | end 78 | -------------------------------------------------------------------------------- /lib/clj/parser.rb: -------------------------------------------------------------------------------- 1 | #-- 2 | # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 3 | # Version 2, December 2004 4 | # 5 | # DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 6 | # TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 7 | # 8 | # 0. You just DO WHAT THE FUCK YOU WANT TO. 9 | #++ 10 | 11 | require 'stringio' 12 | 13 | module Clojure 14 | 15 | class Parser 16 | NUMBERS = '0' .. '9' 17 | 18 | UNICODE_REGEX = /[0-9|a-f|A-F]{4}/ 19 | OCTAL_REGEX = /[0-3]?[0-7]?[0-7]/ 20 | 21 | def initialize (source, options = {}) 22 | @source = source.is_a?(String) ? 
StringIO.new(source) : source 23 | @options = options 24 | 25 | @map_class = options[:map_class] || Clojure::Map 26 | @vector_class = options[:vector_class] || Clojure::Vector 27 | @list_class = options[:list_class] || Clojure::List 28 | @set_class = options[:set_class] || Clojure::Set 29 | end 30 | 31 | def parse 32 | result = read_next 33 | 34 | ignore(false) 35 | 36 | if @source.read(1) 37 | raise SyntaxError, 'there is some unconsumed input' 38 | end 39 | 40 | result 41 | end 42 | 43 | private 44 | def next_type (ch) 45 | case ch 46 | when '^' then :metadata 47 | when NUMBERS, '-', '+' then :number 48 | when 't', 'f' then :boolean 49 | when 'n' then :nil 50 | when '\\' then :char 51 | when ':' then :keyword 52 | when '"' then :string 53 | when '{' then :map 54 | when '(' then :list 55 | when '[' then :vector 56 | when '#' 57 | case @source.read(1) 58 | when 'i' then :instant 59 | when '{' then :set 60 | when '"' then :regexp 61 | end 62 | else :symbol 63 | end 64 | end 65 | 66 | def read_next 67 | ch = ignore(false) 68 | 69 | raise SyntaxError, 'unexpected EOF' unless ch 70 | 71 | __send__ "read_#{next_type ch}", ch 72 | end 73 | 74 | def read_metadata (ch) 75 | metadatas = [read_next] 76 | 77 | while lookahead(1) == '^' 78 | raise SyntaxError, 'unexpected EOF' unless @source.read(1) 79 | 80 | metadatas.push(read_next) 81 | end 82 | 83 | value = read_next 84 | 85 | unless value.respond_to? 
# Reads the rest of a token that started with 'n' (already consumed and passed
# in as +ch+).
#
# Returns nil when the token is exactly `nil` followed by a separator or EOF;
# otherwise rewinds everything it peeked at and lets read_symbol parse the
# token as a symbol.
def read_nil (ch)
  # read returns nil (not '') when the source is already at EOF
  check = @source.read(3) || ''

  if check[0, 2] == 'il' && both_separator?(check[2])
    # the separator belongs to whoever called us (e.g. the ']' closing a
    # vector), so put it back instead of swallowing it
    revert if check.length == 3

    nil
  else
    revert(check.length) unless check.empty?

    read_symbol(ch)
  end
end
'N' 147 | piece[-1] = '' 148 | end 149 | 150 | Integer(piece) 151 | end 152 | end 153 | 154 | def read_char (ch) 155 | if (ahead = lookahead(2)) && both_separator?(ahead[1]) 156 | @source.read(1) 157 | elsif (ahead = lookahead(8)) && ahead[0, 7] == 'newline' && both_separator?(ahead[7]) 158 | @source.read(7) and "\n" 159 | elsif (ahead = lookahead(6)) && ahead[0, 5] == 'space' && both_separator?(ahead[5]) 160 | @source.read(5) and ' ' 161 | elsif (ahead = lookahead(4)) && ahead[0, 3] == 'tab' && both_separator?(ahead[3]) 162 | @source.read(3) and "\t" 163 | elsif (ahead = lookahead(10)) && ahead[0, 9] == 'backspace' && both_separator?(ahead[9]) 164 | @source.read(9) and "\b" 165 | elsif (ahead = lookahead(9)) && ahead[0, 8] == 'formfeed' && both_separator?(ahead[8]) 166 | @source.read(8) and "\f" 167 | elsif (ahead = lookahead(7)) && ahead[0, 6] == 'return' && both_separator?(ahead[6]) 168 | @source.read(6) and "\r" 169 | elsif (ahead = lookahead(6)) && ahead[0] == 'u' && ahead[1, 5] =~ UNICODE_REGEX && both_separator?(ahead[5]) 170 | [@source.read(5)[1, 4].to_i(16)].pack('U') 171 | elsif (ahead = lookahead(5)) && ahead[0] == 'o' && matches = ahead[1, 3].match(OCTAL_REGEX) 172 | length = matches[0].length + 1 173 | 174 | if both_separator?(ahead[length]) 175 | @source.read(length)[1, 3].to_i(8).chr 176 | end 177 | end or raise SyntaxError, 'unknown character type' 178 | end 179 | 180 | def read_symbol (ch) 181 | result = ch 182 | 183 | while (ch = @source.read(1)) && is_symbol?(ch) 184 | result << ch 185 | end 186 | 187 | revert if ch 188 | 189 | if result.include? '::' 190 | raise SyntaxError, 'symbols cannot have repeating :' 191 | end 192 | 193 | result.to_sym.symbol! 194 | end 195 | 196 | def read_keyword (ch) 197 | result = '' 198 | 199 | while (ch = @source.read(1)) && !keyword_separator?(ch) 200 | result << ch 201 | end 202 | 203 | revert if ch 204 | 205 | result.to_sym.keyword! 
206 | end 207 | 208 | def read_string (ch) 209 | result = '' 210 | 211 | while (ch = @source.read(1)) != '"' 212 | raise SyntaxError, 'unexpected EOF' unless ch 213 | 214 | result << ch 215 | 216 | if ch == '\\' 217 | result << @source.read(1) 218 | end 219 | end 220 | 221 | Clojure.unescape(result) 222 | end 223 | 224 | def read_instant (ch) 225 | check = @source.read(3) 226 | 227 | if check.length != 3 228 | raise SyntaxError, 'unexpected EOF' 229 | elsif check != 'nst' 230 | raise SyntaxError, "expected inst, found i#{check}" 231 | end 232 | 233 | DateTime.rfc3339(read_string(ignore(false))) 234 | end 235 | 236 | def read_regexp (ch) 237 | result = '' 238 | 239 | while (ch = @source.read(1)) != '"' 240 | raise SyntaxError, 'unexpected EOF' unless ch 241 | 242 | result << ch 243 | 244 | if ch == '\\' 245 | result << @source.read(1) 246 | end 247 | end 248 | 249 | /#{result}/ 250 | end 251 | 252 | def read_list (ch) 253 | result = @list_class.new 254 | 255 | ignore 256 | 257 | while lookahead(1) != ')' 258 | result << read_next 259 | ignore 260 | end 261 | 262 | @source.read(1) 263 | 264 | result 265 | end 266 | 267 | def read_vector (ch) 268 | result = @vector_class.new 269 | 270 | ignore 271 | 272 | while lookahead(1) != ']' 273 | result << read_next 274 | ignore 275 | end 276 | 277 | @source.read(1) 278 | 279 | result 280 | end 281 | 282 | def read_set (ch) 283 | result = @set_class.new 284 | 285 | ignore 286 | 287 | while lookahead(1) != '}' 288 | unless result.add? 
# Tells whether +ch+ (a one-character String) may appear inside a symbol:
# an ASCII digit or letter, or one of + ! - _ ? . : /
#
# The upper-case check must be a conjunction; with `||` it is true for every
# character, so read_symbol would never stop at whitespace or a closing
# bracket.
def is_symbol? (ch)
  (ch >= '0' && ch <= '9') ||
    (ch >= 'a' && ch <= 'z') ||
    (ch >= 'A' && ch <= 'Z') ||
    ch == '+' || ch == '!' || ch == '-' || ch == '_' ||
    ch == '?' || ch == '.' || ch == ':' || ch == '/'
end
module Clojure
  # Mixin that lets a collection carry Clojure metadata (the `^{...}` prefix).
  module Metadata
    # The metadata map, created lazily on first access.
    def metadata
      @metadata ||= Clojure::Map.new
    end

    # Merges +value+ into the metadata map.
    #
    # A Hash is merged as is, a Symbol becomes `{ symbol => true }` and a
    # String becomes `{ :tag => string }`. Anything else raises ArgumentError.
    def metadata= (value)
      metadata.merge! case value
        when ::Hash   then value
        when ::Symbol then { value => true }
        when ::String then { :tag => value }
        else raise ArgumentError, 'the passed value is not suitable as metadata'
      end
    end

    # Renders the metadata prefix (including the trailing space), or '' when
    # there is no metadata or `options[:metadata]` is false.
    def metadata_to_clj (options = {})
      return '' unless options[:metadata] != false && @metadata && !@metadata.empty?

      '^' + if @metadata.length == 1
        piece = @metadata.first

        # single entries use the short forms ^:keyword and ^"Tag"
        if piece.first.is_a?(::Symbol) && piece.last == true
          piece.first.to_clj(options)
        elsif piece.first == :tag && piece.last.is_a?(::String)
          piece.last.to_clj(options)
        else
          @metadata.to_clj(options)
        end
      else
        @metadata.to_clj(options)
      end + ' '
    end
  end

  # A Clojure map literal: {k v ...}
  class Map < Hash
    include Clojure::Metadata

    def to_clj (options = {})
      metadata_to_clj(options) + '{' + map { |k, v| k.to_clj(options) + ' ' + v.to_clj(options) }.join(' ') + '}'
    end
  end

  # A Clojure vector literal: [a b ...]
  class Vector < Array
    include Clojure::Metadata

    def to_clj (options = {})
      metadata_to_clj(options) + '[' + map { |o| o.to_clj(options) }.join(' ') + ']'
    end
  end

  # A Clojure list literal: (a b ...)
  class List < Array
    include Clojure::Metadata

    def to_clj (options = {})
      metadata_to_clj(options) + '(' + map { |o| o.to_clj(options) }.join(' ') + ')'
    end
  end

  # A Clojure set literal: #{a b ...}
  class Set < Set
    include Clojure::Metadata

    def to_clj (options = {})
      metadata_to_clj(options) + '#{' + map { |o| o.to_clj(options) }.join(' ') + '}'
    end
  end

  # A Clojure symbol, wrapping a Ruby Symbol so that it can be told apart from
  # a keyword (which is represented by a plain Ruby Symbol).
  class Symbol
    # Whole-string check for a printable symbol name. The hyphen is escaped so
    # that it is a literal and not the range `!-_`, and the pattern is anchored
    # so that a single valid character somewhere in the name is not enough.
    # A leading digit is rejected because the reader would take it as a number.
    VALID_NAME = %r{\A[[:alpha:]_:+!\-?./*<>=&%$][[:alnum:]_:+!\-?./*<>=&%$'#]*\z}

    def initialize (sym)
      @internal = sym
    end

    def keyword?; false; end
    def symbol?;  true;  end

    # Returns the symbol name as Clojure source.
    #
    # Raises ArgumentError when the name contains characters (whitespace,
    # brackets, quotes, ...) that would not read back as one symbol.
    def to_clj (*)
      result = to_sym.to_s

      unless result =~ VALID_NAME
        raise ArgumentError, "#{result} cannot be transformed into clojure"
      end

      result
    end

    def == (other)
      return false unless other.is_a?(Symbol)

      to_sym == other.to_sym
    end

    # Keep Hash keys and Set members consistent with ==, so that two equal
    # symbols are the same map key / set element.
    alias eql? ==

    def hash
      [self.class, to_sym].hash
    end

    def to_sym;  @internal;   end
    def to_s;    to_sym.to_s; end
    def inspect; to_s         end
  end
end
| def keyword! 122 | self 123 | end 124 | 125 | def symbol! 126 | Clojure::Symbol.new(self) 127 | end 128 | 129 | def keyword?; true; end 130 | def symbol?; false; end 131 | 132 | def to_clj (*) 133 | result = to_sym.inspect 134 | 135 | unless result =~ /:([^(\[{'^@`~\"\\,\s;)\]}]+)/ 136 | raise ArgumentError, "#{result} cannot be transformed into clojure" 137 | end 138 | 139 | result 140 | end 141 | end 142 | 143 | class String 144 | def to_clj (*) 145 | result = (encode('UTF-16be') rescue self).inspect 146 | 147 | result.gsub!(/(^|[^\\])\\e/, '\1\u001b') 148 | result.gsub!(/(^|[^\\])\\a/, '\1\u0003') 149 | 150 | result 151 | end 152 | end 153 | 154 | class Rational 155 | def to_clj (*) 156 | to_s 157 | end 158 | end 159 | 160 | class Regexp 161 | def to_clj (*) 162 | '#"' + inspect[1 .. -2] + '"' 163 | end 164 | end 165 | 166 | class DateTime 167 | def to_clj (options = {}) 168 | options[:alpha] ? '#inst "' + rfc3339 + '"' : to_time.to_i.to_s 169 | end 170 | end 171 | 172 | class Date 173 | def to_clj (options = {}) 174 | to_datetime.to_clj(options) 175 | end 176 | end 177 | 178 | class Time 179 | def to_clj (options = {}) 180 | to_datetime.to_clj(options) 181 | end 182 | end 183 | 184 | class Bignum < Integer 185 | def to_clj (*) 186 | to_s + 'N' 187 | end 188 | end 189 | 190 | class BigDecimal < Numeric 191 | def to_clj (*) 192 | to_s('F') + 'M' 193 | end 194 | end 195 | 196 | class Array 197 | def to_clj (options = {}) 198 | to_vector.to_clj(options) 199 | end 200 | 201 | def to_set 202 | Clojure::Set.new(self) 203 | end 204 | 205 | def to_vector 206 | Clojure::Vector.new(self) 207 | end 208 | 209 | def to_list 210 | Clojure::List.new(self) 211 | end 212 | end 213 | 214 | class Set 215 | def to_clj (options = {}) 216 | Clojure::Set.new(self).to_clj(options) 217 | end 218 | end 219 | 220 | class Hash 221 | def to_clj (options = {}) 222 | to_map.to_clj(options) 223 | end 224 | 225 | def to_map 226 | Clojure::Map[self] 227 | end 228 | end 229 | 
-------------------------------------------------------------------------------- /test/bench/bench.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import clj 4 | import time 5 | 6 | s = "[1 2 3 true false nil {:a 21.3 :b 43.2} \"Hello\"]" 7 | 8 | t1 = time.time() 9 | for i in range(10000): 10 | clj.loads(s) 11 | 12 | print time.time()-t1 13 | -------------------------------------------------------------------------------- /test/bench/bench.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env ruby 2 | require 'rubygems' 3 | require 'clj' 4 | 5 | s = "[1 2 3 true false nil {:a 21.3 :b 43.2} \"Hello\"]" 6 | 7 | t1 = Time.now() 8 | 9 | 0.upto 10000 do 10 | Clojure.parse(s) 11 | end 12 | 13 | puts Time.now()-t1 14 | -------------------------------------------------------------------------------- /test/bench/clj.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (C) 2012 Sun Ning 3 | # 4 | # Permission is hereby granted, free of charge, to any person 5 | # obtaining a copy of this software and associated documentation files 6 | # (the "Software"), to deal in the Software without restriction, 7 | # including without limitation the rights to use, copy, modify, merge, 8 | # publish, distribute, sublicense, and/or sell copies of the Software, 9 | # and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be 13 | # included in all copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | # NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# clojure literals => python types
#
# clojure vector     [1 2 3 4]    =>  python list     [1 2 3 4]      *coll
# clojure list       (1 2 3 4)    =>  python list     [1 2 3 4]      *coll
# clojure set        #{1 2 3 4}   =>  python set      set(1 2 3 4)   *coll
# clojure map        {:a 1 :b 2}  =>  python dict     dict(a=1,b=2)  *coll
# clojure string     "a"          =>  python unicode  "a"
# clojure character  \a           =>  python unicode  "a"
# clojure keyword    :a           =>  python unicode  "a"
# clojure integer    123          =>  python integer  123
# clojure float      12.3         =>  python float    12.3
# clojure boolean    true         =>  python boolean  true
# clojure nil                     =>  python None
#


__all__ = ["dump", "dumps", "load", "loads"]

import os
from cStringIO import StringIO

def number(v):
    """Convert a numeric token to float or int.

    A token containing a decimal point or an exponent marker is a float;
    anything else is parsed as an int.
    """
    # The exponent check matters because the tokenizer accepts "e"/"E" as
    # number characters: "2e3" has no dot but is not a valid int literal.
    if '.' in v or 'e' in v or 'E' in v:
        return float(v)
    else:
        return int(v)

_STOP_CHARS = [" ", ",", "\n", "\r"]
_COLL_OPEN_CHARS = ["#", "[", "{"]
_EXTRA_NUM_CHARS = ["-", "+", ".", "e", "E"]

class CljDecoder(object):
    """Reads one clojure form from a file-like object, one token at a time.

    Open collections are kept on ``value_stack`` as
    ``(items, terminator, container_type)`` tuples; ``terminator`` always
    mirrors the closing character of the innermost open collection.
    """

    def __init__(self, fd):
        self.fd = fd
        self.value_stack = []
        self.terminator = None ## for collection type

    def decode(self):
        """Read tokens until no collection is left open; return the last value."""
        while True:
            v = self.__read_token()
            if len(self.value_stack) == 0:
                return v

    def __get_type_from_char(self, c):
        """return a tuple of type information
        * type name
        * a flag to indicate if it's a collection
        * the closing character of the collection (None for scalars)
        """
        if c.isdigit() or c =='-':
            return ("number", False, None)
        elif c == 't' or c == 'f': ## true/false
            return ("boolean", False, None)
        elif c == 'n': ## nil
            return ("nil", False, None)
        elif c == '\\' :
            return ("char", False, None)
        elif c == ':':
            return ("keyword", False, None)
        elif c == '"':
            return ("string", False, None)
        elif c == '#':
            return ("set", True, "}")
        elif c == '{':
            return ("dict", True, "}")
        elif c == '(':
            return ("list", True, ")")
        elif c == '[':
            return ('list', True, "]")
        else:
            return (None, False, None)

    def __read_token(self):
        fd = self.fd

        c = fd.read(1)

        ## skip all stop chars if necessary
        while c in _STOP_CHARS:
            c = fd.read(1)

        ## raise exception when unexpected EOF found
        if c == '':
            raise ValueError("Unexpected EOF")

        t, coll, term = self.__get_type_from_char(c)
        if coll:
            ## move cursor
            if t == "set":
                ## skip {
                fd.read(1)

            self.terminator = term

            self.value_stack.append(([], self.terminator, t))
            return None
        else:
            v = None ## token value
            e = None ## end char
            r = True ## the token contains data or not

            if t == "boolean":
                if c == 't':
                    e = fd.read(4)[-1]
                    v = True
                else:
                    e = fd.read(5)[-1]
                    v = False

            elif t == "char":
                buf = []
                ## strings are compared by value (==/!=), never by identity
                while c != self.terminator and c != "" and c not in _STOP_CHARS:
                    c = fd.read(1)
                    buf.append(c)

                e = c
                v = ''.join(buf[:-1])

            elif t == "nil":
                e = fd.read(3)[-1]
                v = None

            elif t == "number":
                buf = []
                while c.isdigit() or (c in _EXTRA_NUM_CHARS):
                    buf.append(c)
                    c = fd.read(1)
                e = c
                numstr = ''.join(buf)
                v = number(numstr)

                ## special case for
                ## [23[12]]
                ## this is a valid clojure form
                if e in _COLL_OPEN_CHARS:
                    fd.seek(-1, os.SEEK_CUR)

            elif t == "keyword":
                buf = [] ##skip the leading ":"
                while c != self.terminator and c != "" and c not in _STOP_CHARS:
                    c = fd.read(1)
                    buf.append(c)

                e = c
                v = ''.join(buf[:-1])

            elif t == "string":
                buf = []
                cp = c = fd.read(1) ## to check escaping character \

                while not(c == '"' and cp != '\\'):
                    buf.append(c)
                    cp = c
                    c = fd.read(1)
                e = c
                #v = u''.join(buf).decode('unicode-escape')
                v = ''.join(buf).decode('string-escape')
            else:
                r = False
                e = c

            ## the token ended on the closing char of the innermost collection:
            ## finish that collection and let it become the current value
            if e == self.terminator:
                current_scope, _, container = self.value_stack.pop()

                if r:
                    current_scope.append(v)

                if container == "set":
                    v = set(current_scope)
                elif container == "list":
                    v = current_scope
                elif container == "dict":
                    v = {}
                    for i in range(0, len(current_scope), 2):
                        v[current_scope[i]] = current_scope[i+1]

            ## hand the value to the enclosing collection, if any
            if len(self.value_stack) > 0:
                self.value_stack[-1][0].append(v)
                self.terminator = self.value_stack[-1][1]

            return v


class CljEncoder(object):
    def __init__(self, data, fd):
        self.data = data
        self.fd = fd

212 | def encode(self): 213 | self.__do_encode(self.data) 214 | 215 | def get_type(self,t): 216 | if t is None: 217 | return ("None", False) 218 | elif isinstance(t, str) or isinstance(t, unicode): 219 | return ("string", False) 220 | elif isinstance(t, bool): 221 | return ("boolean", False) 222 | elif isinstance(t, float) or isinstance(t, int): 223 | return ("number", False) 224 | elif isinstance(t, dict): 225 | return ("dict", True) 226 | elif isinstance(t, list): 227 | return ("list", True) 228 | elif isinstance(t, set): 229 | return ("set", True) 230 | else: 231 | return ("unknown", False) 232 | 233 | def __do_encode(self, d): 234 | fd = self.fd 235 | t,coll = self.get_type(d) 236 | 237 | if coll: 238 | if t == "dict": 239 | fd.write("{") 240 | for k,v in d.items(): 241 | self.__do_encode(k) 242 | fd.write(" ") 243 | self.__do_encode(v) 244 | fd.write(" ") 245 | fd.seek(-1, os.SEEK_CUR) 246 | fd.write("}") 247 | elif t == "list": 248 | fd.write("[") 249 | for v in d: 250 | self.__do_encode(v) 251 | fd.write(" ") 252 | fd.seek(-1, os.SEEK_CUR) 253 | fd.write("]") 254 | elif t == "set": 255 | fd.write("#{") 256 | for v in d: 257 | self.__do_encode(v) 258 | fd.write(" ") 259 | fd.seek(-1, os.SEEK_CUR) 260 | fd.write("}") 261 | else: 262 | if t == "number": 263 | fd.write(str(d)) 264 | elif t == "string": 265 | s = d.encode("unicode-escape").replace('"', '\\"') 266 | fd.write('"'+s+'"') 267 | elif t == "boolean": 268 | if d: 269 | fd.write('true') 270 | else: 271 | fd.write('false') 272 | elif t == 'None': 273 | fd.write('nil') 274 | else: 275 | fd.write('"'+str(d)+'"') 276 | 277 | def dump(obj, fp): 278 | return CljEncoder(obj, fp).encode() 279 | 280 | def dumps(obj): 281 | buf = StringIO() 282 | dump(obj, buf) 283 | result = buf.getvalue() 284 | buf.close() 285 | return result 286 | 287 | def load(fp): 288 | decoder = CljDecoder(fp) 289 | return decoder.decode() 290 | 291 | def loads(s): 292 | buf = StringIO(s) 293 | result = load(buf) 294 | buf.close() 295 | 
return result 296 | 297 | -------------------------------------------------------------------------------- /test/clj_spec.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env ruby 2 | require 'rubygems' 3 | require 'clj' 4 | require 'bigdecimal' 5 | 6 | describe Clojure do 7 | describe '#dump' do 8 | it 'dumps correctly true' do 9 | Clojure.dump(true).should == 'true' 10 | end 11 | 12 | it 'dumps correctly false' do 13 | Clojure.dump(false).should == 'false' 14 | end 15 | 16 | it 'dumps correctly nil' do 17 | Clojure.dump(nil).should == 'nil' 18 | end 19 | 20 | it 'dumps correctly strings' do 21 | Clojure.dump("lol").should == '"lol"' 22 | Clojure.dump("lol\nlol").should == '"lol\nlol"' 23 | Clojure.dump("\\e\e").should == '"\\\\e\u001b"' 24 | Clojure.dump("\\a\a").should == '"\\\\a\u0003"' 25 | end 26 | 27 | it 'dumps correctly symbols' do 28 | Clojure.dump(:wat.symbol!).should == 'wat' 29 | end 30 | 31 | it 'dumps correctly keywords' do 32 | Clojure.dump(:wat).should == ':wat' 33 | 34 | expect { 35 | Clojure.dump(:"lol wat") 36 | }.should raise_error 37 | end 38 | 39 | it 'dumps correctly integers' do 40 | Clojure.dump(2).should == '2' 41 | Clojure.dump(1337).should == '1337' 42 | end 43 | 44 | it 'dumps correctly floats' do 45 | Clojure.dump(2.3).should == '2.3' 46 | end 47 | 48 | it 'dumps correctly rationals' do 49 | unless RUBY_VERSION.include? 
'1.8' 50 | Clojure.dump(Rational('2/3')).should == '2/3' 51 | end 52 | end 53 | 54 | it 'dumps correctly bignums' do 55 | Clojure.dump(324555555555555555555555555555555555555555555555324445555555555555).should == '324555555555555555555555555555555555555555555555324445555555555555N' 56 | end 57 | 58 | it 'dumps correctly bigdecimals' do 59 | Clojure.dump(BigDecimal('0.2345636456')).should == '0.2345636456M' 60 | end 61 | 62 | it 'dumps correctly regexps' do 63 | Clojure.dump(/(\d+)/).should == '#"(\d+)"' 64 | end 65 | 66 | it 'dumps correctly dates' do 67 | unless RUBY_VERSION.include? '1.8' 68 | Clojure.dump(DateTime.rfc3339("2012-02-03T15:20:59+01:00")).should == '1328278859' 69 | Clojure.dump(DateTime.rfc3339("2012-02-03T15:20:59+01:00"), :alpha => true).should == '#inst "2012-02-03T15:20:59+01:00"' 70 | end 71 | end 72 | 73 | it 'dumps correctly arrays' do 74 | Clojure.dump([]).should == '[]' 75 | Clojure.dump([[]]).should == '[[]]' 76 | Clojure.dump([[], [], []]).should == '[[] [] []]' 77 | 78 | Clojure.dump([1, 2, 3]).should == '[1 2 3]' 79 | Clojure.dump([1, 2, 3].to_list).should == '(1 2 3)' 80 | end 81 | 82 | it 'dumps correctly hashes' do 83 | Clojure.dump({ :a => 'b' }).should == '{:a "b"}' 84 | end 85 | 86 | it 'dumps correctly metadata' do 87 | Clojure.dump([1, 2, 3].to_vector.tap { |x| x.metadata = :lol }).should == '^:lol [1 2 3]' 88 | end 89 | 90 | it 'dumps correctly sets' do 91 | Clojure.dump(Set.new([1, 2, 3])).should == '#{1 2 3}' 92 | end 93 | end 94 | 95 | describe '#parse' do 96 | it 'parses correctly true' do 97 | Clojure.parse('true').should == true 98 | 99 | Clojure.parse('truf').should == :truf.symbol! 100 | end 101 | 102 | it 'parses correctly false' do 103 | Clojure.parse('false').should == false 104 | 105 | Clojure.parse('falfe').should == :falfe.symbol! 106 | end 107 | 108 | it 'parses correctly nil' do 109 | Clojure.parse('nil').should == nil 110 | 111 | Clojure.parse('nol').should == :nol.symbol! 
112 | end 113 | 114 | it 'parses correctly chars' do 115 | Clojure.parse('\d').should == 'd' 116 | Clojure.parse('\a').should == 'a' 117 | Clojure.parse('\0').should == '0' 118 | 119 | Clojure.parse('\newline').should == "\n" 120 | Clojure.parse('\space').should == ' ' 121 | Clojure.parse('\tab').should == "\t" 122 | Clojure.parse('\backspace').should == "\b" 123 | Clojure.parse('\formfeed').should == "\f" 124 | Clojure.parse('\return').should == "\r" 125 | 126 | Clojure.parse('\o54').should == ',' 127 | Clojure.parse('[\o3 "lol"]').should == ["\x03", "lol"] 128 | 129 | unless RUBY_VERSION.include? '1.8' 130 | Clojure.parse('\u4343').should == "\u4343" 131 | end 132 | end 133 | 134 | it 'parses correctly strings' do 135 | Clojure.parse('"lol"').should == "lol" 136 | Clojure.parse('"lol\nlol"').should == "lol\nlol" 137 | 138 | unless RUBY_VERSION.include? '1.8' 139 | Clojure.parse('"\u4343"').should == "\u4343" 140 | end 141 | end 142 | 143 | it 'parses correctly symbols' do 144 | Clojure.parse('ni').should == :ni.symbol! 145 | end 146 | 147 | it 'parses correctly keywords' do 148 | Clojure.parse(':wat').should == :wat 149 | end 150 | 151 | it 'parses correctly numbers' do 152 | Clojure.parse('2').should == 2 153 | Clojure.parse('1337').should == 1337 154 | 155 | Clojure.parse('16rFF').should == 255 156 | Clojure.parse('2r11').should == 3 157 | 158 | Clojure.parse('2.3').should == 2.3 159 | Clojure.parse('2e3').should == 2000 160 | end 161 | 162 | it 'parses correctly rationals' do 163 | unless RUBY_VERSION.include? 
'1.8' 164 | Clojure.parse('2/3').should == Rational('2/3') 165 | end 166 | end 167 | 168 | it 'parses correctly bignums' do 169 | Clojure.parse('324555555555555555555555555555555555555555555555324445555555555555N').should == 324555555555555555555555555555555555555555555555324445555555555555 170 | end 171 | 172 | it 'parses correctly bigdecimals' do 173 | Clojure.parse('0.2345636456M').should == BigDecimal('0.2345636456') 174 | end 175 | 176 | it 'parses correctly regexps' do 177 | Clojure.parse('#"(\d+)"').should == /(\d+)/ 178 | end 179 | 180 | it 'parses correctly dates' do 181 | unless RUBY_VERSION.include? '1.8' 182 | Clojure.parse('#inst "2012-02-03T15:20:59+01:00"').should == DateTime.rfc3339("2012-02-03T15:20:59+01:00") 183 | end 184 | end 185 | 186 | it 'parses correctly vectors' do 187 | Clojure.parse('[]').should == [] 188 | Clojure.parse('[[]]').should == [[]] 189 | Clojure.parse('[[] [] []]').should == [[], [], []] 190 | 191 | Clojure.parse('[1 2 3]').should == [1, 2, 3] 192 | Clojure.parse('[23[]]').should == [23, []] 193 | end 194 | 195 | it 'parses correctly lists' do 196 | Clojure.parse('()').should == [] 197 | Clojure.parse('(())').should == [[]] 198 | Clojure.parse('(() () ())').should == [[], [], []] 199 | 200 | Clojure.parse('(1 2 3)').should == [1, 2, 3] 201 | Clojure.parse('(23())').should == [23, []] 202 | end 203 | 204 | it 'parses correctly sets' do 205 | Clojure.parse('#{1 2 3}').should == [1, 2, 3].to_set 206 | 207 | expect { Clojure.parse('#{1 1}') }.should raise_error 208 | end 209 | 210 | it 'parses correctly maps' do 211 | Clojure.parse('{:a "b"}').should == { :a => 'b' } 212 | end 213 | 214 | it 'parses correctly metadata' do 215 | Clojure.parse('^:lol [1 2 3]').tap { |data| 216 | data.should == [1, 2, 3] 217 | data.metadata.should == { :lol => true } 218 | } 219 | end 220 | end 221 | end 222 | --------------------------------------------------------------------------------