├── .gitignore
├── README.md
├── Rakefile
├── clj.gemspec
├── ext
    └── clj
    │   ├── extconf.rb
    │   └── parser.c
├── lib
    ├── clj.rb
    └── clj
    │   ├── parser.rb
    │   └── types.rb
└── test
    ├── bench
        ├── bench.py
        ├── bench.rb
        └── clj.py
    └── clj_spec.rb


/.gitignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Like json, but with clojure sexps
2 | =================================
3 | 
4 | It adds `#to_clj` methods to various standard objects and provides `Clojure.parse` and `Clojure.dump`.
5 | Check out `test/clj_spec.rb` to see what it can do.
6 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env ruby
 2 | require 'rake'
 3 | 
 4 | task :default => :test
 5 | 
 6 | task :test do
 7 |   Dir.chdir 'test'
 8 | 
 9 |   sh 'rspec clj_spec.rb --color --format doc'
10 | end
11 | 
12 | task :bench do
13 | 	puts "Ruby: #{`test/bench/bench.rb`.strip}"
14 | 	puts "Python: #{`test/bench/bench.py`.strip}"
15 | end
16 | 


--------------------------------------------------------------------------------
/clj.gemspec:
--------------------------------------------------------------------------------
 1 | Gem::Specification.new {|s|
 2 | 	s.name         = 'clj'
 3 | 	s.version      = '0.0.8.2'
 4 | 	s.author       = 'meh.'
 5 | 	s.email        = 'meh@paranoici.org'
 6 | 	s.homepage     = 'http://github.com/meh/ruby-clj'
 7 | 	s.platform     = Gem::Platform::RUBY
 8 | 	s.summary      = 'Like json, but with clojure sexps.'
 9 | 	s.files        = Dir['ext/**/*.{c,h,rb}'] + Dir['lib/**/*.rb']
10 | 	s.extensions   = 'ext/clj/extconf.rb'
11 | 
12 | 	s.add_development_dependency 'rake'
13 | 	s.add_development_dependency 'rspec'
14 | }
15 | 


--------------------------------------------------------------------------------
/ext/clj/extconf.rb:
--------------------------------------------------------------------------------
 1 | require 'mkmf'
 2 | 
 3 | unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
 4 | 	$CFLAGS << ' -O3'
 5 | end
 6 | 
 7 | if CONFIG['CC'] =~ /gcc/
 8 | 	$CFLAGS << ' -Wall'
 9 | 
10 | 	if $DEBUG && !$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
11 | 		$CFLAGS << ' -O0 -ggdb'
12 | 	end
13 | end
14 | 
15 | create_makefile 'clj/parser_ext'
16 | 


--------------------------------------------------------------------------------
/ext/clj/parser.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  3 |  *                    Version 2, December 2004
  4 |  *
  5 |  *            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  6 |  *   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  7 |  *
  8 |  *  0. You just DO WHAT THE FUCK YOU WANT TO.
  9 |  **/
 10 | 
 11 | #include <stdbool.h>
 12 | #include <ctype.h>
 13 | 
 14 | #include "ruby.h"
 15 | 
 16 | static VALUE cClojure;
 17 | static VALUE cParser;
 18 | 
 19 | static VALUE UNICODE_REGEX;
 20 | static VALUE OCTAL_REGEX;
 21 | 
 22 | typedef enum {
 23 | 	NODE_METADATA,
 24 | 	NODE_NUMBER,
 25 | 	NODE_BOOLEAN,
 26 | 	NODE_NIL,
 27 | 	NODE_CHAR,
 28 | 	NODE_KEYWORD,
 29 | 	NODE_STRING,
 30 | 	NODE_MAP,
 31 | 	NODE_LIST,
 32 | 	NODE_VECTOR,
 33 | 	NODE_INSTANT,
 34 | 	NODE_SET,
 35 | 	NODE_REGEXP,
 36 | 	NODE_SYMBOL
 37 | } NodeType;
 38 | 
 39 | #define CALL(what) (what(self, string, position))
 40 | #define STATE VALUE self, char* string, size_t* position
 41 | #define IS_EOF (string[*position] == '\0')
 42 | #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
 43 | #define CURRENT (string[*position])
 44 | #define CURRENT_PTR (&string[*position])
 45 | #define AFTER(n) (string[*position + (n)])
 46 | #define AFTER_PTR(n) (&string[*position + (n)])
 47 | #define BEFORE(n) (string[*position - (n)])
 48 | #define BEFORE_PTR(n) (&string[*position - (n)])
 49 | #define SEEK(n) (*position += (n))
 50 | #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
 51 | #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
 52 | #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
 53 | #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
 54 | #define IS_SYMBOL(ch) (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '!' || ch == '-' || ch == '_' || ch == '?' || ch == '.' || ch == ':' || ch == '/')
 55 | #define IS_BOTH_SEPARATOR(ch) (ch == '\0' || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
 56 | #define IS_KEYWORD_SEPARATOR(ch) (ch == '\0' || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
 57 | 
 58 | static VALUE read_next (STATE);
 59 | 
 60 | static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
 61 | {
 62 | 	/* Reports whether the n chars starting at the current position all come before the NUL terminator. */
 63 | 	size_t offset = 0;
 64 | 
 65 | 	/* Advance until either n chars were inspected or a NUL is found; */
 66 | 	/* the scan never looks beyond the first NUL it meets. */
 67 | 	while (offset < n && !IS_EOF_AFTER(offset)) {
 68 | 		offset++;
 69 | 	}
 70 | 	return offset == n;
 71 | }
 72 | 
 73 | static void ignore (STATE) /* Advances the position past whitespace and commas (the chars IS_IGNORED accepts). */
 74 | {
 75 | 	while (!IS_EOF && IS_IGNORED(CURRENT)) {
 76 | 		SEEK(1);
 77 | 	}
 78 | }
 79 | 
 80 | static NodeType next_type (STATE)
 81 | {
 82 | 	if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
 83 | 		return NODE_NUMBER;
 84 | 	}
 85 | 
 86 | 	switch (CURRENT) {
 87 | 		case '^':           return NODE_METADATA;
 88 | 		case 't': case 'f': return NODE_BOOLEAN;
 89 | 		case 'n':           return NODE_NIL;
 90 | 		case '\\':          return NODE_CHAR;
 91 | 		case ':':           return NODE_KEYWORD;
 92 | 		case '"':           return NODE_STRING;
 93 | 		case '{':           return NODE_MAP;
 94 | 		case '(':           return NODE_LIST;
 95 | 		case '[':           return NODE_VECTOR;
 96 | 	}
 97 | 
 98 | 	if (CURRENT == '#') {
 99 | 		if (IS_EOF_AFTER(1)) {
100 | 			rb_raise(rb_eSyntaxError, "unexpected EOF");
101 | 		}
102 | 
103 | 		switch (AFTER(1)) {
104 | 			case 'i': return NODE_INSTANT;
105 | 			case '{': return NODE_SET;
106 | 			case '"': return NODE_REGEXP;
107 | 		}
108 | 	}
109 | 
110 | 	return NODE_SYMBOL;
111 | }
112 | 
113 | static VALUE read_metadata (STATE)
114 | {
115 | 	VALUE  result;
116 | 	VALUE  metadatas = rb_ary_new();
117 | 	size_t i;
118 | 
119 | 	while (CURRENT == '^') {
120 | 		SEEK(1);
121 | 
122 | 		rb_ary_push(metadatas, CALL(read_next));
123 | 	}
124 | 
125 | 	result = CALL(read_next);
126 | 
127 | 	if (!rb_respond_to(result, rb_intern("metadata="))) {
128 | 		rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
129 | 	}
130 | 
131 | 	for (i = 0; i < RARRAY_LEN(metadatas); i++) {
132 | 		rb_funcall(result, rb_intern("metadata="), 1, RARRAY_PTR(metadatas)[i]);
133 | 	}
134 | 
135 | 	return result;
136 | }
137 | 
138 | static VALUE read_symbol (STATE)
139 | {
140 | 	size_t length = 0;
141 | 
142 | 	while (IS_SYMBOL(AFTER(length))) {
143 | 		length++;
144 | 	}
145 | 
146 | 	SEEK(length);
147 | 
148 | 	return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0),
149 | 		rb_intern("symbol!"), 0);
150 | }
151 | 
152 | static VALUE read_nil (STATE)
153 | {
154 | 	if (!IS_NOT_EOF_UP_TO(3) || !IS_EQUAL_UP_TO("nil", 3) || !IS_BOTH_SEPARATOR(AFTER(3))) {
155 | 		return CALL(read_symbol);
156 | 	}
157 | 
158 | 	SEEK(3);
159 | 
160 | 	return Qnil;
161 | }
162 | 
163 | static VALUE read_boolean (STATE)
164 | {
165 | 	if (CURRENT == 't') {
166 | 		if (!IS_NOT_EOF_UP_TO(4) || !IS_EQUAL_UP_TO("true", 4) || !IS_BOTH_SEPARATOR(AFTER(4))) {
167 | 			return CALL(read_symbol);
168 | 		}
169 | 		
170 | 		SEEK(4);
171 | 
172 | 		return Qtrue;
173 | 	}
174 | 	else {
175 | 		if (!IS_NOT_EOF_UP_TO(5) || !IS_EQUAL_UP_TO("false", 5) || !IS_BOTH_SEPARATOR(AFTER(5))) {
176 | 			return CALL(read_symbol);
177 | 		}
178 | 
179 | 		SEEK(5);
180 | 
181 | 		return Qfalse;
182 | 	}
183 | }
184 | 
185 | static VALUE read_number (STATE) /* Reads a numeric literal and converts it with Rational, to_i(base), BigDecimal, Float or Integer. */
186 | {
187 | 	size_t length = 0;
188 | 	VALUE  rbPiece;
189 | 	char*  cPiece;
190 | 	char*  tmp;
191 | 
192 | 	while (!IS_EOF_AFTER(length) && !IS_BOTH_SEPARATOR(AFTER(length))) {
193 | 		length++;
194 | 	}
195 | 
196 | 	SEEK(length);
197 | 
198 | 	rbPiece = rb_str_new(BEFORE_PTR(length), length);
199 | 	cPiece  = StringValueCStr(rbPiece);
200 | 
201 | 	if (strchr(cPiece, '/')) {
202 | 		return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
203 | 	}
204 | 	else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) { /* <base>r<digits> */
205 | 		return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
206 | 			rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
207 | 	}
208 | 	else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
209 | 		if (cPiece[length - 1] == 'M') {
210 | 			rb_str_set_len(rbPiece, length - 1); /* drop the M suffix: strict BigDecimal() rejects trailing characters */
211 | 			return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
212 | 		}
213 | 
214 | 		return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
215 | 	}
216 | 	else {
217 | 		if (cPiece[length - 1] == 'N') {
218 | 			rb_str_set_len(rbPiece, length - 1); /* drop the N suffix before handing the digits to Integer() */
219 | 		}
220 | 
221 | 		return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
222 | 	}
223 | }
224 | 
225 | static VALUE read_char (STATE) /* Reads a character literal (single char, named char, unicode or octal escape) as a String. */
226 | {
227 | 	SEEK(1); /* skip the leading backslash */
228 | 	if (IS_EOF) { rb_raise(rb_eSyntaxError, "unexpected EOF"); } /* lone backslash at end of input: AFTER(1) below would read past the NUL */
229 | 	if (IS_EOF_AFTER(1) || IS_BOTH_SEPARATOR(AFTER(1))) {
230 | 		SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
231 | 	}
232 | 	else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && IS_BOTH_SEPARATOR(AFTER(7))) {
233 | 		SEEK(7); return rb_str_new2("\n");
234 | 	}
235 | 	else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && IS_BOTH_SEPARATOR(AFTER(5))) {
236 | 		SEEK(5); return rb_str_new2(" ");
237 | 	}
238 | 	else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && IS_BOTH_SEPARATOR(AFTER(3))) {
239 | 		SEEK(3); return rb_str_new2("\t");
240 | 	}
241 | 	else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && IS_BOTH_SEPARATOR(AFTER(9))) {
242 | 		SEEK(9); return rb_str_new2("\b");
243 | 	}
244 | 	else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && IS_BOTH_SEPARATOR(AFTER(8))) {
245 | 		SEEK(8); return rb_str_new2("\f");
246 | 	}
247 | 	else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && IS_BOTH_SEPARATOR(AFTER(6))) {
248 | 		SEEK(6); return rb_str_new2("\r");
249 | 	}
250 | 	else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && IS_BOTH_SEPARATOR(AFTER(5))) {
251 | 		SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
252 | 			rb_intern("pack"), 1, rb_str_new2("U"));
253 | 	}
254 | 	else if (CURRENT == 'o') {
255 | 		size_t length = 1;
256 | 		size_t i;
257 | 		/* length counts the 'o' plus up to four following chars, stopping at the first separator (NUL included) */
258 | 		for (i = 1; i < 5; i++) {
259 | 			if (IS_BOTH_SEPARATOR(AFTER(i))) {
260 | 				break;
261 | 			}
262 | 
263 | 			length++;
264 | 		}
265 | 
266 | 		if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && IS_BOTH_SEPARATOR(AFTER(length))) {
267 | 			SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
268 | 				rb_intern("chr"), 0);
269 | 		}
270 | 	}
271 | 
272 | 	rb_raise(rb_eSyntaxError, "unknown character type");
273 | }
274 | 
275 | static VALUE read_keyword (STATE)
276 | {
277 | 	size_t length = 0;
278 | 
279 | 	SEEK(1);
280 | 
281 | 	while (!IS_KEYWORD_SEPARATOR(AFTER(length))) {
282 | 		length++;
283 | 	}
284 | 
285 | 	SEEK(length);
286 | 
287 | 	return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0),
288 | 		rb_intern("keyword!"), 0);
289 | }
290 | 
291 | static VALUE read_string (STATE) /* Reads a double-quoted string and returns it after Clojure.unescape. */
292 | {
293 | 	size_t length = 0;
294 | 
295 | 	SEEK(1); /* skip the opening delimiter (callers position us on it) */
296 | 
297 | 	while (AFTER(length) != '"') {
298 | 		if (IS_EOF_AFTER(length)) {
299 | 			rb_raise(rb_eSyntaxError, "unexpected EOF");
300 | 		}
301 | 		/* a backslash also consumes the escaped char, but must never step over the terminating NUL */
302 | 		if (AFTER(length) == '\\' && !IS_EOF_AFTER(length + 1)) {
303 | 			length++;
304 | 		}
305 | 
306 | 		length++;
307 | 	}
308 | 
309 | 	SEEK(length + 1); /* consume the body and the closing quote */
310 | 
311 | 	return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
312 | }
313 | 
314 | static VALUE read_regexp (STATE) /* Reads a #"..." literal and returns Regexp.new of its raw body. */
315 | {
316 | 	size_t length = 0;
317 | 	VALUE  args[] = { Qnil };
318 | 
319 | 	SEEK(2); /* skip the leading # and the opening quote */
320 | 
321 | 	while (AFTER(length) != '"') {
322 | 		if (IS_EOF_AFTER(length)) {
323 | 			rb_raise(rb_eSyntaxError, "unexpected EOF");
324 | 		}
325 | 		/* a backslash also consumes the escaped char, but must never step over the terminating NUL */
326 | 		if (AFTER(length) == '\\' && !IS_EOF_AFTER(length + 1)) {
327 | 			length++;
328 | 		}
329 | 
330 | 		length++;
331 | 	}
332 | 
333 | 	SEEK(length + 1); /* consume the body and the closing quote */
334 | 
335 | 	args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
336 | 
337 | 	return rb_class_new_instance(1, args, rb_cRegexp);
338 | }
339 | 
340 | static VALUE read_instant (STATE)
341 | {
342 | 	SEEK(1);
343 | 
344 | 	if (!IS_NOT_EOF_UP_TO(4)) {
345 | 		rb_raise(rb_eSyntaxError, "unexpected EOF");
346 | 	}
347 | 
348 | 	if (!IS_EQUAL_UP_TO("inst", 4)) {
349 | 		rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
350 | 	}
351 | 
352 | 	SEEK(4);
353 | 
354 | 	CALL(ignore);
355 | 
356 | 	return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(read_string));
357 | }
358 | 
359 | static VALUE read_list (STATE)
360 | {
361 | 	VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
362 | 
363 | 	SEEK(1); CALL(ignore);
364 | 
365 | 	while (CURRENT != ')') {
366 | 		rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
367 | 
368 | 		CALL(ignore);
369 | 	}
370 | 
371 | 	SEEK(1);
372 | 
373 | 	return result;
374 | }
375 | 
376 | static VALUE read_vector (STATE)
377 | {
378 | 	VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
379 | 
380 | 	SEEK(1); CALL(ignore);
381 | 
382 | 	while (CURRENT != ']') {
383 | 		rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
384 | 
385 | 		CALL(ignore);
386 | 	}
387 | 
388 | 	SEEK(1);
389 | 
390 | 	return result;
391 | }
392 | 
393 | static VALUE read_set (STATE)
394 | {
395 | 	VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
396 | 
397 | 	SEEK(2); CALL(ignore);
398 | 
399 | 	while (CURRENT != '}') {
400 | 		if (NIL_P(rb_funcall(result, rb_intern("add?"), 1, CALL(read_next)))) {
401 | 			rb_raise(rb_eSyntaxError, "the set contains non unique values");
402 | 		}
403 | 
404 | 		CALL(ignore);
405 | 	}
406 | 
407 | 	SEEK(1);
408 | 
409 | 	return result;
410 | }
411 | 
412 | static VALUE read_map (STATE)
413 | {
414 | 	VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
415 | 	VALUE key;
416 | 	VALUE value;
417 | 
418 | 	SEEK(1); CALL(ignore);
419 | 
420 | 	while (CURRENT != '}') {
421 | 		key = CALL(read_next);
422 | 		CALL(ignore);
423 | 		value = CALL(read_next);
424 | 		CALL(ignore);
425 | 
426 | 		rb_funcall(result, rb_intern("[]="), 2, key, value);
427 | 	}
428 | 
429 | 	SEEK(1);
430 | 
431 | 	return result;
432 | }
433 | 
434 | static VALUE read_next (STATE) /* Skips ignorable chars, then dispatches to the reader chosen by next_type. */
435 | {
436 | 	CALL(ignore);
437 | 
438 | 	if (IS_EOF) {
439 | 		rb_raise(rb_eSyntaxError, "unexpected EOF");
440 | 	}
441 | 	switch (CALL(next_type)) {
442 | 		case NODE_METADATA: return CALL(read_metadata);
443 | 		case NODE_NUMBER:   return CALL(read_number);
444 | 		case NODE_BOOLEAN:  return CALL(read_boolean);
445 | 		case NODE_NIL:      return CALL(read_nil);
446 | 		case NODE_CHAR:     return CALL(read_char);
447 | 		case NODE_KEYWORD:  return CALL(read_keyword);
448 | 		case NODE_STRING:   return CALL(read_string);
449 | 		case NODE_MAP:      return CALL(read_map);
450 | 		case NODE_LIST:     return CALL(read_list);
451 | 		case NODE_VECTOR:   return CALL(read_vector);
452 | 		case NODE_INSTANT:  return CALL(read_instant);
453 | 		case NODE_SET:      return CALL(read_set);
454 | 		case NODE_REGEXP:   return CALL(read_regexp);
455 | 		case NODE_SYMBOL:   return CALL(read_symbol);
456 | 	}
457 | 	return Qnil; /* every NodeType returns above; this keeps control from falling off a non-void function */
458 | }
459 | 
460 | static VALUE t_init (int argc, VALUE* argv, VALUE self) /* Parser#initialize(source, options = {}): stores the source and the collection classes to build. */
461 | {
462 | 	VALUE tmp;
463 | 	VALUE source;
464 | 	VALUE options;
465 | 
466 | 	if (argc < 1) {
467 | 		rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)");
468 | 	}
469 | 	else if (argc > 2) {
470 | 		rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
471 | 	}
472 | 
473 | 	if (!rb_obj_is_kind_of(argv[0], rb_cString) && !rb_obj_is_kind_of(argv[0], rb_cIO)) {
474 | 		rb_raise(rb_eArgError, "you have to pass a String or an IO");
475 | 	}
476 | 
477 | 	source  = argv[0];
478 | 	options = argc == 2 ? argv[1] : rb_hash_new();
479 | 	Check_Type(options, T_HASH); /* rb_hash_aref below is only valid on a real Hash; raise TypeError otherwise */
480 | 	rb_iv_set(self, "@source", source);
481 | 	rb_iv_set(self, "@options", options);
482 | 	/* option keys are Symbols: rb_intern yields an ID, which has to be wrapped with ID2SYM before it can be a hash key */
483 | 	if (!NIL_P(tmp = rb_hash_aref(options, ID2SYM(rb_intern("map_class"))))) {
484 | 		rb_iv_set(self, "@map_class", tmp);
485 | 	}
486 | 	else {
487 | 		rb_iv_set(self, "@map_class", rb_const_get(cClojure, rb_intern("Map")));
488 | 	}
489 | 
490 | 	if (!NIL_P(tmp = rb_hash_aref(options, ID2SYM(rb_intern("vector_class"))))) {
491 | 		rb_iv_set(self, "@vector_class", tmp);
492 | 	}
493 | 	else {
494 | 		rb_iv_set(self, "@vector_class", rb_const_get(cClojure, rb_intern("Vector")));
495 | 	}
496 | 
497 | 	if (!NIL_P(tmp = rb_hash_aref(options, ID2SYM(rb_intern("list_class"))))) {
498 | 		rb_iv_set(self, "@list_class", tmp);
499 | 	}
500 | 	else {
501 | 		rb_iv_set(self, "@list_class", rb_const_get(cClojure, rb_intern("List")));
502 | 	}
503 | 
504 | 	if (!NIL_P(tmp = rb_hash_aref(options, ID2SYM(rb_intern("set_class"))))) {
505 | 		rb_iv_set(self, "@set_class", tmp);
506 | 	}
507 | 	else {
508 | 		rb_iv_set(self, "@set_class", rb_const_get(cClojure, rb_intern("Set")));
509 | 	}
510 | 
511 | 	return self;
512 | }
513 | 
514 | static VALUE t_parse (VALUE self) /* Parser#parse: reads one form from @source and raises SyntaxError if input is left over. */
515 | {
516 | 	char*  string;
517 | 	size_t position = 0;
518 | 	VALUE  source = rb_iv_get(self, "@source");
519 | 	VALUE  result;
520 | 
521 | 	if (!rb_obj_is_kind_of(source, rb_cString)) {
522 | 		if (rb_obj_is_kind_of(source, rb_cIO)) {
523 | 			source = rb_funcall(source, rb_intern("read"), 0);
524 | 		}
525 | 		else {
526 | 			source = rb_funcall(source, rb_intern("to_str"), 0);
527 | 		}
528 | 	}
529 | 
530 | 	string = StringValueCStr(source);
531 | 	result = read_next(self, string, &position);
532 | 
533 | 	ignore(self, string, &position);
534 | 
535 | 	if (string[position] != '\0') {
536 | 		rb_raise(rb_eSyntaxError, "there is some unconsumed input");
537 | 	}
538 | 	RB_GC_GUARD(source); /* `string` points into `source`, which may only be referenced from this local (IO case): keep it alive until here */
539 | 	return result;
540 | }
541 | 
542 | void
543 | Init_parser_ext (void) /* Extension entry point: defines Clojure::Parser and the regexes read_char relies on. */
544 | {
545 | 	cClojure = rb_const_get(rb_cObject, rb_intern("Clojure"));
546 | 	cParser  = rb_define_class_under(cClojure, "Parser", rb_cObject);
547 | 
548 | 	rb_define_method(cParser, "initialize", t_init, -1);
549 | 	rb_define_method(cParser, "parse", t_parse, 0);
550 | 
551 | 	VALUE args[] = { Qnil };
552 | 	/* Both patterns are anchored: read_char matches them against the exact digit substring. `|` is literal inside [], so it is left out. */
553 | 	args[0]       = rb_str_new2("\\A[0-9a-fA-F]{4}\\z");
554 | 	UNICODE_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
555 | 	rb_define_const(cClojure, "UNICODE_REGEX", UNICODE_REGEX);
556 | 
557 | 	args[0]     = rb_str_new2("\\A[0-3]?[0-7]?[0-7]\\z");
558 | 	OCTAL_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
559 | 	rb_define_const(cClojure, "OCTAL_REGEX", OCTAL_REGEX);
560 | }
561 | 


--------------------------------------------------------------------------------
/lib/clj.rb:
--------------------------------------------------------------------------------
 1 | #--
 2 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
 3 | #                    Version 2, December 2004
 4 | #
 5 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
 6 | #   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 7 | #
 8 | #  0. You just DO WHAT THE FUCK YOU WANT TO.
 9 | #++
10 | 
11 | module Clojure
12 | 	def self.parse (*args)
13 | 		Clojure::Parser.new(*args).parse
14 | 	end
15 | 
16 | 	def self.dump (what, options = {})
17 | 		raise ArgumentError, 'cannot convert the passed value to clojure' unless what.respond_to? :to_clj
18 | 
19 | 		what.to_clj(options)
20 | 	end
21 | 
22 | 	UNESCAPE_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n
23 | 
24 | 	# Unescape characters in strings.
25 | 	UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
26 | 	UNESCAPE_MAP.merge!(
27 | 		?"  => '"',
28 | 		?\\ => '\\',
29 | 		?/  => '/',
30 | 		?b  => "\b",
31 | 		?f  => "\f",
32 | 		?n  => "\n",
33 | 		?r  => "\r",
34 | 		?t  => "\t",
35 | 		?u  => nil
36 | 	)
37 | 
38 | 	EMPTY_8BIT_STRING = ''
39 | 
40 | 	if EMPTY_8BIT_STRING.respond_to? :force_encoding
41 | 		EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
42 | 	end
43 | 
44 | 	def self.unescape (string)
45 | 		string.gsub(UNESCAPE_REGEX) {|escape|
46 | 			if u = UNESCAPE_MAP[$&[1]]
47 | 				next u
48 | 			end
49 | 
50 | 			bytes = EMPTY_8BIT_STRING.dup
51 | 
52 | 			i = 0
53 | 			while escape[6 * i] == ?\\ && escape[6 * i + 1] == ?u
54 | 				bytes << escape[6 * i + 2, 2].to_i(16) << escape[6 * i + 4, 2].to_i(16)
55 | 
56 | 				i += 1
57 | 			end
58 | 
59 | 			if bytes.respond_to? :force_encoding
60 | 				bytes.force_encoding 'UTF-16be'
61 | 				bytes.encode 'UTF-8'
62 | 			else
63 | 				bytes
64 | 			end
65 | 		}
66 | 	end
67 | end
68 | 
69 | require 'clj/types'
70 | 
71 | begin
72 | 	raise LoadError if RUBY_ENGINE == 'jruby' || ENV['CLJ_NO_C_EXT']
73 | 
74 | 	require 'clj/parser_ext'
75 | rescue LoadError
76 | 	require 'clj/parser'
77 | end
78 | 


--------------------------------------------------------------------------------
/lib/clj/parser.rb:
--------------------------------------------------------------------------------
  1 | #--
  2 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  3 | #                    Version 2, December 2004
  4 | #
  5 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  6 | #   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  7 | #
  8 | #  0. You just DO WHAT THE FUCK YOU WANT TO.
  9 | #++
 10 | 
 11 | require 'stringio'
 12 | 
 13 | module Clojure
 14 | 
 15 | class Parser
 16 | 	NUMBERS = '0' .. '9'
 17 | 	# Anchored at the start so a match cannot begin mid-string; `|` is literal inside [], so it is left out.
 18 | 	UNICODE_REGEX = /\A[0-9a-fA-F]{4}/
 19 | 	OCTAL_REGEX   = /\A[0-3]?[0-7]?[0-7]/
 20 | 
 21 | 	def initialize (source, options = {})
 22 | 		@source  = source.is_a?(String) ? StringIO.new(source) : source
 23 | 		@options = options
 24 | 
 25 | 		@map_class    = options[:map_class]    || Clojure::Map
 26 | 		@vector_class = options[:vector_class] || Clojure::Vector
 27 | 		@list_class   = options[:list_class]   || Clojure::List
 28 | 		@set_class    = options[:set_class]    || Clojure::Set
 29 | 	end
 30 | 
 31 | 	def parse
 32 | 		result = read_next
 33 | 
 34 | 		ignore(false)
 35 | 
 36 | 		if @source.read(1)
 37 | 			raise SyntaxError, 'there is some unconsumed input'
 38 | 		end
 39 | 
 40 | 		result
 41 | 	end
 42 | 
 43 | private
 44 | 	def next_type (ch)
 45 | 		case ch
 46 | 		when '^'               then :metadata
 47 | 		when NUMBERS, '-', '+' then :number
 48 | 		when 't', 'f'          then :boolean
 49 | 		when 'n'               then :nil
 50 | 		when '\\'              then :char
 51 | 		when ':'               then :keyword
 52 | 		when '"'               then :string
 53 | 		when '{'               then :map
 54 | 		when '('               then :list
 55 | 		when '['               then :vector
 56 | 		when '#'
 57 | 			case @source.read(1)
 58 | 			when 'i' then :instant
 59 | 			when '{' then :set
 60 | 			when '"' then :regexp
 61 | 			end
 62 | 		else :symbol
 63 | 		end
 64 | 	end
 65 | 
 66 | 	def read_next
 67 | 		ch = ignore(false)
 68 | 
 69 | 		raise SyntaxError, 'unexpected EOF' unless ch
 70 | 
 71 | 		__send__ "read_#{next_type ch}", ch
 72 | 	end
 73 | 
 74 | 	def read_metadata (ch)
 75 | 		metadatas = [read_next]
 76 | 
 77 | 		while lookahead(1) == '^'
 78 | 			raise SyntaxError, 'unexpected EOF' unless @source.read(1)
 79 | 
 80 | 			metadatas.push(read_next)
 81 | 		end
 82 | 
 83 | 		value = read_next
 84 | 
 85 | 		unless value.respond_to? :metadata=
 86 | 			raise SyntaxError, 'the object cannot hold metadata'
 87 | 		end
 88 | 
 89 | 		metadatas.each { |m| value.metadata = m }
 90 | 
 91 | 		value
 92 | 	end
 93 | 
 94 | 	def read_nil (ch)
 95 | 		check = @source.read(3)
 96 | 
 97 | 		if check[0, 2] != 'il' || !both_separator?(check[2])
 98 | 			revert(check.length) and read_symbol(ch)
 99 | 		else
100 | 			nil
101 | 		end
102 | 	end
103 | 
104 | 	def read_boolean (ch)
105 | 		if ch == 't'
106 | 			check = @source.read(4)
107 | 
108 | 			if check[0, 3] != 'rue' || !both_separator?(check[3])
109 | 				revert(check.length) and read_symbol(ch)
110 | 			else
111 | 				true
112 | 			end
113 | 		else
114 | 			check = @source.read(5)
115 | 
116 | 			if check[0, 4] != 'alse' || !both_separator?(check[4])
117 | 				revert(check.length) and read_symbol(ch)
118 | 			else
119 | 				false
120 | 			end
121 | 		end
122 | 	end
123 | 
124 | 	def read_number (ch)
125 | 		piece = ch
126 | 
127 | 		while (ch = @source.read(1)) && !both_separator?(ch)
128 | 			piece << ch
129 | 		end
130 | 
131 | 		revert if ch
132 | 
133 | 		if piece.include? '/'
134 | 			Rational(piece)
135 | 		elsif piece.include? 'r' or piece.include? 'R'
136 | 			base, number = piece.split(/r/i, 2)
137 | 
138 | 			number.to_i(base.to_i)
139 | 		elsif piece.include? '.' or piece.include? 'e' or piece.include? 'E' or piece.end_with? 'M'
140 | 			if piece.end_with? 'M'
141 | 				BigDecimal(piece)
142 | 			else
143 | 				Float(piece)
144 | 			end
145 | 		else
146 | 			if piece.end_with? 'N'
147 | 				piece[-1] = ''
148 | 			end
149 | 
150 | 			Integer(piece)
151 | 		end
152 | 	end
153 | 
154 | 	def read_char (ch)
155 | 		if (ahead = lookahead(2)) && both_separator?(ahead[1])
156 | 			@source.read(1)
157 | 		elsif (ahead = lookahead(8)) && ahead[0, 7] == 'newline' && both_separator?(ahead[7])
158 | 			@source.read(7) and "\n"
159 | 		elsif (ahead = lookahead(6)) && ahead[0, 5] == 'space' && both_separator?(ahead[5])
160 | 			@source.read(5) and ' '
161 | 		elsif (ahead = lookahead(4)) && ahead[0, 3] == 'tab' && both_separator?(ahead[3])
162 | 			@source.read(3) and "\t"
163 | 		elsif (ahead = lookahead(10)) && ahead[0, 9] == 'backspace' && both_separator?(ahead[9])
164 | 			@source.read(9) and "\b"
165 | 		elsif (ahead = lookahead(9)) && ahead[0, 8] == 'formfeed' && both_separator?(ahead[8])
166 | 			@source.read(8) and "\f"
167 | 		elsif (ahead = lookahead(7)) && ahead[0, 6] == 'return' && both_separator?(ahead[6])
168 | 			@source.read(6) and "\r"
169 | 		elsif (ahead = lookahead(6)) && ahead[0] == 'u' && ahead[1, 5] =~ UNICODE_REGEX && both_separator?(ahead[5])
170 | 			[@source.read(5)[1, 4].to_i(16)].pack('U')
171 | 		elsif (ahead = lookahead(5)) && ahead[0] == 'o' && matches = ahead[1, 3].match(OCTAL_REGEX)
172 | 			length = matches[0].length + 1
173 | 
174 | 			if both_separator?(ahead[length])
175 | 				@source.read(length)[1, 3].to_i(8).chr
176 | 			end
177 | 		end or raise SyntaxError, 'unknown character type'
178 | 	end
179 | 
180 | 	def read_symbol (ch)
181 | 		result = ch
182 | 
183 | 		while (ch = @source.read(1)) && is_symbol?(ch)
184 | 			result << ch
185 | 		end
186 | 
187 | 		revert if ch
188 | 
189 | 		if result.include? '::'
190 | 			raise SyntaxError, 'symbols cannot have repeating :'
191 | 		end
192 | 
193 | 		result.to_sym.symbol!
194 | 	end
195 | 
196 | 	def read_keyword (ch)
197 | 		result = ''
198 | 
199 | 		while (ch = @source.read(1)) && !keyword_separator?(ch)
200 | 			result << ch
201 | 		end
202 | 
203 | 		revert if ch
204 | 
205 | 		result.to_sym.keyword!
206 | 	end
207 | 
208 | 	def read_string (ch)
209 | 		result = ''
210 | 
211 | 		while (ch = @source.read(1)) != '"'
212 | 			raise SyntaxError, 'unexpected EOF' unless ch
213 | 
214 | 			result << ch
215 | 
216 | 			if ch == '\\'
217 | 				result << @source.read(1)
218 | 			end
219 | 		end
220 | 
221 | 		Clojure.unescape(result)
222 | 	end
223 | 
224 | 	def read_instant (ch) # Reads `#inst "..."` ('#' and 'i' are already consumed) as a DateTime.
225 | 		check = @source.read(3) || '' # read returns nil at EOF, which would break check.length below
226 | 
227 | 		if check.length != 3
228 | 			raise SyntaxError, 'unexpected EOF'
229 | 		elsif check != 'nst'
230 | 			raise SyntaxError, "expected inst, found i#{check}"
231 | 		end
232 | 
233 | 		DateTime.rfc3339(read_string(ignore(false)))
234 | 	end
235 | 
236 | 	def read_regexp (ch)
237 | 		result = ''
238 | 
239 | 		while (ch = @source.read(1)) != '"'
240 | 			raise SyntaxError, 'unexpected EOF' unless ch
241 | 
242 | 			result << ch
243 | 
244 | 			if ch == '\\'
245 | 				result << @source.read(1)
246 | 			end
247 | 		end
248 | 
249 | 		/#{result}/
250 | 	end
251 | 
252 | 	def read_list (ch)
253 | 		result = @list_class.new
254 | 
255 | 		ignore
256 | 
257 | 		while lookahead(1) != ')'
258 | 			result << read_next
259 | 			ignore
260 | 		end
261 | 
262 | 		@source.read(1)
263 | 
264 | 		result
265 | 	end
266 | 
267 | 	def read_vector (ch)
268 | 		result = @vector_class.new
269 | 
270 | 		ignore
271 | 
272 | 		while lookahead(1) != ']'
273 | 			result << read_next
274 | 			ignore
275 | 		end
276 | 
277 | 		@source.read(1)
278 | 
279 | 		result
280 | 	end
281 | 
282 | 	def read_set (ch)
283 | 		result = @set_class.new
284 | 
285 | 		ignore
286 | 
287 | 		while lookahead(1) != '}'
288 | 			unless result.add? read_next
289 | 				raise SyntaxError, 'the set contains non unique values'
290 | 			end
291 | 
292 | 			ignore
293 | 		end
294 | 
295 | 		@source.read(1)
296 | 
297 | 		result
298 | 	end
299 | 
300 | 	def read_map (ch)
301 | 		result = @map_class.new
302 | 
303 | 		ignore
304 | 
305 | 		while lookahead(1) != '}'
306 | 			key = read_next
307 | 			ignore
308 | 			value = read_next
309 | 			ignore
310 | 
311 | 			result[key] = value
312 | 		end
313 | 
314 | 		@source.read(1)
315 | 
316 | 		result
317 | 	end
318 | 
319 | 	def lookahead (length) # Returns up to `length` upcoming chars without consuming them (nil when the read returns nil).
320 | 		result = @source.read(length)
321 | 
322 | 		if result
323 | 			@source.seek(-result.length, IO::SEEK_CUR) # rewind by exactly what was read
324 | 		end
325 | 
326 | 		result
327 | 	end
328 | 
329 | 	def ignore (rev = true) # Skips ignorable chars, then un-reads the first other char (rev) or returns it (rev false); nil at EOF.
330 | 		while ignore?(ch = @source.read(1)); end
331 | 
332 | 		rev ? revert : ch if ch
333 | 	end
334 | 
335 | 	def revert (n = 1)
336 | 		@source.seek -n, IO::SEEK_CUR
337 | 	end
338 | 
339 | 	def ignore? (ch)
340 | 		ch == ' ' || ch == ',' || ch == "\n" || ch == "\r" || ch == "\t"
341 | 	end
342 | 
343 | 	def is_symbol? (ch)
344 | 		(ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' || ch <= 'Z') || ch == '+' || ch == '!' || ch == '-' || ch == '_' || ch == '?' || ch == '.' || ch == ':' || ch == '/'
345 | 	end
346 | 
347 | 	def both_separator? (ch)
348 | 		ch == nil || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "\n" || ch == "\r" || ch == "\t"
349 | 	end
350 | 
351 | 	def keyword_separator? (ch)
352 | 		ch == nil || ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "'" || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == "\n" || ch == "\r" || ch == "\t"
353 | 	end
354 | end
355 | 
356 | end
357 | 


--------------------------------------------------------------------------------
/lib/clj/types.rb:
--------------------------------------------------------------------------------
  1 | #--
  2 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  3 | #                    Version 2, December 2004
  4 | #
  5 | #            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  6 | #   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  7 | #
  8 | #  0. You just DO WHAT THE FUCK YOU WANT TO.
  9 | #++
 10 | 
 11 | require 'forwardable'
 12 | require 'set'
 13 | require 'date'
 14 | require 'bigdecimal'
 15 | 
 16 | module Clojure
	# Mixin giving an object a Clojure metadata map and a way to dump it.
	module Metadata
		# The metadata map, created as an empty Clojure::Map on first access.
		def metadata
			@metadata ||= Clojure::Map.new
		end

		# Merges +value+ into the existing metadata (it does not replace it).
		#
		# A Hash is merged as is, a Symbol becomes { symbol => true } and a
		# String becomes { :tag => string }; anything else raises ArgumentError.
		def metadata= (value)
			target = metadata

			addition = if value.is_a?(::Hash)
				value
			elsif value.is_a?(::Symbol)
				{ value => true }
			elsif value.is_a?(::String)
				{ :tag => value }
			else
				raise ArgumentError, 'the passed value is not suitable as metadata'
			end

			target.merge!(addition)
		end

		# Renders the metadata as a "^... " prefix for the dumped value.
		#
		# Returns '' when options[:metadata] is false or there is no metadata.
		# A single { symbol => true } entry is written as just the symbol, a
		# single { :tag => string } entry as just the string; every other case
		# is written as the whole map.
		def metadata_to_clj (options = {})
			return '' if options[:metadata] == false || !@metadata || @metadata.empty?

			rendered = if @metadata.length != 1
				@metadata.to_clj(options)
			else
				key, val = @metadata.first

				if key.is_a?(::Symbol) && val == true
					key.to_clj(options)
				elsif key == :tag && val.is_a?(::String)
					val.to_clj(options)
				else
					@metadata.to_clj(options)
				end
			end

			'^' + rendered + ' '
		end
	end
 49 | 
	# Hash subclass that carries metadata and dumps as a Clojure map literal.
	class Map < Hash
		include Clojure::Metadata

		# Returns the metadata prefix followed by "{k v k v ...}"; every key and
		# value is rendered with its own to_clj and receives the same +options+.
		def to_clj (options = {})
			prefix  = metadata_to_clj(options)
			entries = map { |key, value| key.to_clj(options) + ' ' + value.to_clj(options) }

			prefix + '{' + entries.join(' ') + '}'
		end
	end
 57 | 
	# Array subclass that carries metadata and dumps as a Clojure vector.
	class Vector < Array
		include Clojure::Metadata

		# Returns the metadata prefix followed by "[a b c]"; every element is
		# rendered with its own to_clj and receives the same +options+.
		def to_clj (options = {})
			prefix   = metadata_to_clj(options)
			elements = map { |item| item.to_clj(options) }

			prefix + '[' + elements.join(' ') + ']'
		end
	end
 65 | 
	# Array subclass that carries metadata and dumps as a Clojure list.
	class List < Array
		include Clojure::Metadata

		# Returns the metadata prefix followed by "(a b c)"; every element is
		# rendered with its own to_clj and receives the same +options+.
		def to_clj (options = {})
			prefix   = metadata_to_clj(options)
			elements = map { |item| item.to_clj(options) }

			prefix + '(' + elements.join(' ') + ')'
		end
	end
 73 | 
	# Subclass of the standard Set that carries metadata and dumps as a
	# Clojure set literal.
	class Set < Set
		include Clojure::Metadata

		# Returns the metadata prefix followed by "#{a b c}"; every element is
		# rendered with its own to_clj and receives the same +options+.
		def to_clj (options = {})
			prefix   = metadata_to_clj(options)
			elements = map { |item| item.to_clj(options) }

			prefix + '#{' + elements.join(' ') + '}'
		end
	end
 81 | 
	# Wraps a Ruby symbol so that it is dumped as a Clojure symbol rather
	# than as a keyword.
	class Symbol
		# sym:: the Ruby symbol to wrap.
		def initialize (sym)
			@internal = sym
		end

		def keyword?; false; end
		def symbol?;  true;  end

		# Returns the symbol name, raising ArgumentError when it is not made up
		# entirely of symbol characters.
		#
		# The pattern is anchored to the whole name and every character is
		# listed explicitly. The allowed set follows the edn symbol rules
		# (alphanumerics and . * + ! - _ ? $ % & = < >, with : and # as
		# constituents), plus / and '.
		def to_clj (*)
			result = to_sym.to_s

			unless result =~ /\A[\w.*+!\-?$%&=<>:\/'][\w.*+!\-?$%&=<>:#\/']*\z/
				raise ArgumentError, "#{result} cannot be transformed into clojure"
			end

			result
		end

		# Two Clojure symbols are equal when they wrap the same Ruby symbol.
		def == (other)
			return false unless other.is_a?(Symbol)

			to_sym == other.to_sym
		end

		# Keep Hash and Set membership consistent with ==.
		alias eql? ==

		def hash
			to_sym.hash
		end

		def to_sym;  @internal;    end
		def to_s;    to_sym.to_s;  end
		def inspect; to_s          end
	end
110 | end
111 | 
# Numbers, booleans and nil are dumped using their Ruby inspect form; any
# arguments passed to to_clj are accepted and ignored.
[Numeric, TrueClass, FalseClass, NilClass].each do |klass|
	klass.send(:define_method, :to_clj) { |*| inspect }
end
119 | 
# Core symbols are dumped as Clojure keywords.
class Symbol
	# A core symbol already is a keyword; returns self.
	def keyword!
		self
	end

	# Wraps this symbol so that it is dumped as a Clojure symbol instead.
	def symbol!
		Clojure::Symbol.new(self)
	end

	def keyword?; true;  end
	def symbol?;  false; end

	# Returns ":name", raising ArgumentError when the name is empty or
	# contains a character from the rejected set (brackets, quotes, ^ @ ` ~,
	# backslash, comma, semicolon, whitespace).
	#
	# The name itself is validated, anchored to the whole string, instead of
	# the inspect form: inspect quotes names such as :"foo-bar", which made
	# ordinary keywords fail, while the unanchored match let names like
	# :"a b:c" through.
	def to_clj (*)
		name = to_s

		unless name =~ /\A[^(\[{'^@`~"\\,\s;)\]}]+\z/
			raise ArgumentError, "#{inspect} cannot be transformed into clojure"
		end

		':' + name
	end
end
142 | 
# Strings are dumped as double-quoted literals built from Ruby's own
# inspect output.
class String
	def to_clj (*)
		# Inspect a UTF-16BE copy when transcoding succeeds, otherwise the
		# string itself.
		result = (encode('UTF-16be') rescue self).inspect
		
		# Rewrite the \e and \a escapes found in the inspect output as \uXXXX
		# escapes; the (^|[^\\]) prefix skips occurrences that directly follow
		# a backslash.
		# NOTE(review): \a is BEL (U+0007) in Ruby, yet it is rewritten to
		# \u0003 here - confirm whether \u0007 was intended.
		result.gsub!(/(^|[^\\])\\e/, '\1\u001b')
		result.gsub!(/(^|[^\\])\\a/, '\1\u0003')

		result
	end
end
153 | 
# Rationals are dumped as "numerator/denominator".
class Rational
	def to_clj (*)
		"#{numerator}/#{denominator}"
	end
end
159 | 
# Regexps are dumped as Clojure #"..." literals.
class Regexp
	# Uses the pattern source rather than slicing inspect, which kept part of
	# the trailing "/flags" whenever the regexp had options. Options are
	# carried over as a leading inline flag group: /i becomes i, /m (dot
	# matches newline) becomes s, /x becomes x.
	def to_clj (*)
		flags = [[IGNORECASE, 'i'], [MULTILINE, 's'], [EXTENDED, 'x']].map { |bit, flag|
			(options & bit).zero? ? '' : flag
		}.join

		prefix = flags.empty? ? '' : "(?#{flags})"

		'#"' + prefix + source + '"'
	end
end
165 | 
# DateTimes are dumped as epoch seconds, or as an #inst literal on request.
class DateTime
	# With options[:alpha] set the result is '#inst "<rfc3339>"'; otherwise it
	# is the integer number of seconds since the epoch, as a string.
	def to_clj (options = {})
		if options[:alpha]
			'#inst "' + rfc3339 + '"'
		else
			to_time.to_i.to_s
		end
	end
end
171 | 
# Dates are dumped through their DateTime equivalent.
class Date
	def to_clj (options = {})
		as_datetime = to_datetime

		as_datetime.to_clj(options)
	end
end
177 | 
# Times are dumped through their DateTime equivalent.
class Time
	def to_clj (options = {})
		as_datetime = to_datetime

		as_datetime.to_clj(options)
	end
end
183 | 
# Integers that do not fit a signed 64-bit value are dumped with the N
# suffix; all others are dumped as plain digits.
#
# Defined on Integer rather than Bignum: Bignum was merged into Integer in
# Ruby 2.4 and the constant was removed later, so reopening it either fails
# or defines a class no integer belongs to. On older Rubies Bignum inherits
# this method from Integer.
class Integer
	def to_clj (*)
		fits_long = self >= -(2**63) && self < 2**63

		fits_long ? to_s : to_s + 'N'
	end
end
189 | 
# BigDecimals are dumped in plain (non-exponent) notation with the M suffix.
class BigDecimal < Numeric
	def to_clj (*)
		"#{to_s('F')}M"
	end
end
195 | 
# Arrays are dumped as Clojure vectors and can be converted to the other
# Clojure collection types.
class Array
	def to_clj (options = {})
		to_vector.to_clj(options)
	end

	# Builds a set from this array, a Clojure::Set unless another class is
	# given.
	#
	# Accepts and forwards the same optional class, extra arguments and block
	# as the standard library to_set this method overrides, so existing
	# callers of that form keep working.
	def to_set (klass = Clojure::Set, *args, &block)
		klass.new(self, *args, &block)
	end

	def to_vector
		Clojure::Vector.new(self)
	end

	def to_list
		Clojure::List.new(self)
	end
end
213 | 
# Plain sets are dumped by copying them into a Clojure::Set first.
class Set
	def to_clj (options = {})
		wrapped = Clojure::Set.new(self)

		wrapped.to_clj(options)
	end
end
219 | 
# Hashes are dumped as Clojure maps.
class Hash
	def to_clj (options = {})
		as_map = to_map

		as_map.to_clj(options)
	end

	# Copies this hash into a new Clojure::Map.
	def to_map
		Clojure::Map[self]
	end
end
229 | 


--------------------------------------------------------------------------------
/test/bench/bench.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import clj
 4 | import time
 5 | 
 6 | s = "[1 2 3 true false nil {:a 21.3 :b 43.2} \"Hello\"]"
 7 | 
 8 | t1 = time.time()
 9 | for i in range(10000):
10 |   clj.loads(s)
11 | 
12 | print time.time()-t1
13 | 


--------------------------------------------------------------------------------
/test/bench/bench.rb:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env ruby
 2 | require 'rubygems'
 3 | require 'clj'
 4 | 
 5 | s = "[1 2 3 true false nil {:a 21.3 :b 43.2} \"Hello\"]"
 6 | 
 7 | t1 = Time.now()
 8 | 
 9 | 0.upto 10000 do
10 |   Clojure.parse(s)
11 | end
12 | 
13 | puts Time.now()-t1
14 | 


--------------------------------------------------------------------------------
/test/bench/clj.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (C) 2012 Sun Ning<classicning@gmail.com>
  3 | #
  4 | # Permission is hereby granted, free of charge, to any person
  5 | # obtaining a copy of this software and associated documentation files
  6 | # (the "Software"), to deal in the Software without restriction,
  7 | # including without limitation the rights to use, copy, modify, merge,
  8 | # publish, distribute, sublicense, and/or sell copies of the Software,
  9 | # and to permit persons to whom the Software is furnished to do so,
 10 | # subject to the following conditions:
 11 | 
 12 | # The above copyright notice and this permission notice shall be
 13 | # included in all copies or substantial portions of the Software.
 14 | 
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 19 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 20 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | # SOFTWARE.
 23 | 
 24 | 
 25 | # clojure literals => python types
 26 | #
 27 | # clojure vector [1 2 3 4] => python list [1 2 3 4] *coll
 28 | # clojure list (1 2 3 4) => python list [1 2 3 4] *coll
 29 | # clojure set #{1 2 3 4} => python set set(1 2 3 4) *coll
 30 | # clojure map {:a 1 :b 2} => python dict dict(a=1,b=2) *coll
 31 | # clojure string "a" => python unicode "a"
 32 | # clojure character \a => python unicode "a"
 33 | # clojure keyword :a => python unicode "a"
 34 | # clojure integer 123 => python integer 123
 35 | # clojure float 12.3 => python float 12.3
 36 | # clojure boolean true => python boolean true
 37 | # clojure nil => python None 
 38 | #
 39 | 
 40 | 
 41 | __all__ = ["dump", "dumps", "load", "loads"]
 42 | 
 43 | import os
 44 | from cStringIO import StringIO
 45 | 
 46 | def number(v):
 47 |     if '.' in v:
 48 |         return float(v)
 49 |     else:
 50 |         return int(v)
 51 | 
 52 | _STOP_CHARS = [" ", ",", "\n", "\r"]
 53 | _COLL_OPEN_CHARS = ["#", "[", "{"]
 54 | _EXTRA_NUM_CHARS = ["-", "+", ".", "e", "E"]
 55 | 
 56 | class CljDecoder(object):
 57 |     def __init__(self, fd):
 58 |         self.fd = fd
 59 |         self.value_stack = []
 60 |         self.terminator = None ## for collection type
 61 | 
 62 |     def decode(self):
 63 |         while True:
 64 |             v = self.__read_token()
 65 |             if len(self.value_stack) == 0:
 66 |                 return v
 67 |         
 68 |     def __get_type_from_char(self, c):
 69 |         """return a tuple of type information
 70 |         * type name
 71 |         * a flag to indicate if it's a collection
 72 |         """
 73 |         if c.isdigit() or c =='-':
 74 |             return ("number", False, None)
 75 |         elif c == 't' or c == 'f': ## true/false
 76 |             return ("boolean", False, None)
 77 |         elif c == 'n': ## nil
 78 |             return ("nil", False, None)
 79 |         elif c == '\\' :
 80 |             return ("char", False, None)
 81 |         elif c == ':':
 82 |             return ("keyword", False, None)
 83 |         elif c == '"':
 84 |             return ("string", False, None)
 85 |         elif c == '#':
 86 |             return ("set", True, "}")
 87 |         elif c == '{':
 88 |             return ("dict", True, "}")
 89 |         elif c == '(':
 90 |             return ("list", True, ")")
 91 |         elif c == '[':
 92 |             return ('list', True, "]")
 93 |         else:
 94 |             return (None, False, None)
 95 | 
 96 |     def __read_token(self):
 97 |         fd = self.fd
 98 |         
 99 |         c = fd.read(1)
100 | 
101 |         ## skip all stop chars if necessary 
102 |         while c in _STOP_CHARS:
103 |             c = fd.read(1)
104 | 
105 |         ## raise exception when unexpected EOF found
106 |         if c == '':
107 |             raise ValueError("Unexpected EOF")
108 | 
109 |         t, coll, term = self.__get_type_from_char(c)
110 |         if coll:
111 |             ## move cursor 
112 |             if t == "set":
113 |                 ## skip {
114 |                 fd.read(1)
115 | 
116 |             self.terminator = term
117 |                 
118 |             self.value_stack.append(([], self.terminator, t))
119 |             return None
120 |         else:
121 |             v = None ## token value
122 |             e = None ## end char
123 |             r = True ## the token contains data or not
124 | 
125 |             if t == "boolean":
126 |                 if c == 't':
127 |                     e = fd.read(4)[-1]
128 |                     v = True
129 |                 else:
130 |                     e = fd.read(5)[-1]
131 |                     v = False
132 | 
133 |             elif t == "char":
134 |                 buf = []
135 |                 while c is not self.terminator and c is not "" and c not in _STOP_CHARS:
136 |                     c = fd.read(1)
137 |                     buf.append(c)
138 |                 
139 |                 e = c
140 |                 v = ''.join(buf[:-1])
141 | 
142 |             elif t == "nil":
143 |                 e = fd.read(3)[-1]
144 |                 v = None
145 | 
146 |             elif t == "number":
147 |                 buf = []
148 |                 while c.isdigit() or (c in _EXTRA_NUM_CHARS):
149 |                     buf.append(c)
150 |                     c = fd.read(1)
151 |                 e = c
152 |                 numstr = ''.join(buf)
153 |                 v = number(numstr)
154 | 
155 |                 ## special case for 
156 |                 ## [23[12]]
157 |                 ## this is a valid clojure form
158 |                 if e in _COLL_OPEN_CHARS:
159 |                     fd.seek(-1, os.SEEK_CUR)
160 | 
161 |             elif t == "keyword":
162 |                 buf = []    ##skip the leading ":"
163 |                 while c is not self.terminator and c is not "" and c not in _STOP_CHARS:
164 |                     c = fd.read(1)
165 |                     buf.append(c)
166 |  
167 |                 e = c
168 |                 v = ''.join(buf[:-1])
169 | 
170 |             elif t == "string":
171 |                 buf = []
172 |                 cp = c = fd.read(1) ## to check escaping character \
173 | 
174 |                 while not(c == '"' and cp != '\\'):
175 |                     buf.append(c)
176 |                     cp = c
177 |                     c = fd.read(1)
178 |                 e = c
179 |                 #v = u''.join(buf).decode('unicode-escape')
180 |                 v = ''.join(buf).decode('string-escape')
181 |             else:
182 |                 r = False
183 |                 e = c
184 | 
185 |             if e is self.terminator:
186 |                 current_scope, _, container = self.value_stack.pop()
187 | 
188 |                 if r:
189 |                     current_scope.append(v)
190 |                     
191 |                 if container == "set":
192 |                     v = set(current_scope)
193 |                 elif container == "list":
194 |                     v = current_scope
195 |                 elif container == "dict":
196 |                     v = {}
197 |                     for i in range(0, len(current_scope), 2):
198 |                         v[current_scope[i]] = current_scope[i+1]
199 | 
200 |             if len(self.value_stack) > 0:
201 |                 self.value_stack[-1][0].append(v)
202 |                 self.terminator = self.value_stack[-1][1]
203 | 
204 |             return v
205 | 
206 | 
class CljEncoder(object):
    """Writes a Python value to a file-like object as Clojure literal text."""
    def __init__(self, data, fd):
        self.data = data
        self.fd = fd

    def encode(self):
        """Write ``self.data`` to ``self.fd``."""
        self.__do_encode(self.data)

    def get_type(self,t):
        """Return ``(type_name, is_collection)`` for the value ``t``."""
        try:
            string_types = (str, unicode)
        except NameError:
            ## interpreters without a separate unicode type
            string_types = (str,)

        if t is None:
            return ("None", False)
        elif isinstance(t, string_types):
            return ("string", False)
        elif isinstance(t, bool):
            ## must come before the int check: bool is a subclass of int
            return ("boolean", False)
        elif isinstance(t, float) or isinstance(t, int):
            return ("number", False)
        elif isinstance(t, dict):
            return ("dict", True)
        elif isinstance(t, list):
            return ("list", True)
        elif isinstance(t, set):
            return ("set", True)
        else:
            return ("unknown", False)

    def __encode_seq(self, opener, items, closer):
        """Write ``items`` between ``opener`` and ``closer``, space separated."""
        fd = self.fd
        fd.write(opener)
        first = True
        for v in items:
            if not first:
                fd.write(" ")
            first = False
            self.__do_encode(v)
        fd.write(closer)

    def __do_encode(self, d):
        """Write the value ``d``, recursing into collections.

        Separators are written between items rather than after each item, so
        nothing has to be overwritten afterwards and empty collections keep
        their opening delimiter.
        """
        fd = self.fd
        t,coll = self.get_type(d)

        if coll:
            if t == "dict":
                fd.write("{")
                first = True
                for k,v in d.items():
                    if not first:
                        fd.write(" ")
                    first = False
                    self.__do_encode(k)
                    fd.write(" ")
                    self.__do_encode(v)
                fd.write("}")
            elif t == "list":
                self.__encode_seq("[", d, "]")
            elif t == "set":
                self.__encode_seq("#{", d, "}")
        else:
            if t == "number":
                fd.write(str(d))
            elif t == "string":
                s = d.encode("unicode-escape")
                if not isinstance(s, str):
                    ## encode() returned bytes; they are pure ASCII here
                    s = s.decode("ascii")
                s = s.replace('"', '\\"')
                fd.write('"'+s+'"')
            elif t == "boolean":
                if d:
                    fd.write('true')
                else:
                    fd.write('false')
            elif t == 'None':
                fd.write('nil')
            else:
                fd.write('"'+str(d)+'"')
276 |     
def dump(obj, fp):
    """Encode ``obj`` as Clojure text into the file-like object ``fp``."""
    encoder = CljEncoder(obj, fp)
    return encoder.encode()
279 | 
def dumps(obj):
    """Encode ``obj`` as Clojure text and return it as a string."""
    out = StringIO()
    dump(obj, out)
    text = out.getvalue()
    out.close()
    return text
286 | 
def load(fp):
    """Decode one Clojure value from the file-like object ``fp``."""
    return CljDecoder(fp).decode()
290 | 
def loads(s):
    """Decode one Clojure value from the string ``s``."""
    source = StringIO(s)
    value = load(source)
    source.close()
    return value
296 | 
297 | 


--------------------------------------------------------------------------------
/test/clj_spec.rb:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env ruby
  2 | require 'rubygems'
  3 | require 'clj'
  4 | require 'bigdecimal'
  5 | 
  6 | describe Clojure do
  7 | 	describe '#dump' do
  8 | 		it 'dumps correctly true' do
  9 | 			Clojure.dump(true).should == 'true'
 10 | 		end
 11 | 
 12 | 		it 'dumps correctly false' do
 13 | 			Clojure.dump(false).should == 'false'
 14 | 		end
 15 | 
 16 | 		it 'dumps correctly nil' do
 17 | 			Clojure.dump(nil).should == 'nil'
 18 | 		end
 19 | 
 20 | 		it 'dumps correctly strings' do
 21 | 			Clojure.dump("lol").should      == '"lol"'
 22 | 			Clojure.dump("lol\nlol").should == '"lol\nlol"'
 23 | 			Clojure.dump("\\e\e").should    == '"\\\\e\u001b"'
 24 | 			Clojure.dump("\\a\a").should    == '"\\\\a\u0003"'
 25 | 		end
 26 | 
 27 | 		it 'dumps correctly symbols' do
 28 | 			Clojure.dump(:wat.symbol!).should == 'wat'
 29 | 		end
 30 | 
 31 | 		it 'dumps correctly keywords' do
 32 | 			Clojure.dump(:wat).should == ':wat'
 33 | 
 34 | 			expect {
 35 | 				Clojure.dump(:"lol wat")
 36 | 			}.should raise_error
 37 | 		end
 38 | 
 39 | 		it 'dumps correctly integers' do
 40 | 			Clojure.dump(2).should    == '2'
 41 | 			Clojure.dump(1337).should == '1337'
 42 | 		end
 43 | 
 44 | 		it 'dumps correctly floats' do
 45 | 			Clojure.dump(2.3).should == '2.3'
 46 | 		end
 47 | 
 48 | 		it 'dumps correctly rationals' do
 49 | 			unless RUBY_VERSION.include? '1.8'
 50 | 				Clojure.dump(Rational('2/3')).should == '2/3'
 51 | 			end
 52 | 		end
 53 | 
 54 | 		it 'dumps correctly bignums' do
 55 | 			Clojure.dump(324555555555555555555555555555555555555555555555324445555555555555).should == '324555555555555555555555555555555555555555555555324445555555555555N'
 56 | 		end
 57 | 
 58 | 		it 'dumps correctly bigdecimals' do
 59 | 			Clojure.dump(BigDecimal('0.2345636456')).should == '0.2345636456M'
 60 | 		end
 61 | 
 62 | 		it 'dumps correctly regexps' do
 63 | 			Clojure.dump(/(\d+)/).should == '#"(\d+)"'
 64 | 		end
 65 | 
 66 | 		it 'dumps correctly dates' do
 67 | 			unless RUBY_VERSION.include? '1.8'
 68 | 				Clojure.dump(DateTime.rfc3339("2012-02-03T15:20:59+01:00")).should == '1328278859'
 69 | 				Clojure.dump(DateTime.rfc3339("2012-02-03T15:20:59+01:00"), :alpha => true).should == '#inst "2012-02-03T15:20:59+01:00"'
 70 | 			end
 71 | 		end
 72 | 
 73 | 		it 'dumps correctly arrays' do
 74 | 			Clojure.dump([]).should           == '[]'
 75 | 			Clojure.dump([[]]).should         == '[[]]'
 76 | 			Clojure.dump([[], [], []]).should == '[[] [] []]'
 77 | 
 78 | 			Clojure.dump([1, 2, 3]).should         == '[1 2 3]'
 79 | 			Clojure.dump([1, 2, 3].to_list).should == '(1 2 3)'
 80 | 		end
 81 | 
 82 | 		it 'dumps correctly hashes' do
 83 | 			Clojure.dump({ :a => 'b' }).should == '{:a "b"}'
 84 | 		end
 85 | 
 86 | 		it 'dumps correctly metadata' do
 87 | 			Clojure.dump([1, 2, 3].to_vector.tap { |x| x.metadata = :lol }).should == '^:lol [1 2 3]'
 88 | 		end
 89 | 
 90 | 		it 'dumps correctly sets' do
 91 | 			Clojure.dump(Set.new([1, 2, 3])).should == '#{1 2 3}'
 92 | 		end
 93 | 	end
 94 | 
 95 | 	describe '#parse' do
 96 | 		it 'parses correctly true' do
 97 | 			Clojure.parse('true').should == true
 98 | 			
 99 | 			Clojure.parse('truf').should == :truf.symbol!
100 | 		end
101 | 
102 | 		it 'parses correctly false' do
103 | 			Clojure.parse('false').should == false
104 | 
105 | 		  Clojure.parse('falfe').should == :falfe.symbol!
106 | 		end
107 | 
108 | 		it 'parses correctly nil' do
109 | 			Clojure.parse('nil').should == nil
110 | 
111 | 			Clojure.parse('nol').should == :nol.symbol!
112 | 		end
113 | 
114 | 		it 'parses correctly chars' do
115 | 			Clojure.parse('\d').should == 'd'
116 | 			Clojure.parse('\a').should == 'a'
117 | 			Clojure.parse('\0').should == '0'
118 | 
119 | 			Clojure.parse('\newline').should   == "\n"
120 | 			Clojure.parse('\space').should     == ' '
121 | 			Clojure.parse('\tab').should       == "\t"
122 | 			Clojure.parse('\backspace').should == "\b"
123 | 			Clojure.parse('\formfeed').should  == "\f"
124 | 			Clojure.parse('\return').should    == "\r"
125 | 
126 | 			Clojure.parse('\o54').should == ','
127 | 			Clojure.parse('[\o3 "lol"]').should == ["\x03", "lol"]
128 | 
129 | 			unless RUBY_VERSION.include? '1.8'
130 | 				Clojure.parse('\u4343').should == "\u4343"
131 | 			end
132 | 		end
133 | 
134 | 		it 'parses correctly strings' do
135 | 			Clojure.parse('"lol"').should      == "lol"
136 | 			Clojure.parse('"lol\nlol"').should == "lol\nlol"
137 | 
138 | 			unless RUBY_VERSION.include? '1.8'
139 | 				Clojure.parse('"\u4343"').should   == "\u4343"
140 | 			end
141 | 		end
142 | 
143 | 		it 'parses correctly symbols' do
144 | 			Clojure.parse('ni').should == :ni.symbol!
145 | 		end
146 | 
147 | 		it 'parses correctly keywords' do
148 | 			Clojure.parse(':wat').should == :wat
149 | 		end
150 | 
151 | 		it 'parses correctly numbers' do
152 | 			Clojure.parse('2').should    == 2
153 | 			Clojure.parse('1337').should == 1337
154 | 
155 | 			Clojure.parse('16rFF').should == 255
156 | 			Clojure.parse('2r11').should  == 3
157 | 
158 | 			Clojure.parse('2.3').should == 2.3
159 | 			Clojure.parse('2e3').should == 2000
160 | 		end
161 | 
162 | 		it 'parses correctly rationals' do
163 | 			unless RUBY_VERSION.include? '1.8'
164 | 				Clojure.parse('2/3').should == Rational('2/3')
165 | 			end
166 | 		end
167 | 
168 | 		it 'parses correctly bignums' do
169 | 			Clojure.parse('324555555555555555555555555555555555555555555555324445555555555555N').should == 324555555555555555555555555555555555555555555555324445555555555555
170 | 		end
171 | 
172 | 		it 'parses correctly bigdecimals' do
173 | 			Clojure.parse('0.2345636456M').should == BigDecimal('0.2345636456')
174 | 		end
175 | 
176 | 		it 'parses correctly regexps' do
177 | 			Clojure.parse('#"(\d+)"').should == /(\d+)/
178 | 		end
179 | 
180 | 		it 'parses correctly dates' do
181 | 			unless RUBY_VERSION.include? '1.8'
182 | 				Clojure.parse('#inst "2012-02-03T15:20:59+01:00"').should == DateTime.rfc3339("2012-02-03T15:20:59+01:00")
183 | 			end
184 | 		end
185 | 
186 | 		it 'parses correctly vectors' do
187 | 			Clojure.parse('[]').should         == []
188 | 			Clojure.parse('[[]]').should       == [[]]
189 | 			Clojure.parse('[[] [] []]').should == [[], [], []]
190 | 
191 | 			Clojure.parse('[1 2 3]').should == [1, 2, 3]
192 | 			Clojure.parse('[23[]]').should  == [23, []]
193 | 		end
194 | 		
195 | 		it 'parses correctly lists' do
196 | 			Clojure.parse('()').should         == []
197 | 			Clojure.parse('(())').should       == [[]]
198 | 			Clojure.parse('(() () ())').should == [[], [], []]
199 | 
200 | 			Clojure.parse('(1 2 3)').should == [1, 2, 3]
201 | 			Clojure.parse('(23())').should == [23, []]
202 | 		end
203 | 
204 | 		it 'parses correctly sets' do
205 | 			Clojure.parse('#{1 2 3}').should == [1, 2, 3].to_set
206 | 
207 | 			expect { Clojure.parse('#{1 1}') }.should raise_error
208 | 		end
209 | 		
210 | 		it 'parses correctly maps' do
211 | 			Clojure.parse('{:a "b"}').should == { :a => 'b' }
212 | 		end
213 | 
214 | 		it 'parses correctly metadata' do
215 | 			Clojure.parse('^:lol [1 2 3]').tap { |data|
216 | 				data.should == [1, 2, 3]
217 | 				data.metadata.should == { :lol => true }
218 | 			}
219 | 		end
220 | 	end
221 | end
222 | 


--------------------------------------------------------------------------------