├── .gitignore ├── Cargo.toml ├── GRAMMAR_LICENSE ├── LICENSE ├── README.md ├── install.ps1 ├── install.sh ├── lua └── .gitignore └── src ├── json5.pest ├── lib.rs ├── parser.rs └── val.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lua-json5" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["cdylib"] 8 | 9 | [dependencies] 10 | pest = "2.1" 11 | pest_derive = "2.1" 12 | mlua = { version = "0.9", features = ["luajit", "module", "macros"] } 13 | -------------------------------------------------------------------------------- /GRAMMAR_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Callum Oakley 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 9 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 11 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 12 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 13 | PERFORMANCE OF THIS SOFTWARE. 
14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Joaquín Andrés León Ulloa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Json5 parser for luajit 2 | 3 | This crate provides json5 deserialization for luajit. 
4 | 5 | Inspired by and adapted from [json5-rs](https://github.com/callum-oakley/json5-rs) 6 | 7 | **NOTE**: When compiling for macOS, please add this to your `$CARGO_HOME/config.toml` 8 | per [this article](https://blog.kdheepak.com/loading-a-rust-library-as-a-lua-module-in-neovim.html) 9 | (which also inspired this project): 10 | 11 | ```TOML 12 | [target.x86_64-apple-darwin] 13 | rustflags = [ 14 | "-C", "link-arg=-undefined", 15 | "-C", "link-arg=dynamic_lookup", 16 | ] 17 | 18 | [target.aarch64-apple-darwin] 19 | rustflags = [ 20 | "-C", "link-arg=-undefined", 21 | "-C", "link-arg=dynamic_lookup", 22 | ] 23 | ``` 24 | 25 | Also, if you haven't already, add ';?.dylib' to your `package.cpath` so it will 26 | be recognized by the interpreter. 27 | 28 | ## Usage 29 | 30 | You can simply require the module in your scripts and parse a string using the 31 | `parse` method: 32 | 33 | ```lua 34 | local parse = require'json5'.parse 35 | local data = [[ 36 | { 37 | /* This is a comment */ 38 | ecma_identifier: 'works like a charm', 39 | "string keys": [1,2,3], // trailing comma 40 | } 41 | ]] 42 | local parsed_data = parse(data) 43 | ``` 44 | 45 | ## Use with Neovim 46 | 47 | You must have `cargo` installed and in your `$PATH`. 48 | 49 | Using [packer.nvim](https://github.com/wbthomason/packer.nvim): 50 | 51 | ```lua 52 | use { 53 | 'Joakker/lua-json5', 54 | -- if you're on Windows 55 | -- run = 'powershell ./install.ps1' 56 | run = './install.sh' 57 | } 58 | ``` 59 | 60 | ## Performance 61 | 62 | Tested on Neovim using the following script: 63 | 64 | ```lua 65 | local data = [[ {"hello":"world"} ]] 66 | local json5 = require('json5').parse 67 | local json_decode = vim.fn.json_decode 68 | 69 | local time_json5, time_json_decode = 0, 0 70 | 71 | local aux 72 | 73 | for _ = 1, 1000 do 74 | aux = os.clock() 75 | json5(data) 76 | time_json5 = time_json5 + (os.clock() - aux) 77 | end 78 | 79 | for _ = 1, 1000 do 80 | aux = os.clock() 81 | json_decode(data) 82 | time_json_decode = 
time_json_decode + (os.clock() - aux) 83 | end 84 | 85 | print(('json5: %.3fms'):format(time_json5)) 86 | print(('json_decode: %.3fms'):format(time_json_decode)) 87 | ``` 88 | 89 | On average: 90 | ``` 91 | json5: 0.023ms 92 | json_decode: 0.010ms 93 | ``` 94 | 95 | ## So, why should I use this instead of the builtin `json_decode`? 96 | 97 | If performance is your concern, I think you're better off using the builtin 98 | function `json_decode`. The advantage this package has over regular json, 99 | however, is that you get json5 features, such as comments, trailing commas and 100 | more flexible string literals. 101 | -------------------------------------------------------------------------------- /install.ps1: -------------------------------------------------------------------------------- 1 | cargo build --release 2 | mv .\target\release\lua_json5.dll lua\json5.dll 3 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cargo build --release 4 | 5 | case $OSTYPE in 6 | "linux-gnu"*) 7 | mv ./target/release/liblua_json5.so lua/json5.so 8 | strip lua/json5.so 9 | ;; 10 | "darwin"*) 11 | # Provide both just in case 12 | cp ./target/release/liblua_json5.dylib lua/json5.dylib 13 | cp lua/json5.dylib lua/json5.so 14 | ;; 15 | esac 16 | -------------------------------------------------------------------------------- /lua/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /src/json5.pest: -------------------------------------------------------------------------------- 1 | // Adapted from https://github.com/callum-oakley/json5-rs/blob/master/src/json5.pest 2 | 3 | // see https://spec.json5.org/#syntactic-grammar and 4 | // https://spec.json5.org/#lexical-grammar 5 | 6 | 
COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" | "//" ~ (!line_terminator ~ ANY)* } 7 | 8 | WHITESPACE = _{ 9 | "\u{0009}" | 10 | "\u{000B}" | 11 | "\u{000C}" | 12 | "\u{0020}" | 13 | "\u{00A0}" | 14 | "\u{FEFF}" | 15 | SPACE_SEPARATOR | 16 | line_terminator 17 | } 18 | 19 | array = { "[" ~ "]" | "[" ~ value ~ ("," ~ value)* ~ ","? ~ "]" } 20 | 21 | boolean = @{ "true" | "false" } 22 | 23 | char_escape_sequence = @{ single_escape_char | non_escape_char } 24 | 25 | char_literal = @{ !("\\" | line_terminator) ~ ANY } 26 | 27 | decimal_integer_literal = _{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* } 28 | 29 | decimal_literal = _{ 30 | decimal_integer_literal ~ "." ~ ASCII_DIGIT* ~ exponent_part? | 31 | "." ~ ASCII_DIGIT+~ exponent_part? | 32 | decimal_integer_literal ~ exponent_part? 33 | } 34 | 35 | double_quote_char = _{ 36 | "\\" ~ escape_sequence | 37 | line_continuation | 38 | !"\"" ~ char_literal 39 | } 40 | 41 | escape_char = _{ single_escape_char | ASCII_DIGIT | "x" | "u" } 42 | 43 | escape_sequence = _{ 44 | char_escape_sequence | 45 | nul_escape_sequence | 46 | "x" ~ hex_escape_sequence | 47 | "u" ~ unicode_escape_sequence 48 | } 49 | 50 | exponent_part = _{ ^"e" ~ ("+" | "-")? 
~ ASCII_DIGIT+ } 51 | 52 | hex_escape_sequence = @{ ASCII_HEX_DIGIT{2} } 53 | 54 | hex_integer_literal = _{ ^"0x" ~ ASCII_HEX_DIGIT+ } 55 | 56 | identifier = ${ identifier_start ~ identifier_part* } 57 | 58 | identifier_part = _{ 59 | identifier_start | 60 | &( 61 | NONSPACING_MARK | 62 | DIACRITIC | // not sure about this, spec says "Combining spacing mark (Mc)" 63 | DECIMAL_NUMBER | 64 | CONNECTOR_PUNCTUATION | 65 | "\u{200C}" | 66 | "\u{200D}" 67 | ) ~ char_literal 68 | } 69 | 70 | identifier_start = _{ 71 | &(unicode_letter | "$" | "_") ~ char_literal | 72 | "\\u" ~ unicode_escape_sequence 73 | } 74 | 75 | key = _{ identifier | string } 76 | 77 | line_continuation = _{ "\\" ~ line_terminator_sequence } 78 | 79 | line_terminator = _{ "\u{000A}" | "\u{000D}" | "\u{2028}" | "\u{2029}" } 80 | 81 | line_terminator_sequence = _{ "\u{000D}" ~ "\u{000A}" | line_terminator } 82 | 83 | non_escape_char = _{ !(escape_char | line_terminator) ~ ANY } 84 | 85 | nul_escape_sequence = @{ "0" } 86 | 87 | null = @{ "null" } 88 | 89 | number = @{ ("+" | "-")? ~ numeric_literal } 90 | 91 | numeric_literal = _{ 92 | hex_integer_literal | 93 | decimal_literal | 94 | "Infinity" | 95 | "NaN" 96 | } 97 | 98 | object = { "{" ~ "}" | "{" ~ pair ~ ("," ~ pair)* ~ ","? 
~ "}" } 99 | 100 | pair = { key ~ ":" ~ value } 101 | 102 | single_escape_char = _{ "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v" } 103 | 104 | single_quote_char = _{ 105 | "\\" ~ escape_sequence | 106 | line_continuation | 107 | !"'" ~ char_literal 108 | } 109 | 110 | string = ${ "\"" ~ double_quote_char* ~ "\"" | "'" ~ single_quote_char* ~ "'" } 111 | 112 | text = _{ SOI ~ value ~ EOI } 113 | 114 | unicode_escape_sequence = @{ ASCII_HEX_DIGIT{4} } 115 | 116 | unicode_letter = _{ 117 | UPPERCASE_LETTER | 118 | LOWERCASE_LETTER | 119 | TITLECASE_LETTER | 120 | MODIFIER_LETTER | 121 | OTHER_LETTER | 122 | LETTER_NUMBER 123 | } 124 | 125 | value = _{ null | boolean | string | number | object | array } 126 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use mlua::{Lua, Result, Table}; 2 | pub mod parser; 3 | pub mod val; 4 | 5 | #[mlua::lua_module] 6 | fn json5(lua: &Lua) -> Result { 7 | let exports = lua.create_table()?; 8 | exports.set("parse", lua.create_function(parser::parse)?)?; 9 | Ok(exports) 10 | } 11 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use mlua::{Error::ExternalError, IntoLua, Lua, Result, Value as LuaValue}; 2 | use pest::iterators::Pair; 3 | use pest::Parser; 4 | use std::collections::HashMap; 5 | use std::sync::Arc; 6 | 7 | use crate::val::Value; 8 | 9 | #[derive(pest_derive::Parser)] 10 | #[grammar = "json5.pest"] 11 | struct Json5Parser; 12 | 13 | // TODO(Joakker): Make this return a Result instead of a naked String. 
14 | fn parse_str(pair: Pair) -> String { 15 | let mut buf = Vec::::with_capacity(pair.as_str().len()); 16 | for p in pair.into_inner() { 17 | match p.as_rule() { 18 | Rule::char_literal => buf.extend(p.as_str().encode_utf16()), 19 | Rule::nul_escape_sequence => buf.push(0), 20 | Rule::char_escape_sequence => match p.as_str() { 21 | "n" => buf.push(0xA), 22 | "r" => buf.push(0xD), 23 | "t" => buf.push(0x9), 24 | "b" => buf.push(0x8), 25 | "v" => buf.push(0xB), 26 | "f" => buf.push(0xC), 27 | k => buf.extend(k.encode_utf16()), 28 | }, 29 | Rule::hex_escape_sequence => { 30 | let s = p.as_str(); 31 | let hex = u8::from_str_radix(s, 16).unwrap_or(0); 32 | buf.push(hex as u16); 33 | } 34 | Rule::unicode_escape_sequence => { 35 | if let Ok(v) = u16::from_str_radix(p.as_str(), 16) { 36 | buf.push(v) 37 | } 38 | } 39 | _ => unreachable!(), 40 | } 41 | } 42 | String::from_utf16_lossy(&buf) 43 | } 44 | 45 | #[test] 46 | fn test_char_espace_sequence() { 47 | let mut pairs = Json5Parser::parse(Rule::string, r#""\t""#).unwrap(); 48 | let s = parse_str(pairs.next().unwrap()); 49 | assert_eq!(s, "\t") 50 | } 51 | 52 | #[test] 53 | fn test_hex_espace_sequence() { 54 | let mut pairs = Json5Parser::parse(Rule::string, r#""\x0A""#).unwrap(); 55 | let s = parse_str(pairs.next().unwrap()); 56 | assert_eq!(s, "\n") 57 | } 58 | 59 | #[test] 60 | fn test_unicode_espace_sequence_surrogate() { 61 | let mut pairs = Json5Parser::parse(Rule::string, r#""\ud834\udd1e""#).unwrap(); 62 | let s = parse_str(pairs.next().unwrap()); 63 | assert_eq!(s, "𝄞") 64 | } 65 | 66 | #[test] 67 | fn test_unicode_espace_sequence() { 68 | let mut pairs = Json5Parser::parse(Rule::string, r#""\u000a""#).unwrap(); 69 | let s = parse_str(pairs.next().unwrap()); 70 | assert_eq!(s, "\n") 71 | } 72 | 73 | fn parse_pair(pair: Pair) -> Value { 74 | match pair.as_rule() { 75 | Rule::array => Value::Array(pair.into_inner().map(parse_pair).collect()), 76 | Rule::null => Value::Null, 77 | Rule::string => 
Value::String(parse_str(pair)), 78 | Rule::number => Value::Number(pair.as_str().parse().unwrap()), 79 | Rule::boolean => Value::Boolean(pair.as_str().parse().unwrap()), 80 | Rule::object => { 81 | let pairs = pair.into_inner().map(|pair| { 82 | let mut inner_rule = pair.into_inner(); 83 | let name = { 84 | let pair = inner_rule.next().unwrap(); 85 | match pair.as_rule() { 86 | Rule::identifier => pair.as_str().to_string(), 87 | Rule::string => parse_str(pair), 88 | _ => unreachable!(), 89 | } 90 | }; 91 | let value = parse_pair(inner_rule.next().unwrap()); 92 | (name, value) 93 | }); 94 | let mut m = HashMap::with_capacity(pairs.len()); 95 | for (k, v) in pairs { 96 | m.insert(k, v); 97 | } 98 | Value::Object(m) 99 | } 100 | _ => unreachable!(), 101 | } 102 | } 103 | 104 | pub fn parse(lua: &Lua, data: String) -> Result> { 105 | let data = match Json5Parser::parse(Rule::text, data.as_str()) { 106 | Ok(mut data) => data.next().unwrap(), 107 | Err(err) => return Err(ExternalError(Arc::new(err))), 108 | }; 109 | parse_pair(data).into_lua(lua) 110 | } 111 | -------------------------------------------------------------------------------- /src/val.rs: -------------------------------------------------------------------------------- 1 | use mlua::{Lua, Nil, Result, IntoLua, Value as LuaValue}; 2 | use std::collections::HashMap; 3 | 4 | pub enum Value { 5 | Null, 6 | Array(Vec), 7 | Object(HashMap), 8 | String(String), 9 | Number(f64), 10 | Boolean(bool), 11 | } 12 | 13 | impl<'lua> IntoLua<'lua> for Value { 14 | fn into_lua(self, lua: &'lua Lua) -> Result> { 15 | match self { 16 | Self::Null => Ok(Nil), 17 | Self::Array(a) => a.into_lua(lua), 18 | Self::String(s) => s.into_lua(lua), 19 | Self::Number(n) => n.into_lua(lua), 20 | Self::Boolean(b) => b.into_lua(lua), 21 | Self::Object(o) => o.into_lua(lua), 22 | } 23 | } 24 | } 25 | --------------------------------------------------------------------------------