├── Setup.hs ├── test ├── Spec.hs ├── doctests.hs ├── UnescapeSpec.hs └── ParserSpec.hs ├── .gitignore ├── Data └── JsonStream │ ├── CLexType.hsc │ ├── Conduit.hs │ ├── TokenParser.hs │ ├── CLexer.hs │ └── Unescape.hs ├── benchmarks ├── json-data │ ├── twitter1.json │ ├── example.json │ ├── integers.json │ ├── twitter10.json │ ├── twitter20.json │ ├── numbers.json │ ├── jp10.json │ └── twitter50.json ├── AesonParse.hs ├── JStreamParse.hs ├── aeson-benchmarks.cabal ├── JStreamParseInt.hs ├── bench-parse.py └── JStreamParseObj.hs ├── c_lib ├── lexer.h ├── unescape_string.c └── lexer.c ├── .travis.yml ├── LICENSE ├── changelog.md ├── json-stream.cabal └── README.md /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | benchmarks/dist 3 | cabal.sandbox.config 4 | .cabal-sandbox/ 5 | *.o 6 | *.hi 7 | Data/JsonStream/CLexType.hs 8 | Data/JsonStream/CLexType_out.hs 9 | -------------------------------------------------------------------------------- /test/doctests.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Test.DocTest 4 | 5 | main :: IO () 6 | main = doctest [ "-idist/build", "dist/build/doctest/doctest-tmp/c_lib/lexer.o", "dist/build/doctest/doctest-tmp/c_lib/unescape_string.o", "Data/JsonStream/Parser.hs"] 7 | -------------------------------------------------------------------------------- /Data/JsonStream/CLexType.hsc: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 2 | module Data.JsonStream.CLexType where 3 | 4 | import Foreign.C.Types 5 | import Foreign 6 | 7 | newtype LexResultType = LexResultType CInt deriving (Show, Eq, Storable) 8 | 9 | #include "lexer.h" 10 | 11 | #{enum LexResultType, LexResultType 12 | , resNumber = RES_NUMBER 13 | , resString = RES_STRING 14 | , resTrue = RES_TRUE 15 | , resFalse = RES_FALSE 16 | , resNull = RES_NULL 17 | 18 | , resOpenBrace = RES_OPEN_BRACE 19 | , resCloseBrace = RES_CLOSE_BRACE 20 | , resOpenBracket = RES_OPEN_BRACKET 21 | , resCloseBracket = RES_CLOSE_BRACKET 22 | 23 | , resStringPartial = RES_STRING_PARTIAL 24 | , resNumberPartial = RES_NUMBER_PARTIAL 25 | , resNumberSmall = RES_NUMBER_SMALL 26 | } 27 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter1.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"80430860","profile_image_url":"http://a2.twimg.com/profile_images/536455139/icon32_normal.png","created_at":"Wed, 26 Jan 2011 07:07:02 +0000","from_user":"kazu_yamamoto","id_str":"30159761706061824","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell Server Pages \u3063\u3066\u3001\u307e\u3060\u7d9a\u3044\u3066\u3044\u305f\u306e\u304b\uff01","id":30159761706061824,"from_user_id":80430860,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"}],"max_id":30159761706061824,"since_id":0,"refresh_url":"?since_id=30159761706061824&q=haskell","next_page":"?page=2&max_id=30159761706061824&rpp=1&q=haskell","results_per_page":1,"page":1,"completed_in":0.012606,"since_id_str":"0","max_id_str":"30159761706061824","query":"haskell"} -------------------------------------------------------------------------------- /Data/JsonStream/Conduit.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Data.JsonStream.Conduit 3 | -- License : BSD-style 4 | -- 5 | -- Stability : experimental 6 | -- Portability : portable 7 | -- 8 | -- Use "Data.JsonStream.Parser" parsers in "Data.Conduit". 9 | 10 | module Data.JsonStream.Conduit ( 11 | parserConduit 12 | ) where 13 | 14 | import Data.ByteString (ByteString) 15 | import qualified Data.Conduit.Internal as C 16 | 17 | import Data.JsonStream.Parser 18 | 19 | -- |Use a 'Parser' as a conduit from 'ByteString' input chunks to results, finally returning any parse error or 'Nothing' on success. 20 | parserConduit :: Parser a -> C.ConduitT ByteString a m (Maybe String) 21 | parserConduit ps = C.ConduitT (parsePipe $ runParser ps) where 22 | parsePipe (ParseYield a p) r = C.HaveOutput (parsePipe p r) a 23 | parsePipe (ParseNeedData f) r = C.NeedInput (\i -> parsePipe (f i) r) (\() -> r (Just "Incomplete JSON")) 24 | parsePipe (ParseFailed e) r = r (Just e) 25 | parsePipe (ParseDone l) r = C.Leftover (r Nothing) l 26 | -------------------------------------------------------------------------------- /benchmarks/AesonParse.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | 4 | import Control.Exception 5 | import Control.Monad 6 | import Data.Aeson 7 | import Data.Time.Clock 8 | import System.Environment (getArgs) 9 | import System.IO 10 | import qualified Data.ByteString as B 11 | import Control.DeepSeq (deepseq) 12 | 13 | main :: IO () 14 | main = do 15 | (bs:cnt:args) <- getArgs 16 | let count = read cnt :: Int 17 | forM_ args $ \arg -> withFile arg ReadMode $ \h -> do 18 | putStrLn $ arg ++ ":" 19 | start <- getCurrentTime 20 | let loop !good !bad 21 | | good+bad >= count = return (good, bad) 22 | | otherwise = do 23 | hSeek h AbsoluteSeek 0 24 | content <- B.hGet h 4200000 25 | let result = decodeStrict' content 26 | case result `deepseq` result of 27 | Just (_ :: Value) -> loop (good+1) bad 28 | Nothing -> loop good (bad+1) 29 | (good, _) <- loop 0 0 30 | delta <- flip diffUTCTime start `fmap` getCurrentTime 31 | putStrLn $ " " ++ show good ++ " good, " ++ show delta 32 | let rate = fromIntegral count / realToFrac delta :: Double 33 | putStrLn $ " " ++ show (round rate :: Int) ++ " per second" 34 | -------------------------------------------------------------------------------- /Data/JsonStream/TokenParser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Data.JsonStream.TokenParser ( 4 | Element(..) 5 | , TokenResult(..) 6 | ) where 7 | 8 | import qualified Data.Aeson as AE 9 | import qualified Data.ByteString.Char8 as BS 10 | import Foreign.C.Types 11 | 12 | data Element = 13 | ArrayBegin 14 | | ArrayEnd !BS.ByteString -- Rest of the source string for correct (ParseDone ) 15 | | ObjectBegin 16 | | ObjectEnd !BS.ByteString -- Rest of the source string for correct (ParseDone ) 17 | | StringContent !BS.ByteString 18 | | StringRaw !BS.ByteString !Bool !BS.ByteString -- Allow raw strings to go into parser as bytestring/ isAscii 19 | | StringEnd !BS.ByteString 20 | | JValue !AE.Value 21 | | JInteger !CLong 22 | deriving (Show, Eq) 23 | 24 | -- | Public interface for parsing JSON tokens. 25 | data TokenResult = TokMoreData (BS.ByteString -> TokenResult) 26 | | PartialResult Element TokenResult 27 | -- ^ found element, continuation, actual parsing view - so that we can report the unparsed 28 | -- data when the parsing finishes. 29 | | TokFailed 30 | 31 | -- For debugging purposes 32 | instance Show TokenResult where 33 | show (TokMoreData _) = "TokMoreData" 34 | show TokFailed = "TokFailed" 35 | show (PartialResult el next) = "(PartialResult (" ++ show el ++ ") " ++ show next ++ ")" 36 | -------------------------------------------------------------------------------- /c_lib/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef _LEXER_H_ 2 | #define _LEXER_H_ 3 | 4 | #define RES_NUMBER 0 5 | #define RES_STRING 1 /* Add-data: 0 - single string, 1 - final part of partials */ 6 | #define RES_TRUE 2 7 | #define RES_FALSE 3 8 | #define RES_NULL 4 9 | 10 | #define RES_OPEN_BRACE 5 11 | #define RES_CLOSE_BRACE 6 12 | #define RES_OPEN_BRACKET 7 13 | #define RES_CLOSE_BRACKET 8 14 | 15 | #define RES_STRING_PARTIAL 9 /* Add-data: 0 - first part, 1 - other parts */ 16 | #define RES_NUMBER_PARTIAL 10 /* Add-data: 0 - first part, 1 - other parts */ 17 | #define RES_NUMBER_SMALL 12 18 | 19 | enum states { 20 | STATE_BASE = 0, 21 | STATE_STRING, 22 | STATE_NUMBER, 23 | STATE_TRUE, 24 | STATE_FALSE, 25 | STATE_NULL 26 | }; 27 | 28 | struct lexer_result { 29 | int restype; 30 | int startpos; // Startpos + length should point to unparsed data for } and ] 31 | int length; 32 | int _padding; // 64-bit architectures will align adddata to 8-bytes anyway, make it explicit and hope for best 33 | 34 | long adddata; // Additional data to result 35 | }; 36 | 37 | struct lexer { 38 | int current_state; 39 | int state_data; 40 | int state_data_2; 41 | 42 | int position; 43 | int length; 44 | 45 | int result_num; 46 | int result_limit; 47 | }; 48 | 49 | #define LEX_OK 0 50 | #define LEX_YIELD 1 51 | #define LEX_ERROR 2 52 | 53 | extern int lex_json(const char *input, struct lexer *lexer, struct lexer_result *result); 54 | 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | sudo: false 3 | matrix: 4 | include: 5 | - env: CABALVER=1.18 GHCVER=7.8.4 6 | addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4], sources: [hvr-ghc]}} 7 | - env: CABALVER=1.22 GHCVER=7.10.1 8 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.1],sources: [hvr-ghc]}} 9 | - env: CABALVER=1.24 GHCVER=8.0.1 10 | addons: {apt: {packages: [cabal-install-1.24,ghc-8.0.1], sources: [hvr-ghc]}} 11 | - env: CABALVER=1.24 GHCVER=8.2.1 12 | addons: {apt: {packages: [cabal-install-1.24,ghc-8.2.1], sources: [hvr-ghc]}} 13 | - env: CABALVER=2.2 GHCVER=8.4.2 14 | addons: {apt: {packages: [cabal-install-2.2,ghc-8.4.2], sources: [hvr-ghc]}} 15 | - env: CABALVER=2.4 GHCVER=8.6.1 16 | addons: {apt: {packages: [cabal-install-2.4,ghc-8.6.1], sources: [hvr-ghc]}} 17 | - env: CABALVER=head GHCVER=head 18 | addons: {apt: {packages: [cabal-install-head,ghc-head], sources: [hvr-ghc]}} 19 | allow_failures: 20 | - env: CABALVER=head GHCVER=head 21 | 22 | before_install: 23 | - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH 24 | - ghc --version 25 | - cabal --version 26 | 27 | install: 28 | - travis_retry cabal update 29 | - sed -i 's/^jobs:/-- jobs:/' ${HOME}/.cabal/config 30 | - cabal install --only-dependencies --enable-tests 31 | 32 | script: 33 | - cabal configure --enable-tests 34 | - cabal build 35 | - cabal test 36 | - cabal check 37 | - cabal sdist 38 | 39 | notifications: 40 | email: true 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Ondrej Palkovsky 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Ondrej Palkovsky nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /benchmarks/JStreamParse.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | 4 | import Control.Exception 5 | import Control.Monad 6 | import Data.Time.Clock 7 | import System.Environment (getArgs) 8 | import System.IO 9 | import qualified Data.ByteString as B 10 | import Data.JsonStream.Parser 11 | import Data.Aeson (Value) 12 | 13 | parseWith :: IO B.ByteString -> Parser a -> B.ByteString -> IO [a] 14 | parseWith refill scheme inp = do 15 | let pout = runParser' scheme inp 16 | doparse pout [] 17 | where 18 | doparse (ParseDone _) acc = return acc 19 | doparse (ParseFailed err) _ = return [] 20 | doparse (ParseYield v next) acc = doparse next (v:acc) 21 | doparse (ParseNeedData cont) acc = do 22 | dta <- refill 23 | doparse (cont dta) acc 24 | 25 | main :: IO () 26 | main = do 27 | (bs:cnt:args) <- getArgs 28 | let count = read cnt :: Int 29 | forM_ args $ \arg -> withFile arg ReadMode $ \h -> do 30 | putStrLn $ arg ++ ":" 31 | start <- getCurrentTime 32 | let loop !good !bad 33 | | good+bad >= count = return (good, bad) 34 | | otherwise = do 35 | hSeek h AbsoluteSeek 0 36 | content <- B.hGet h 4200000 37 | let result = decodeStrict content 38 | case result of 39 | Just (_ :: Value) -> loop (good+1) bad 40 | Nothing -> loop good (bad+1) 41 | (good, _) <- loop 0 0 42 | delta <- flip diffUTCTime start `fmap` getCurrentTime 43 | putStrLn $ " " ++ show good ++ " good, " ++ show delta 44 | let rate = fromIntegral count / realToFrac delta :: Double 45 | putStrLn $ " " ++ show (round rate :: Int) ++ " per second" 46 | -------------------------------------------------------------------------------- /benchmarks/aeson-benchmarks.cabal: -------------------------------------------------------------------------------- 1 | name: aeson-benchmarks 2 | version: 0 3 | build-type: Simple 4 | 5 | cabal-version: >=1.8 6 | 7 | library 8 | hs-source-dirs: .. 9 | c-sources: ../c_lib/lexer.c, ../c_lib/unescape_string.c 10 | includes: ../c_lib/lexer.h 11 | include-dirs: ../c_lib 12 | 13 | exposed-modules: 14 | Data.JsonStream.Parser 15 | 16 | other-modules: 17 | Data.JsonStream.TokenParser 18 | Data.JsonStream.CLexType 19 | Data.JsonStream.CLexer 20 | Data.JsonStream.Unescape 21 | 22 | build-depends: base >=4.7 && <5 23 | , bytestring 24 | , text 25 | , aeson (>=2.2) 26 | , vector 27 | , unordered-containers 28 | , scientific 29 | , primitive 30 | , containers 31 | , deepseq 32 | 33 | ghc-options: -O2 -Wall 34 | cc-options: -O2 -Wall 35 | 36 | executable aeson-benchmark-jstream-parse 37 | main-is: JStreamParse.hs 38 | ghc-options: -Wall -O2 -rtsopts 39 | build-depends: 40 | aeson-benchmarks, 41 | aeson (>=0.7), 42 | base, 43 | bytestring, 44 | time 45 | 46 | executable aeson-benchmark-aeson-parse 47 | main-is: AesonParse.hs 48 | ghc-options: -Wall -O2 -rtsopts 49 | build-depends: 50 | aeson-benchmarks, 51 | attoparsec, 52 | base, 53 | bytestring, 54 | time, 55 | aeson (>=0.9), 56 | deepseq 57 | 58 | executable aeson-benchmark-fastobj 59 | main-is: JStreamParseObj.hs 60 | ghc-options: -Wall -O2 -rtsopts 61 | build-depends: 62 | aeson-benchmarks, 63 | aeson (>=0.7), 64 | base, 65 | bytestring, 66 | time, 67 | text 68 | -------------------------------------------------------------------------------- /benchmarks/JStreamParseInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-} 2 | 3 | import Control.Exception 4 | import Control.Monad 5 | import Data.Time.Clock 6 | import System.Environment (getArgs) 7 | import System.IO 8 | import qualified Data.ByteString as B 9 | import Data.JsonStream.Parser 10 | import Data.Aeson.Types (Value(..)) 11 | import Control.Applicative (many) 12 | import Data.Word 13 | 14 | parseWith :: IO B.ByteString -> Parser a -> B.ByteString -> IO [a] 15 | parseWith refill scheme inp = do 16 | let pout = runParser' scheme inp 17 | doparse pout [] 18 | where 19 | doparse (ParseDone _) acc = return acc 20 | doparse (ParseFailed err) _ = return [] 21 | doparse (ParseYield v next) acc = doparse next (v:acc) 22 | doparse (ParseNeedData cont) acc = do 23 | dta <- refill 24 | doparse (cont dta) acc 25 | 26 | main :: IO () 27 | main = do 28 | (bs:cnt:args) <- getArgs 29 | let count = read cnt :: Int 30 | blkSize = read bs 31 | forM_ args $ \arg -> bracket (openFile arg ReadMode) hClose $ \h -> do 32 | putStrLn $ arg ++ ":" 33 | start <- getCurrentTime 34 | let loop !good !bad 35 | | good+bad >= count = return (good, bad) 36 | | otherwise = do 37 | hSeek h AbsoluteSeek 0 38 | let refill = B.hGet h blkSize 39 | result <- parseWith refill (many (arrayOf integer) :: Parser [Word32]) =<< refill 40 | case (result) of 41 | [] -> loop good (bad+1) 42 | _ -> loop (good+1) bad 43 | (good, _) <- loop 0 0 44 | delta <- flip diffUTCTime start `fmap` getCurrentTime 45 | putStrLn $ " " ++ show good ++ " good, " ++ show delta 46 | let rate = fromIntegral count / realToFrac delta :: Double 47 | putStrLn $ " " ++ show (round rate :: Int) ++ " per second" 48 | -------------------------------------------------------------------------------- /benchmarks/bench-parse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os, re, subprocess, sys 4 | 5 | result_re = re.compile(r'^\s*(\d+) good, (\d+\.\d+)s$', re.M) 6 | 7 | if len(sys.argv) > 1: 8 | parser_exe = sys.argv[1] 9 | else: 10 | parser_exe = 'aeson-benchmark-aeson-parse' 11 | 12 | def run(count, filename): 13 | print(' %s :: %s times' % (filename, count)) 14 | p = subprocess.Popen(['cabal', 'exec', parser_exe, '65536', str(count), filename], 15 | stdout=subprocess.PIPE) 16 | output = p.stdout.read().decode('utf8') 17 | p.wait() 18 | m = result_re.search(output) 19 | if not m: 20 | print >> sys.stderr, 'run gave confusing output!?' 21 | sys.stderr.write(output) 22 | return 23 | else: 24 | #sys.stdout.write(output) 25 | pass 26 | good, elapsed = m.groups() 27 | good, elapsed = int(good), float(elapsed) 28 | st = os.stat(filename) 29 | parses_per_second = good / elapsed 30 | mb_per_second = st.st_size * parses_per_second / 1048576 31 | print (' %.3f seconds, %d parses/sec, %.3f MB/sec' % 32 | (elapsed, parses_per_second, mb_per_second)) 33 | return parses_per_second, mb_per_second, st.st_size, elapsed 34 | 35 | def runtimes(count, filename, times=1): 36 | for i in range(times): 37 | yield run(count, filename) 38 | 39 | info = ''' 40 | json-data/buffer-builder.json 500 41 | json-data/example.json 18000 42 | json-data/integers.json 4000 43 | json-data/geometry.json 500 44 | json-data/numbers.json 3000 45 | json-data/sigma.json 60 46 | json-data/twitter1.json 60000 47 | json-data/twitter10.json 13000 48 | json-data/twitter20.json 7500 49 | json-data/twitter50.json 2500 50 | json-data/twitter100.json 1000 51 | json-data/jp10.json 4000 52 | json-data/jp50.json 1200 53 | json-data/jp100.json 700 54 | ''' 55 | 56 | for i in info.strip().splitlines(): 57 | name, count = i.split() 58 | best = sorted(runtimes(int(count), name, times=3), reverse=True)[0] 59 | parses_per_second, mb_per_second, size, elapsed = best 60 | print ('%.1f KB: %d msg\\/sec (%.1f MB\\/sec)' % 61 | (size / 1024.0, int(round(parses_per_second)), mb_per_second)) 62 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # 0.4.6.0 2 | 3 | - Show instance for ParseOutput 4 | - Correctly return remaining data for direct String parser (doesn't return correct data for direct Number/Bool/Null parser) 5 | 6 | # 0.4.5.3 7 | 8 | - lifted upper bounds on aeson 9 | 10 | # 0.4.5.2 11 | 12 | - user unsafeCoerce to existential wrapper 13 | - fix bad parsing with .| on object 14 | 15 | # 0.4.5.1 16 | 17 | - fixed testing suite 18 | 19 | # 0.4.5.0 20 | 21 | - objectOf parser for faster one-pass JSON object parsing 22 | - set minimum base to 4.11 23 | 24 | # 0.4.4.2 25 | 26 | - aeson-2.1 27 | 28 | # 0.4.4.1 29 | 30 | - added objectKeyValues (Dylan Simon) 31 | - optimization for reading ASCII strings 32 | 33 | 34 | # 0.4.4.0 35 | 36 | - added text 2.0 compatibility 37 | - added conduit interface behind a flag (Dylan Simon) 38 | - added manyReverse (Dylan Simon) 39 | - added valueWith (Dylan Simon) 40 | 41 | # 0.4.3.0 42 | 43 | - Aeson 2.0 compatibility 44 | - Added support for raw bytestring 45 | 46 | # 0.4.2.4 47 | 48 | Fix compiling with new ghc. 49 | 50 | # 0.4.2.3 51 | 52 | Fix 32-bit number parsing. 53 | 54 | # 0.4.2.2 55 | 56 | Speed optimization of `many` and aeson object. 57 | 58 | # 0.4.2.0 59 | Added Semigroup instance, compatibility with base-4.11 60 | 61 | # 0.4.1.5 62 | Renamed `_js_decode_string` function to avoid conflict with `aeson`. 63 | 64 | # 0.4.1.4 65 | Added support for GHC 8.2. 66 | 67 | # 0.4.1.3 68 | Fix windows build. 69 | 70 | # 0.4.1.2 71 | Slightly more strictness in arrayOf. 72 | 73 | # 0.4.1.1 74 | Fixed memory leak in arrayOf. 75 | 76 | # 0.4.1.0 77 | Added aeson-compatibile encode/decode functions. 78 | 79 | # 0.4.0.0 80 | Breaking changes (this could *really* break your code): 81 | - Changed `<|>` to `<>` (`Monoid` is better for 'appending' than `Alternative`) 82 | - Changed `>^>` to `<|>` - (`Alternative` now really means alternative) 83 | - Changed `toList` to `many` (Use existing `Alternative` function instead of a custom one) 84 | - Added `some` function (Alternative, default implementation won't work) 85 | - C-lexer now supports parsing numbers up to 18 digits (E-notation is not optimized yet) 86 | 87 | # 0.3.2.3 88 | - Completely rewritten text unescapes based on text decodeUtf8; fixes some surprising crashes, speed improvements. 89 | 90 | # 0.3.2.0 91 | - Changed string parsing; parsing of escaped strings is now very fast 92 | - Removed bytestring parser 93 | 94 | # 0.3.0.4 95 | - Fixed bug in safestring 96 | - Fixed test so it doesn't depend on versions of other packages 97 | - Added sax-like parsers 98 | 99 | # 0.3.0.3 100 | - Fixed wrong size of C structure in FFI that was causing a segfault. 101 | -------------------------------------------------------------------------------- /benchmarks/JStreamParseObj.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, OverloadedStrings #-} 2 | 3 | import Control.Exception 4 | import Control.Monad 5 | import Data.Time.Clock 6 | import System.Environment (getArgs) 7 | import System.IO 8 | import qualified Data.ByteString as B 9 | import Data.JsonStream.Parser 10 | import Data.Aeson.Types (Value(..)) 11 | import Control.Applicative (many) 12 | import Data.Word 13 | import Data.JsonStream.Parser (integer, objectOf, value) 14 | import qualified Data.Text as T 15 | import Data.Aeson (FromJSON, withObject) 16 | import qualified Data.Aeson as AE 17 | 18 | parseWith :: IO B.ByteString -> Parser a -> B.ByteString -> IO [a] 19 | parseWith refill scheme inp = do 20 | let pout = runParser' scheme inp 21 | doparse pout [] 22 | where 23 | doparse (ParseDone _) acc = return acc 24 | doparse (ParseFailed err) _ = return [] 25 | doparse (ParseYield v next) acc = doparse next (v:acc) 26 | doparse (ParseNeedData cont) acc = do 27 | dta <- refill 28 | doparse (cont dta) acc 29 | 30 | data TestObj = TestObj T.Text T.Text Int T.Text T.Text Int 31 | 32 | instance FromJSON TestObj where 33 | parseJSON = withObject "obj" $ \o -> 34 | TestObj <$> (AE..:) o "guid" 35 | <*> (AE..:) o "picture" 36 | <*> (AE..:) o "age" 37 | <*> (AE..:) o "about" 38 | <*> (AE..:) o "phone" 39 | <*> (AE..:) o "index" 40 | 41 | -- cabal run -- aeson-benchmark-fastobj 65536 2000 json-data/buffer-builder.json 42 | 43 | main :: IO () 44 | main = do 45 | (bs:cnt:args) <- getArgs 46 | let count = read cnt :: Int 47 | blkSize = read bs 48 | forM_ args $ \arg -> bracket (openFile arg ReadMode) hClose $ \h -> do 49 | putStrLn $ arg ++ ":" 50 | start <- getCurrentTime 51 | let loop !good !bad 52 | | good+bad >= count = return (good, bad) 53 | | otherwise = do 54 | hSeek h AbsoluteSeek 0 55 | let refill = B.hGet h blkSize 56 | 57 | let obj1 = TestObj <$> "guid" .: string <*> "picture" .: string <*> "age" .: integer <*> "about" .: string <*> "phone" .: string <*> "index" .: integer :: Parser TestObj 58 | let obj2 = objectOf $ TestObj <$> "guid" .: string <*> "picture" .: string <*> "age" .: integer <*> "about" .: string <*> "phone" .: string <*> "index" .: integer :: Parser TestObj 59 | let obj3 = value :: Parser TestObj 60 | 61 | let parser3 = arrayOf obj2 :: Parser TestObj 62 | 63 | result <- parseWith refill parser3 =<< refill 64 | case result of 65 | [] -> loop good (bad+1) 66 | _ -> loop (good+1) bad 67 | (good, _) <- loop 0 0 68 | delta <- flip diffUTCTime start `fmap` getCurrentTime 69 | putStrLn $ " " ++ show good ++ " good, " ++ show delta 70 | let rate = fromIntegral count / realToFrac delta :: Double 71 | putStrLn $ " " ++ show (round rate :: Int) ++ " per second" 72 | -------------------------------------------------------------------------------- /benchmarks/json-data/example.json: -------------------------------------------------------------------------------- 1 | {"web-app": { 2 | "servlet": [ 3 | { 4 | "servlet-name": "cofaxCDS", 5 | "servlet-class": "org.cofax.cds.CDSServlet", 6 | "init-param": { 7 | "configGlossary:installationAt": "Philadelphia, PA", 8 | "configGlossary:adminEmail": "ksm@pobox.com", 9 | "configGlossary:poweredBy": "Cofax", 10 | "configGlossary:poweredByIcon": "/images/cofax.gif", 11 | "configGlossary:staticPath": "/content/static", 12 | "templateProcessorClass": "org.cofax.WysiwygTemplate", 13 | "templateLoaderClass": "org.cofax.FilesTemplateLoader", 14 | "templatePath": "templates", 15 | "templateOverridePath": "", 16 | "defaultListTemplate": "listTemplate.htm", 17 | "defaultFileTemplate": "articleTemplate.htm", 18 | "useJSP": false, 19 | "jspListTemplate": "listTemplate.jsp", 20 | "jspFileTemplate": "articleTemplate.jsp", 21 | "cachePackageTagsTrack": 200, 22 | "cachePackageTagsStore": 200, 23 | "cachePackageTagsRefresh": 60, 24 | "cacheTemplatesTrack": 100, 25 | "cacheTemplatesStore": 50, 26 | "cacheTemplatesRefresh": 15, 27 | "cachePagesTrack": 200, 28 | "cachePagesStore": 100, 29 | "cachePagesRefresh": 10, 30 | "cachePagesDirtyRead": 10, 31 | "searchEngineListTemplate": "forSearchEnginesList.htm", 32 | "searchEngineFileTemplate": "forSearchEngines.htm", 33 | "searchEngineRobotsDb": "WEB-INF/robots.db", 34 | "useDataStore": true, 35 | "dataStoreClass": "org.cofax.SqlDataStore", 36 | "redirectionClass": "org.cofax.SqlRedirection", 37 | "dataStoreName": "cofax", 38 | "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver", 39 | "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon", 40 | "dataStoreUser": "sa", 41 | "dataStorePassword": "dataStoreTestQuery", 42 | "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';", 43 | "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log", 44 | "dataStoreInitConns": 10, 45 | "dataStoreMaxConns": 100, 46 | "dataStoreConnUsageLimit": 100, 47 | "dataStoreLogLevel": "debug", 48 | "maxUrlLength": 500}}, 49 | { 50 | "servlet-name": "cofaxEmail", 51 | "servlet-class": "org.cofax.cds.EmailServlet", 52 | "init-param": { 53 | "mailHost": "mail1", 54 | "mailHostOverride": "mail2"}}, 55 | { 56 | "servlet-name": "cofaxAdmin", 57 | "servlet-class": "org.cofax.cds.AdminServlet"}, 58 | 59 | { 60 | "servlet-name": "fileServlet", 61 | "servlet-class": "org.cofax.cds.FileServlet"}, 62 | { 63 | "servlet-name": "cofaxTools", 64 | "servlet-class": "org.cofax.cms.CofaxToolsServlet", 65 | "init-param": { 66 | "templatePath": "toolstemplates/", 67 | "log": 1, 68 | "logLocation": "/usr/local/tomcat/logs/CofaxTools.log", 69 | "logMaxSize": "", 70 | "dataLog": 1, 71 | "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log", 72 | "dataLogMaxSize": "", 73 | "removePageCache": "/content/admin/remove?cache=pages&id=", 74 | "removeTemplateCache": "/content/admin/remove?cache=templates&id=", 75 | "fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder", 76 | "lookInContext": 1, 77 | "adminGroupID": 4, 78 | "betaServer": true}}], 79 | "servlet-mapping": { 80 | "cofaxCDS": "/", 81 | "cofaxEmail": "/cofaxutil/aemail/*", 82 | "cofaxAdmin": "/admin/*", 83 | "fileServlet": "/static/*", 84 | "cofaxTools": "/tools/*"}, 85 | 86 | "taglib": { 87 | "taglib-uri": "cofax.tld", 88 | "taglib-location": "/WEB-INF/tlds/cofax.tld"}}} 89 | -------------------------------------------------------------------------------- /test/UnescapeSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module UnescapeSpec where 4 | 5 | import Control.Applicative ((<$>)) 6 | import qualified Data.Aeson as AE 7 | import qualified Data.ByteString as BS 8 | import qualified Data.ByteString.Char8 as BSC 9 | import qualified Data.ByteString.Lazy as BSL 10 | import Data.Either (isLeft) 11 | import qualified Data.Text as T 12 | import Data.Text.Encoding (encodeUtf16BE, encodeUtf8) 13 | import Numeric (showHex) 14 | 15 | import Data.JsonStream.Unescape 16 | import Test.Hspec 17 | import Test.QuickCheck 18 | import qualified Test.QuickCheck.Unicode as QUNI 19 | 20 | spec :: Spec 21 | spec = do 22 | describe "Correctly converts correct data" $ do 23 | it "Converts empty string" $ 24 | unescapeText "" `shouldBe` Right "" 25 | it "Converts ascii text" $ 26 | unescapeText "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" `shouldBe` Right "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 27 | it "Converts UTF-8 data" $ do 28 | let txt = "žluťoučký kůň úpěl ďábelské kódy" :: T.Text 29 | unescapeText (encodeUtf8 txt) `shouldBe` Right txt 30 | it "Converts backslash chars" $ 31 | unescapeText "abc\\b\\n\\r\\t\\\\\\/\\\"a konec" `shouldBe` Right "abc\b\n\r\t\\/\"a konec" 32 | it "Converts surrogate chars" $ 33 | unescapeText "\\ud801\\udc37" `shouldBe` Right "𐐷" 34 | 35 | describe "Fails on incorrect JSON backslashing" $ do 36 | it "Fails on backslash at end of string" $ 37 | unescapeText "aaa\\" `shouldSatisfy` isLeft 38 | it "Fails on incorrect backslash char" $ 39 | unescapeText "asfdjfk\\xyyy" `shouldSatisfy` isLeft 40 | it "Fails on incomplete unicode" $ 41 | unescapeText "aaa\\u1" `shouldSatisfy` isLeft 42 | it "Fails on incomplete unicode 2" $ 43 | unescapeText "aaa\\u1Xyz" `shouldSatisfy` isLeft 44 | 45 | it "Fails on unexpected lower surrogate" $ 46 | unescapeText "\\udc37\\ud801" `shouldSatisfy` isLeft 47 | it "Fails on uncompleted surrogate" $ 48 | unescapeText "\\ud801" `shouldSatisfy` isLeft 49 | it "Fails on uncompleted surrogate 2" $ 50 | unescapeText "\\ud801a" `shouldSatisfy` isLeft 51 | it "Fails on uncompleted surrogate 3" $ 52 | unescapeText "\\ud801\\u0012" `shouldSatisfy` isLeft 53 | describe "Fails on incorrect data UTF8" $ 54 | it "Fails on bad utf-8" $ do 55 | let txt = "žluťoučký kůň úpěl ďábelské kódy" :: T.Text 56 | unescapeText (BS.drop 1 $ encodeUtf8 txt) `shouldSatisfy` isLeft 57 | 58 | describe "It correctly decodes aeson encoded string" $ do 59 | it "QuickCheck with aeson encode - standard UTF8" $ do 60 | let check txt = 61 | let encoded = BS.init $ BS.tail (BSL.toStrict $ AE.encode (AE.String txt)) 62 | in unescapeText encoded `shouldBe` Right txt 63 | deepCheck check 64 | it "QuickCheck with aeson encode - \\u encoded data" $ do 65 | let check txt = -- Convert everything to \\uXXXX notation 66 | let u16chars = BS.concat $ map ((BS.append "\\u" . btohex) . BS.take 2) 67 | $ take (BS.length u16 `div` 2) 68 | $ iterate (BS.drop 2) u16 69 | u16 = encodeUtf16BE txt 70 | btohex cb = BSC.pack $ concatMap tohex $ BS.unpack cb 71 | tohex c 72 | | c < 16 = "0" ++ showHex c "" 73 | | otherwise = showHex c "" 74 | in unescapeText u16chars `shouldBe` Right txt 75 | deepCheck check 76 | 77 | deepCheck :: (T.Text -> Expectation) -> IO () 78 | deepCheck = quickCheckWith (stdArgs { maxSuccess = 10000}) 79 | 80 | instance Arbitrary T.Text where 81 | arbitrary = T.pack <$> QUNI.string 82 | 83 | main :: IO () 84 | main = hspec spec 85 | -------------------------------------------------------------------------------- /c_lib/unescape_string.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | #define UTF8_ACCEPT 0 7 | #define UTF8_REJECT 12 8 | 9 | static const uint8_t utf8d[] = { 10 | // The first part of the table maps bytes to character classes that 11 | // to reduce the size of the transition table and create bitmasks. 12 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 13 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 14 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 15 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 16 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 17 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 18 | 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 19 | 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 20 | 21 | // The second part is a transition table that maps a combination 22 | // of a state of the automaton and a character class to a state. 23 | 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, 24 | 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, 25 | 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, 26 | 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 27 | 12,36,12,12,12,12,12,12,12,12,12,12, 28 | }; 29 | 30 | static uint32_t inline decode(uint32_t* state, uint32_t* codep, uint32_t byte) { 31 | uint32_t type = utf8d[byte]; 32 | 33 | *codep = (*state != UTF8_ACCEPT) ? 34 | (byte & 0x3fu) | (*codep << 6) : 35 | (0xff >> type) & (byte); 36 | 37 | *state = utf8d[256 + *state + type]; 38 | return *state; 39 | } 40 | 41 | static int inline ishexnum(uint32_t c) 42 | { 43 | return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 44 | } 45 | 46 | static uint32_t inline decode_hex(uint32_t c) 47 | { 48 | if (c >= '0' && c <= '9') 49 | return c - '0'; 50 | else if (c >= 'a' && c <= 'f') 51 | return c - 'a' + 10; 52 | else if (c >= 'A' && c <= 'F') 53 | return c - 'A' + 10; 54 | return 0; // Should not happen 55 | } 56 | 57 | static int inline isLowSurrogate(uint16_t c) 58 | { 59 | return c >= 0xDC00 && c <= 0xDFFF; 60 | } 61 | 62 | static int inline isHighSurrogate(uint16_t c) 63 | { 64 | return c >= 0xD800 && c <= 0xDBFF; 65 | } 66 | 67 | // Decode, return non-zero value on error 68 | int 69 | _jstream_decode_string(uint16_t *const dest, size_t *destoff, 70 | const uint8_t *s, const uint8_t *const srcend) 71 | { 72 | uint16_t *d = dest + *destoff; 73 | uint32_t state = 0; 74 | uint32_t codepoint; 75 | 76 | int surrogate = 0; 77 | uint16_t unidata; 78 | 79 | // Optimized version of dispatch when just an ASCII char is expected 80 | #define DISPATCH_ASCII(label) {\ 81 | if (s >= srcend) {\ 82 | return -1;\ 83 | }\ 84 | codepoint = *s++;\ 85 | goto label;\ 86 | } 87 | 88 | standard: 89 | // Test end of stream 90 | while (s < srcend) { 91 | if (*s <= 127) 92 | codepoint = *s++; 93 | else if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) { 94 | if (state == UTF8_REJECT) 95 | return -1; 96 | continue; 97 | } 98 | 99 | if (codepoint == '\\') 100 | DISPATCH_ASCII(backslash) 101 | else if (codepoint <= 0xffff) 102 | *d++ = (uint16_t) codepoint; 103 | else { 104 | *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10)); 105 | *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF)); 106 | } 107 | } 108 | *destoff = d - dest; 109 | // Exit point 110 | return (state != UTF8_ACCEPT); 111 | backslash: 112 | switch (codepoint) { 113 | case '"': 114 | case '\\': 115 | case '/': 116 | *d++ = (uint16_t) codepoint; 117 | goto standard; 118 | break; 119 | case 'b': *d++ = '\b';goto standard; 120 | case 'f': *d++ = '\f';goto standard; 121 | case 'n': *d++ = '\n';goto standard; 122 | case 'r': *d++ = '\r';goto standard; 123 | case 't': *d++ = '\t';goto standard; 124 | case 'u': DISPATCH_ASCII(unicode1);;break; 125 | default: 126 | return -1; 127 | } 128 | unicode1: 129 | if (!ishexnum(codepoint)) 130 | return -1; 131 | unidata = decode_hex(codepoint) << 12; 132 | DISPATCH_ASCII(unicode2); 133 | unicode2: 134 | if (!ishexnum(codepoint)) 135 | return -1; 136 | unidata |= decode_hex(codepoint) << 8; 137 | DISPATCH_ASCII(unicode3); 138 | unicode3: 139 | if (!ishexnum(codepoint)) 140 | return -1; 141 | unidata |= decode_hex(codepoint) << 4; 142 | DISPATCH_ASCII(unicode4); 143 | unicode4: 144 | if (!ishexnum(codepoint)) 145 | return -1; 146 | unidata |= decode_hex(codepoint); 147 | *d++ = (uint16_t) unidata; 148 | 149 | if (surrogate) { 150 | if (!isLowSurrogate(unidata)) 151 | return -1; 152 | surrogate = 0; 153 | } else { 154 | if (isHighSurrogate(unidata)) { 155 | surrogate = 1; 156 | DISPATCH_ASCII(surrogate1); 157 | } else if (isLowSurrogate(unidata)) 158 | return -1; 159 | } 160 | goto standard; 161 | surrogate1: 162 | if (codepoint != '\\') 163 | return -1; 164 | DISPATCH_ASCII(surrogate2) 165 | surrogate2: 166 | if (codepoint != 'u') 167 | return -1; 168 | DISPATCH_ASCII(unicode1) 169 | } 170 | -------------------------------------------------------------------------------- /json-stream.cabal: -------------------------------------------------------------------------------- 1 | name: json-stream 2 | version: 0.4.6.0 3 | synopsis: Incremental applicative JSON parser 4 | description: Easy to use JSON parser fully supporting incremental parsing. 5 | Parsing grammar in applicative form. 6 | . 7 | The parser is compatibile with aeson and its FromJSON class. 8 | It is possible to use aeson monadic parsing when appropriate. 9 | . 10 | The parser supports constant-space safe incremental parsing regardless 11 | of the input data. In addition to performance-critical parts written in C, 12 | a lot of performance is gained by being less memory intensive especially 13 | when used for stream parsing. 14 | 15 | homepage: https://github.com/ondrap/json-stream 16 | license: BSD3 17 | license-file: LICENSE 18 | author: Ondrej Palkovsky 19 | maintainer: palkovsky.ondrej@gmail.com 20 | category: Text, JSON 21 | build-type: Simple 22 | cabal-version: >=1.10 23 | extra-source-files: c_lib/lexer.h, changelog.md, benchmarks/json-data/*.json, README.md 24 | 25 | source-repository head 26 | type: git 27 | location: https://github.com/ondrap/json-stream.git 28 | 29 | flag conduit 30 | description: Support the conduit package 31 | manual: True 32 | default: False 33 | 34 | library 35 | exposed-modules: Data.JsonStream.Parser 36 | other-modules: Data.JsonStream.TokenParser 37 | , Data.JsonStream.CLexType 38 | , Data.JsonStream.CLexer 39 | , Data.JsonStream.Unescape 40 | c-sources: c_lib/lexer.c, c_lib/unescape_string.c 41 | includes: c_lib/lexer.h 42 | include-dirs: c_lib 43 | build-depends: base >=4.11 && <5 44 | , bytestring 45 | , text 46 | , aeson >= 0.7 && < 2.3 47 | , vector 48 | , unordered-containers 49 | , containers 50 | , scientific 51 | , primitive 52 | if flag(conduit) 53 | exposed-modules: Data.JsonStream.Conduit 54 | build-depends: conduit 55 | 56 | default-language: Haskell2010 57 | Ghc-Options: -Wall -fwarn-incomplete-uni-patterns 58 | 59 | 60 | test-suite doctest 61 | -- Doctest needs to expose CLexType, which we probably don't want. Disable tests for now. 62 | Buildable: False 63 | 64 | Type: exitcode-stdio-1.0 65 | Default-Language: Haskell2010 66 | HS-Source-Dirs: test, . 67 | Main-Is: doctests.hs 68 | other-modules: Data.JsonStream.CLexType 69 | , Data.JsonStream.Unescape 70 | , Data.JsonStream.CLexer 71 | , Data.JsonStream.Parser 72 | , Data.JsonStream.TokenParser 73 | c-sources: c_lib/lexer.c, c_lib/unescape_string.c 74 | includes: c_lib/lexer.h 75 | include-dirs: c_lib 76 | Build-Depends: base >= 4.11 && <5 77 | , doctest >= 0.9.3 78 | , bytestring 79 | , text 80 | , aeson 81 | , vector 82 | , containers 83 | , unordered-containers 84 | , hspec 85 | , scientific 86 | , directory 87 | , QuickCheck 88 | , quickcheck-unicode 89 | , primitive 90 | 91 | test-suite spec 92 | main-is: Spec.hs 93 | other-modules: Data.JsonStream.CLexType 94 | , Data.JsonStream.Unescape 95 | , ParserSpec 96 | , UnescapeSpec 97 | , Data.JsonStream.CLexer 98 | , Data.JsonStream.Parser 99 | , Data.JsonStream.TokenParser 100 | c-sources: c_lib/lexer.c, c_lib/unescape_string.c 101 | include-dirs: c_lib 102 | includes: c_lib/lexer.h 103 | type: exitcode-stdio-1.0 104 | hs-source-dirs: test, . 105 | default-language: Haskell2010 106 | ghc-options: -Wall 107 | build-depends: base >=4.11 && <5 108 | , bytestring 109 | , text 110 | , aeson 111 | , vector 112 | , containers 113 | , unordered-containers 114 | , hspec 115 | , scientific 116 | , directory 117 | , QuickCheck 118 | , quickcheck-unicode 119 | , primitive 120 | 121 | 122 | -- executable spdtest 123 | -- main-is: spdtest.hs 124 | -- other-modules: Data.JsonStream.TokenParser, Data.JsonStream.CLexType, Data.JsonStream.CLexer 125 | -- ghc-options: -O2 -Wall -fprof-auto 126 | -- c-sources: c_lib/lexer.c 127 | -- include-dirs: c_lib 128 | -- default-language: Haskell2010 129 | -- build-depends: base >=4.11 && <5 130 | -- , bytestring 131 | -- , text 132 | -- , aeson 133 | -- , vector 134 | -- , unordered-containers 135 | -- , scientific 136 | -------------------------------------------------------------------------------- /benchmarks/json-data/integers.json: -------------------------------------------------------------------------------- 1 | [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 29, 31, 33, 35, 37, 39, 42, 44, 47, 50, 53, 56, 60, 63, 67, 72, 76, 81, 86, 91, 96, 102, 109, 115, 122, 130, 138, 146, 155, 165, 175, 186, 197, 209, 222, 236, 250, 266, 282, 299, 318, 337, 358, 380, 403, 428, 454, 482, 511, 543, 576, 611, 649, 688, 730, 775, 823, 873, 926, 983, 1043, 1107, 1175, 1247, 1323, 1404, 1490, 1582, 1679, 1781, 1890, 2006, 2129, 2259, 2398, 2544, 2700, 2865, 3041, 3227, 3425, 3634, 3857, 4093, 4343, 4609, 4891, 5191, 5509, 5846, 6204, 6583, 6986, 7414, 7868, 8349, 8860, 9403, 9978, 10589, 11237, 11925, 12655, 13430, 14252, 15124, 16050, 17032, 18075, 19181, 20355, 21601, 22923, 24326, 25815, 27396, 29072, 30852, 32740, 34744, 36871, 39128, 41523, 44064, 46762, 49624, 52661, 55884, 59305, 62935, 66787, 70875, 75213, 79817, 84702, 89887, 95389, 101227, 107423, 113998, 120976, 128381, 136239, 144578, 153427, 162818, 172784, 183360, 194583, 206493, 219132, 232545, 246778, 261883, 277912, 294923, 312975, 332131, 352460, 374034, 396928, 421223, 447005, 474365, 503400, 534212, 566911, 601610, 638433, 677511, 718980, 762987, 809688, 859247, 911840, 967652, 1026880, 1089734, 1156434, 1227217, 1302333, 1382046, 1466638, 1556408, 1651673, 1752769, 1860052, 1973902, 2094721, 2222934, 2358996, 2503385, 2656613, 2819219, 2991777, 3174898, 3369227, 3575451, 3794297, 4026539, 4272995, 4534536, 4812086, 5106625, 5419191, 5750889, 6102889, 6476435, 6872845, 7293518, 7739939, 8213686, 8716429, 9249944, 9816115, 10416939, 11054539, 11731166, 12449207, 13211198, 14019829, 14877955, 15788605, 16754994, 17780533, 18868844, 20023768, 21249383, 22550016, 23930257, 25394980, 26949356, 28598872, 30349352, 32206975, 34178300, 36270285, 38490317, 40846232, 43346349, 45999492, 48815029, 51802899, 54973651, 58338478, 61909260, 65698602, 69719882, 73987297, 78515911, 83321713, 88421668, 93833782, 99577160, 105672079, 112140056, 119003924, 126287916, 134017747, 142220706, 150925751, 160163614, 169966908, 180370243, 191410345, 203126189, 215559137, 228753081, 242754599, 257613123, 273381107, 290114218, 307871529, 326715729, 346713346, 367934976, 390455540, 414354543, 439716356, 466630515, 495192035, 525501749, 557666661, 591800322, 628023236, 666463282, 707256166, 750545902, 796485316, 845236589, 896971830, 951873682, 1010135966, 1071964368, 1137577163, 1207205986, 1281096650, 1359510014, 1442722903, 1531029087, 1624740315, 1724187420, 1829721484, 1941715077, 2060563573, 2186686548, 2320529259, 2462564214, 2613292844, 2773247272, 2942992191, 3123126858, 3314287206, 3517148098, 3732425698, 3960880011, 4203317554, 4460594215, 4733618266, 5023353573, 5330822998, 5657112012, 6003372524, 6370826950, 6760772526, 7174585891, 7613727944, 8079749004, 8574294281, 9099109685, 9656047991, 10247075377, 10874278366, 11539871197, 12246203633, 12995769265, 13791214310, 14635346955, 15531147272, 16481777734, 17490594386, 18561158687, 19697250088, 20902879371, 22182302812, 23540037202, 24980875800, 26509905246, 28132523526, 29854459026, 31681790754, 33620969802, 35678842122, 37862672691, 40180171161, 42639519077, 45249398761, 48019023960, 50958172379, 54077220194, 57387178688, 60899733121, 64627283986, 68582990784, 72780818484, 77235586822, 81963022620, 86979815309, 92303675844, 97953399235, 103948930895, 110311437058, 117063379497, 124228594829, 131832378662, 139901574895, 148464670491, 157551896043, 167195332496, 177429024407, 188289100133, 199813899374, 212044108527, 225022904322, 238796106249, 253412338321, 268923200725, 285383451995, 302851202324, 321388118716, 341059642687, 361935221296, 384088552321, 407597844432, 432546093294, 459021374572, 487117154867, 516932621682, 548573033590, 582150091830, 617782334651, 655595555791, 695723248569, 738307077168, 783497376747, 831453684183, 882345301285, 936351892486, 993664119121, 1054484312524, 1119027188325, 1187520604468, 1260206365627, 1337341076854, 1419197049486, 1506063262491, 1598246382662, 1696071847252, 1799885012878, 1910052374747, 2026962860500, 2151029203266, 2282689398739, 2422408251457, 2570679015712, 2728025136906, 2895002099486, 3072199387991, 3260242568131, 3459795495242, 3671562657914, 3896291665080, 4134775885316, 4387857247705, 4656429214122, 4941439933460, 5243895588908, 5564863950114, 5905478142772, 6266940648935, 6650527552175, 7057593042589, 7489574197539, 7947996055022, 8434476997558, 8950734465625, 9498591020797, 10079980779998, 10696956243580, 11351695541337, 12046510122031, 12783852913581, 13566326982715, 14396694724673, 15277887615381, 16213016560543, 17205382878181, 18258489953389, 19376055606456, 20562025218016, 21820585657560, 23156180064488, 24573523533875, 26077619762337, 27673778712750, 29367635360200, 31165169585327, 33072727285306, 35097042776985, 37245262571278, 39524970602741, 41944215003394, 44511536515321, 47235998642351, 50127219647252, 53195406507421, 56451390948928, 59906667686130, 63573435001862, 67464637811456, 71594013362620, 75976139732519, 80626487293267, 85561473327514, 90798519986944, 96356115798305, 102253880934088, 108512636478302, 115154477931865, 122202853217119, 129682645456833, 137620260819954, 146043721744222, 154982765864743, 164468950997792, 174535766550465, 185218751749486, 196555621106568, 208586397563259, 221353553785311, 234902162105402, 249280053643550, 264537987166964, 280729828285480, 297912739615178, 316147382581544, 335498131574595, 356033301212013, 377825387512598, 400951323831468, 425492752460545, 451536312853150, 479173947490266, 508503226468250, 539627691953919, 572657223723034, 607708427072674, 644905044476938, 684378392439283, 726267825083707, 770721226121430, 817895530929871, 867957280587026, 921083209817197, 977460870923688, 1037289295911185, 1100779699135317, 1168156222959992, 1239656729054927, 1315533638126921, 1396054821049394, 1481504544536185, 1572184474698156, 1668414742025481, 1770535071555377, 1878905982215138, 1993910059574563, 2115953306501003, 2245466576485316, 2382907094698829, 2528760072151156, 2683540418647385, 2847794560591953] 2 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter10.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"207858021","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 04:30:38 +0000","from_user":"pboudarga","id_str":"30120402839666689","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I'm at Rolla Sushi Grill (27737 Bouquet Canyon Road, #106, Btw Haskell Canyon and Rosedell Drive, Saugus) http://4sq.com/gqqdhs","id":30120402839666689,"from_user_id":207858021,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://foursquare.com" rel="nofollow">foursquare</a>"},{"from_user_id_str":"69988683","profile_image_url":"http://a0.twimg.com/profile_images/1211955817/avatar_7888_normal.gif","created_at":"Wed, 26 Jan 2011 04:25:23 +0000","from_user":"YNK33","id_str":"30119083059978240","metadata":{"result_type":"recent"},"to_user_id":null,"text":"hsndfile 0.5.0: Free and open source Haskell bindings for libsndfile http://bit.ly/gHaBWG Mac Os","id":30119083059978240,"from_user_id":69988683,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"81492","profile_image_url":"http://a1.twimg.com/profile_images/423894208/Picture_7_normal.jpg","created_at":"Wed, 26 Jan 2011 04:24:28 +0000","from_user":"satzz","id_str":"30118851488251904","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Emacs\u306e\u30e2\u30fc\u30c9\u8868\u793a\u304c\u4eca(Ruby Controller Outputz RoR Flymake REl hs)\u3068\u306a\u3063\u3066\u3066\u3088\u304f\u308f\u304b\u3089\u306a\u3044\u3093\u3060\u3051\u3069\u6700\u5f8c\u306eREl\u3068\u304bhs\u3063\u3066\u4f55\u3060\u308d\u3046\u2026haskell\u3068\u304b2\u5e74\u4ee5\u4e0a\u66f8\u3044\u3066\u306a\u3044\u3051\u3069\u2026","id":30118851488251904,"from_user_id":81492,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.hootsuite.com" rel="nofollow">HootSuite</a>"},{"from_user_id_str":"9518356","profile_image_url":"http://a2.twimg.com/profile_images/119165723/ocaml-icon_normal.png","created_at":"Wed, 26 Jan 2011 04:19:19 +0000","from_user":"planet_ocaml","id_str":"30117557788741632","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I so miss #haskell type classes in #ocaml - i want to do something like refinement. Also why does ocaml not have... http://bit.ly/geYRwt","id":30117557788741632,"from_user_id":9518356,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"218059","profile_image_url":"http://a1.twimg.com/profile_images/1053837723/twitter-icon9_normal.jpg","created_at":"Wed, 26 Jan 2011 04:16:32 +0000","from_user":"aprikip","id_str":"30116854940835840","metadata":{"result_type":"recent"},"to_user_id":null,"text":"yatex-mode\u3084haskell-mode\u306e\u3053\u3068\u3067\u3059\u306d\u3001\u308f\u304b\u308a\u307e\u3059\u3002","id":30116854940835840,"from_user_id":218059,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://sites.google.com/site/yorufukurou/" rel="nofollow">YoruFukurou</a>"},{"from_user_id_str":"216363","profile_image_url":"http://a1.twimg.com/profile_images/72454310/Tim-Avatar_normal.png","created_at":"Wed, 26 Jan 2011 04:15:30 +0000","from_user":"dysinger","id_str":"30116594684264448","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell in Hawaii tonight for me... #fun","id":30116594684264448,"from_user_id":216363,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.nambu.com/" rel="nofollow">Nambu</a>"},{"from_user_id_str":"1774820","profile_image_url":"http://a2.twimg.com/profile_images/61169291/dan_desert_thumb_normal.jpg","created_at":"Wed, 26 Jan 2011 04:13:36 +0000","from_user":"DanMil","id_str":"30116117682851840","metadata":{"result_type":"recent"},"to_user_id":1594784,"text":"@ojrac @chewedwire @tomheon Haskell isn't a language, it's a belief system. A seductive one...","id":30116117682851840,"from_user_id":1774820,"to_user":"ojrac","geo":null,"iso_language_code":"en","to_user_id_str":"1594784","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"659256","profile_image_url":"http://a0.twimg.com/profile_images/746976711/angular-final_normal.jpg","created_at":"Wed, 26 Jan 2011 04:11:06 +0000","from_user":"djspiewak","id_str":"30115488931520512","metadata":{"result_type":"recent"},"to_user_id":null,"text":"One of the very nice things about Haskell as opposed to SML is the reduced proliferation of identifiers (e.g. andb, orb, etc). #typeclasses","id":30115488931520512,"from_user_id":659256,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 04:06:12 +0000","from_user":"listwarenet","id_str":"30114255890026496","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-cafe/84752-re-haskell-cafe-gpl-license-of-h-matrix-and-prelude-numeric.html Re: Haskell-c","id":30114255890026496,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://1e10.org/cloud/" rel="nofollow">1e10</a>"},{"from_user_id_str":"1594784","profile_image_url":"http://a2.twimg.com/profile_images/378515773/square-profile_normal.jpg","created_at":"Wed, 26 Jan 2011 04:01:29 +0000","from_user":"ojrac","id_str":"30113067333324800","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @tomheon: @ojrac @chewedwire Don't worry, learning Haskell will not give you any clear idea what monad means.","id":30113067333324800,"from_user_id":1594784,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"}],"max_id":30120402839666689,"since_id":0,"refresh_url":"?since_id=30120402839666689&q=haskell","next_page":"?page=2&max_id=30120402839666689&rpp=10&q=haskell","results_per_page":10,"page":1,"completed_in":0.012714,"since_id_str":"0","max_id_str":"30120402839666689","query":"haskell"} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # json-stream - Applicative incremental JSON parser for Haskell 2 | 3 | [![Hackage](https://img.shields.io/hackage/v/json-stream.svg)](https://hackage.haskell.org/package/json-stream) 4 | 5 | # When to use this library 6 | 7 | - use [aeson](https://hackage.haskell.org/package/aeson) if you can; compile aeson with `cffi` flag if you need better performance 8 | - use `json-stream` if you 9 | - need streaming 10 | - need every bit of performance (do profile; the best course could be using the aeson `value` parser with json-stream) 11 | - do not care that parsing may not fail on malformed JSON data 12 | - do not need advanced error reporting; json-stream tends to skip data that 13 | doesn't fit parsing rules (this might be implemented better in the future) 14 | 15 | # Intro 16 | 17 | Most haskellers use the excellent [aeson](https://hackage.haskell.org/package/aeson) library 18 | to decode and encode JSON structures. Unfortunately, although very fast, this parser 19 | must read the whole structure into memory. Json-stream allows incremental parsing. 20 | 21 | > Parsing performance is generally better than aeson, sometimes significantly better. 22 | > Json-stream uses a small and fast C lexer to parse the JSON into tokens. This results 23 | > in quite significant performance gain. Ideal scenario is parsing large files 24 | > where not all information is required; json-stream parses only what is really needed. 25 | 26 | Standard aeson library reads the whole input, creates an object in memory representing 27 | the JSON structure which is then converted into proper values using FromJSON instances. 28 | This library is compatible with aeson - you can immediately use FromJSON instances almost without 29 | any change in code and enjoy incremental parsing. The real strength is in the applicative interface 30 | which allows to parse only those parts of JSON that are of interest while skipping what is not needed. 31 | 32 | The parsing process uses the least amount of memory possible and is completely lazy. It does not perfectly 33 | check for JSON syntax and the behaviour on incorrect JSON input is undefined (it cheats quite a lot; 34 | but this is needed for constant-space parsing). **The result on badly formed input is undefined, 35 | the parser does not guarantee failing on bad input.** 36 | 37 | - The parser generally does not fail. If the data does not match, the parser silently ignores it. 38 | The failures should be only syntax errors in JSON. 39 | - The ',' and ':' characters in the lexer are treated as white-space. 40 | - When a value is not needed to be parsed, it is parsed by a parser counting braces and brackets. 41 | Anything can happen, the parser just waits for the sum of openings to equal sum of closings. 42 | - The length of an object key is limited to ~64K, records with longer keys are ignored. 43 | 44 | ## Motivation 45 | 46 | Result of ElasticSearch bulk operations is a large JSON with this structure: 47 | ```json 48 | { 49 | "took":42, 50 | "errors":true, 51 | "items": [ 52 | {"index": {"_index":"test","_type":"type1","_id":"3","status":400,"error":"Some error "}}, 53 | {"index":{"_index":"test","_type":"type1","_id":"4","_version":2,"status":200}} 54 | ] 55 | } 56 | ``` 57 | 58 | We want the parser to return an empty list immediately when it encounters the *errors* key 59 | and the value is *false*. If the value is *true*, we want the parser to return a list of 60 | `_id` keys with an error status. 61 | 62 | 63 | ```haskell 64 | -- | Result of bulk operation 65 | resultParser :: Parser [(Text, Text)] 66 | resultParser = const [] <$> filterI not ("errors" .: bool) 67 | <|> many ("items" .: arrayOf bulkItemError) 68 | 69 | bulkItemError :: Parser (Text, Text) 70 | bulkItemError = objectWithKey "index" $ 71 | (,) <$> "_id" .: string 72 | <*> "error" .: string 73 | <* filterI statusError ("status" .: integer) 74 | where 75 | statusError s = s < 200 || s > (299 :: Int) 76 | 77 | ``` 78 | ## Performance 79 | 80 | The crude lexing is done by a C-optimized code in batches, the 81 | lexed pieces are then parsed using the user-specified parser. Json-stream 82 | is generally slightly faster than aeson. It is significantly faster 83 | in the following scenarios: 84 | 85 | - parsing numbers 86 | - parsing strings when aeson is not compiled with `cffi` flag 87 | (the `cffi` flag of aeson enables fast text decoding borrowed from json-stream) 88 | - parsing only subset of big JSON structures 89 | 90 | Json-stream in streaming mode is also much friendlier to the GC. 91 | 92 | Using json-stream parser instead of aeson `value` evades the need to build the structure 93 | using aeson `Value` and then converting it to the user-requested structure. Instead 94 | the structure is built on the fly directly during the parsing phase. 95 | 96 | Json-stream can optimize certain scenarios. If not all data from the input stream is 97 | required, it is skipped by the parsers. 98 | 99 | ## Constant space parsing 100 | 101 | If the matching parser follows certain rules and the input chunks have limited size, 102 | the parsing should run in constant space. If you have a large JSON structure but need 103 | only small pieces, the parsing can be very fast - when the data does not match what 104 | is expected, it is parsed only by the lexical parser and ignored. The object key 105 | length is limited to 64K, maximum length of a string can be limited with `safeString` 106 | parser. The number of digits in a number is limited to 200.000, longer number will 107 | make the parser fail. 108 | 109 | ## Examples 110 | 111 | ```haskell 112 | -- The parseByteString function always returns a list of 'things'. 113 | -- Other functions are available. 114 | >>> :t parseByteString 115 | parseByteString :: Parser a -> ByteString -> [a] 116 | 117 | -- 'value' stands for FromJSON instance that will be yielded; 118 | -- most normal types work by default 119 | >>> parseByteString value "[1,2,3]" :: [[Int]] 120 | [[1,2,3]] 121 | 122 | -- the parser says we have an 'array of values'; i.e. return each value in array 123 | >>> parseByteString (arrayOf value) "[1,2,3]" :: [Int] 124 | [1,2,3] 125 | 126 | -- Use <*> for parallel parsing. Order is not important. 127 | >>> let test = "[{\"name\": \"John\", \"age\": 20}, {\"age\": 30, \"name\": \"Frank\"} ]" 128 | >>> let parser = arrayOf $ (,) <$> "name" .: value 129 | <*> "age" .: value 130 | >>> parseByteString parser test :: [(Text,Int)] 131 | [("John",20),("Frank",30)] 132 | 133 | -- Use objectOf for parsing objects (it's faster than parallel parsing). 134 | >>> let test = "[{\"name\": \"John\", \"age\": 20}, {\"age\": 30, \"name\": \"Frank\"} ]" 135 | >>> let parser = arrayOf $ objectOf $ (,) <$> "name" .: value 136 | <*> "age" .: value 137 | >>> parseByteString parser test :: [(Text,Int)] 138 | [("John",20),("Frank",30)] 139 | 140 | -- If you have more results returned from each branch, all are combined. 141 | >>> let test = "[{\"key1\": [1,2], \"key2\": [5,6], \"key3\": [8,9]}]" 142 | >>> let parser = arrayOf $ (,) <$> "key2" .: (arrayOf value) 143 | <*> "key1" .: (arrayOf value) 144 | >>> parseByteString parser test :: [(Int, Int)] 145 | [(6,2),(6,1),(5,2),(5,1)] 146 | 147 | -- Use <> to return both branches 148 | let test = "[{\"key1\": [1,2], \"key2\": [5,6], \"key3\": [8,9]}]" 149 | >>> let parser = arrayOf $ "key1" .: (arrayOf value) 150 | <> "key2" .: (arrayOf value) 151 | >>> parseByteString parser test :: [Int] 152 | [1,2,5,6] 153 | 154 | -- objectItems function enriches value with object key 155 | >>> let test = "[{\"key1\": [1,2,3], \"key2\": [5,6,7]}]" 156 | >>> parseByteString (arrayOf $ objectItems value) test :: [(Text, [Int])] 157 | [("key1",[1,2,3]),("key2",[5,6,7])] 158 | >>> parseByteString (arrayOf $ objectItems $ arrayOf value) test :: [(Text, Int)] 159 | [("key1",1),("key1",2),("key1",3),("key2",5),("key2",6),("key2",7)] 160 | 161 | -- .:? produces a maybe value; Nothing if match is not found or is null. 162 | -- .| produces a default value if the preceding didn't produce anything 163 | >>> let test = "[{\"name\":\"John\", \"value\": 12}, {\"name\":\"name2\"}, {\"value\":12}]" 164 | >>> let parser = arrayOf $ (,) <$> "name" .:? string 165 | <*> "value" .: integer .| 0 166 | >>> parseByteString parser test :: [(Maybe Text, Int)] 167 | [(Just "John",12),(Just "name2",0),(Nothing,12)] 168 | 169 | -- And it works the same with the objectOf parser 170 | >>> let test = "[{\"name\":\"John\", \"value\": 12}, {\"name\":\"name2\"}, {\"value\":12}]" 171 | >>> let parser = arrayOf $ objectOf $ (,) <$> "name" .:? string 172 | <*> "value" .: integer .| 0 173 | >>> parseByteString parser test :: [(Maybe Text, Int)] 174 | [(Just "John",12),(Just "name2",0),(Nothing,12)] 175 | 176 | ``` 177 | 178 | See [haddocks](https://hackage.haskell.org/package/json-stream) documentation for more details. 179 | -------------------------------------------------------------------------------- /c_lib/lexer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "lexer.h" 9 | 10 | /* 11 | * Batch lexer for JSON 12 | * 13 | * When each handle_* function is called, 2 things hold: 14 | * - at least 1 character is available in the input buffer 15 | * - at least 1 result slot is free 16 | * 17 | * 18 | ) 19 | */ 20 | 21 | static inline int isempty(char chr) 22 | { 23 | return (chr == ':' || chr == ',' || isspace(chr)); 24 | } 25 | 26 | static inline int isJnumber(char chr) 27 | { 28 | return ((chr >= '0' && chr <= '9') || chr == '-' || chr == '.' || chr == '+' || chr == 'e' || chr == 'E'); 29 | } 30 | 31 | // Add simple result to the result list 32 | static inline void add_simple_res(int restype, struct lexer *lexer, int length, struct lexer_result *result) 33 | { 34 | struct lexer_result *res = &result[lexer->result_num]; 35 | 36 | res->restype = restype; 37 | res->startpos = lexer->position; 38 | res->length = length; 39 | lexer->result_num++; 40 | } 41 | 42 | static inline int handle_space(const char *input, struct lexer *lexer) 43 | { 44 | /* Skip space */ 45 | while (lexer->position < lexer->length && isempty(input[lexer->position])) 46 | lexer->position++; 47 | 48 | if (lexer->position >= lexer->length) 49 | return LEX_YIELD; 50 | 51 | return LEX_OK; 52 | } 53 | 54 | static inline int handle_base(const char *input, struct lexer *lexer, struct lexer_result *result) 55 | { 56 | if (handle_space(input, lexer)) 57 | return LEX_OK; 58 | 59 | char chr = input[lexer->position]; 60 | switch (chr) { 61 | case '{': add_simple_res(RES_OPEN_BRACE, lexer, 1, result); lexer->position++;break; 62 | case '}': add_simple_res(RES_CLOSE_BRACE, lexer, 1, result); lexer->position++;break; 63 | case '[': add_simple_res(RES_OPEN_BRACKET, lexer, 1, result); lexer->position++;break; 64 | case ']': add_simple_res(RES_CLOSE_BRACKET, lexer, 1, result); lexer->position++;break; 65 | case '"': lexer->current_state = STATE_STRING; 66 | lexer->state_data = 0; 67 | lexer->state_data_2 = 0; 68 | lexer->position++; 69 | return LEX_OK; 70 | case 't': lexer->current_state = STATE_TRUE; lexer->state_data = 1; lexer->position++;return LEX_OK; 71 | case 'f': lexer->current_state = STATE_FALSE; lexer->state_data = 1; lexer->position++;return LEX_OK; 72 | case 'n': lexer->current_state = STATE_NULL; lexer->state_data = 1; lexer->position++;return LEX_OK; 73 | default: 74 | if (isJnumber(chr)) { 75 | lexer->current_state = STATE_NUMBER; 76 | lexer->state_data = 0; 77 | return LEX_OK; 78 | } else { 79 | // Unknown character 80 | return LEX_ERROR; 81 | } 82 | } 83 | return LEX_OK; 84 | } 85 | 86 | static inline int handle_ident(const char *input, struct lexer *lexer, const char *ident, int idtype, 87 | struct lexer_result *result) 88 | { 89 | while (lexer->position < lexer->length) { 90 | char chr = input[lexer->position]; 91 | if (!ident[lexer->state_data]) { 92 | // Check that the next character is allowed 93 | if (isempty(chr) || chr == ']' || chr == '}') { 94 | add_simple_res(idtype, lexer, lexer->state_data, result); 95 | lexer->current_state = STATE_BASE; 96 | return LEX_OK; 97 | } else { 98 | // Unexpected next character in handle_ident 99 | return LEX_ERROR; 100 | } 101 | } 102 | if (ident[lexer->state_data] != chr) 103 | return LEX_ERROR; 104 | lexer->state_data++; 105 | lexer->position++; 106 | } 107 | return LEX_OK; 108 | } 109 | 110 | /* Read a number; compute the number if the 'int' type can hold it */ 111 | int handle_number(const char *input, struct lexer *lexer, struct lexer_result *result) 112 | { 113 | /* Just eat characters that can be numbers and feed them to a table */ 114 | // Copy the character to buffer 115 | int startposition = lexer->position; 116 | 117 | // Try to compute the number fitting to int - 32-bit=9, 64-bit=18 118 | int maxdigits = sizeof(long) == 8 ? 18 : 9; 119 | long computedNumber = 0; 120 | int digits = 0; 121 | int gotDot = 0; 122 | int dotDigits = 0; 123 | int invalid = 0; 124 | int sign = 1; 125 | 126 | // Do not try on number continuation 127 | if (lexer->state_data) 128 | invalid = 1; 129 | 130 | for (;lexer->position < lexer->length && isJnumber(input[lexer->position]);++lexer->position) { 131 | char ch = input[lexer->position]; 132 | if (!invalid) { 133 | if (lexer->position == startposition && ch == '-') { 134 | sign = -1; 135 | } else if (isdigit(ch)) { 136 | digits++; 137 | computedNumber = computedNumber * 10 + (ch - '0'); 138 | if (gotDot) 139 | dotDigits++; 140 | } else if (ch == '.' && gotDot == 0) { 141 | gotDot = 1; 142 | } else 143 | invalid = 1; // We do not support E notation to optimize or some syntax error 144 | 145 | if (digits > maxdigits) 146 | invalid = 1; 147 | } 148 | } 149 | 150 | struct lexer_result *res = &result[lexer->result_num]; 151 | res->adddata = lexer->state_data; 152 | if (lexer->position == lexer->length) { 153 | res->restype = RES_NUMBER_PARTIAL; 154 | // We can just point directly to the input 155 | res->startpos = startposition; 156 | res->length = lexer->position - startposition; 157 | lexer->state_data = 1; 158 | } else if (!invalid) { 159 | /* Optimized number generation, so that we don't have to parse it in haskell */ 160 | res->restype = RES_NUMBER_SMALL; 161 | res->adddata = sign * computedNumber; 162 | res->length = dotDigits; 163 | 164 | lexer->current_state = STATE_BASE; 165 | } else { 166 | res->restype = RES_NUMBER; 167 | // We can just point directly to the input 168 | res->startpos = startposition; 169 | res->length = lexer->position - startposition; 170 | 171 | lexer->current_state = STATE_BASE; 172 | } 173 | 174 | lexer->result_num++; 175 | return LEX_OK; 176 | } 177 | 178 | /* Handle beginning of a string, the '"' is already stripped 179 | * 180 | * state_data: 1 - this is string continuation 181 | * state_data_2: 1 - we have just skipped the backslash 182 | */ 183 | int handle_string(const char *input, struct lexer *lexer, struct lexer_result *result) 184 | { 185 | int startposition = lexer->position; 186 | char ch; 187 | int hasspecialchar = 0; 188 | 189 | for (ch=input[lexer->position]; lexer->position < lexer->length; ch = input[++lexer->position]) { 190 | if (ch < 32 || ch > 126) 191 | hasspecialchar = 1; 192 | if (lexer->state_data_2) 193 | lexer->state_data_2 = 0; 194 | else if (ch == '\\') { 195 | lexer->state_data_2 = 1; 196 | hasspecialchar = 1; 197 | } else if (ch == '"') 198 | break; 199 | } 200 | 201 | struct lexer_result *res = &result[lexer->result_num]; 202 | res->startpos = startposition; 203 | res->length = lexer->position - startposition; 204 | if (lexer->position < lexer->length && input[lexer->position] == '"') { 205 | res->restype = RES_STRING; 206 | if (lexer->state_data) 207 | res->adddata = 1; // Indicate that we are final portion of the string 208 | else if (hasspecialchar) 209 | res->adddata = 0; // Indicate that the string contains escaped/UTF-8 characters 210 | else 211 | res->adddata = -1; // Indicate that the string is clean ASCII (optimization) 212 | 213 | lexer->result_num++; 214 | lexer->current_state = STATE_BASE; 215 | lexer->position++; // Skip the final '"' 216 | return LEX_OK; 217 | } else { 218 | // Emit partial string 219 | res->restype = RES_STRING_PARTIAL; 220 | res->adddata = 0; 221 | lexer->result_num++; 222 | 223 | lexer->state_data = 1; 224 | return LEX_OK; 225 | } 226 | } 227 | 228 | int lex_json(const char *input, struct lexer *lexer, struct lexer_result *result) 229 | { 230 | int res = LEX_OK; 231 | static void* dispatch_table[] = { 232 | &&state_base, &&state_string, &&state_number, &&state_true, 233 | &&state_false, &&state_null 234 | }; 235 | #define DISPATCH() { \ 236 | if (!(lexer->position < lexer->length && lexer->result_num < lexer->result_limit && res == 0)) \ 237 | return res; \ 238 | goto *dispatch_table[lexer->current_state];\ 239 | } 240 | 241 | DISPATCH(); 242 | state_base: 243 | res = handle_base(input, lexer, result); 244 | DISPATCH(); 245 | state_string: 246 | res = handle_string(input, lexer, result); 247 | DISPATCH(); 248 | state_number: 249 | res = handle_number(input, lexer, result); 250 | DISPATCH(); 251 | state_true: 252 | res = handle_ident(input, lexer, "true", RES_TRUE, result); 253 | DISPATCH(); 254 | state_false: 255 | res = handle_ident(input, lexer, "false", RES_FALSE, result); 256 | DISPATCH(); 257 | state_null: 258 | res = handle_ident(input, lexer, "null", RES_NULL, result); 259 | DISPATCH(); 260 | 261 | return res; 262 | } 263 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter20.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"166199691","profile_image_url":"http://a2.twimg.com/profile_images/1252958188/38_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:26 +0000","from_user":"_classicc","id_str":"41191052790603776","metadata":{"result_type":"recent"},"to_user_id":null,"text":"my twitter is actin slow today.","id":41191052790603776,"from_user_id":166199691,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/" rel="nofollow">Twitter for iPhone</a>"},{"from_user_id_str":"138835410","profile_image_url":"http://a1.twimg.com/profile_images/1250985094/243321801_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:26 +0000","from_user":"amereronday","id_str":"41191050307567616","metadata":{"result_type":"recent"},"to_user_id":null,"text":"twitter jail, here i come.","id":41191050307567616,"from_user_id":138835410,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"359548","profile_image_url":"http://a2.twimg.com/profile_images/53612334/don_otvos_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:26 +0000","from_user":"donnyo","id_str":"41191050110451712","metadata":{"result_type":"recent"},"to_user_id":7074534,"text":"@stlsmallbiz I see there is currently a Twitter promo too....tempting....","id":41191050110451712,"from_user_id":359548,"to_user":"stlsmallbiz","geo":null,"iso_language_code":"en","place":{"id":"82b7b2f97b12261d","type":"poi","full_name":"Yammer Inc, San Francisco"},"to_user_id_str":"7074534","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"122523770","profile_image_url":"http://a0.twimg.com/profile_images/1073533262/11646_1076222405045_1810798478_156114_3663737_n_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:26 +0000","from_user":"msBreChan","id_str":"41191049720238080","metadata":{"result_type":"recent"},"to_user_id":null,"text":"so twitter gt spam now -_-","id":41191049720238080,"from_user_id":122523770,"geo":{"type":"Point","coordinates":[35.2213,-80.8276]},"iso_language_code":"en","place":{"id":"4d5ed95f830e9b41","type":"neighborhood","full_name":"Elizabeth, Charlotte"},"to_user_id_str":null,"source":"<a href="http://mobile.twitter.com" rel="nofollow">Twitter for Android</a>"},{"from_user_id_str":"229024950","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_6_normal.png","created_at":"Fri, 25 Feb 2011 17:41:25 +0000","from_user":"NinaMaechik","id_str":"41191047853912064","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Glad I can remember my twitter password! LOL! Hugs to Nick from his aunties...","id":41191047853912064,"from_user_id":229024950,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"215545822","profile_image_url":"http://a2.twimg.com/profile_images/1239909540/RTL_normal.png","created_at":"Fri, 25 Feb 2011 17:41:25 +0000","from_user":"RightToLaugh","id_str":"41191047371558912","metadata":{"result_type":"recent"},"to_user_id":null,"text":"#Bacon wrapped dates http://bit.ly/hUxVC9 Just like my grandma used to make http://twitter.com/#","id":41191047371558912,"from_user_id":215545822,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"217845526","profile_image_url":"http://a0.twimg.com/sticky/default_profile_images/default_profile_3_normal.png","created_at":"Fri, 25 Feb 2011 17:41:25 +0000","from_user":"TheShoeLooker","id_str":"41191047052804096","metadata":{"result_type":"recent"},"to_user_id":38033240,"text":"@ShoeDazzle love love love to twitter or blog about your deals and shoes!\ntheshoelooker.blogspot.com","id":41191047052804096,"from_user_id":217845526,"to_user":"shoedazzle","geo":null,"iso_language_code":"en","to_user_id_str":"38033240","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"24953438","profile_image_url":"http://a3.twimg.com/profile_images/1181252763/76820_10150100014748783_677998782_7386695_4377006_n_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:25 +0000","from_user":"liljermaine32","id_str":"41191046025056256","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Twitter fam I gotta question 4 ya is texas south or midwest #arguments","id":41191046025056256,"from_user_id":24953438,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://mobile.twitter.com" rel="nofollow">Twitter for Android</a>"},{"from_user_id_str":"122451024","profile_image_url":"http://a2.twimg.com/profile_images/1143400129/100706-ETCanada_201134_1__normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:25 +0000","from_user":"Kim_DEon","id_str":"41191045161164801","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Help @CARE @EvaLongoria and ME use Twitter to change the lives of girls in poverty across the world! Ready? Go to http://TwitChange.com NOW!","id":41191045161164801,"from_user_id":122451024,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"128351479","profile_image_url":"http://a3.twimg.com/profile_images/1150198829/Kirby_Photo__edit__normal.JPG","created_at":"Fri, 25 Feb 2011 17:41:24 +0000","from_user":"TheDaveKirby","id_str":"41191043894493184","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Maybe its time to clean house...MY HOUSE http://bit.ly/ejG22P","id":41191043894493184,"from_user_id":128351479,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"87512773","profile_image_url":"http://a0.twimg.com/profile_images/1252511106/image_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:24 +0000","from_user":"supportthehood","id_str":"41191043206610946","metadata":{"result_type":"recent"},"to_user_id":2467330,"text":"@eastcoastmp3 #FF it's the way twitter works !!!","id":41191043206610946,"from_user_id":87512773,"to_user":"eastcoastmp3","geo":null,"iso_language_code":"en","to_user_id_str":"2467330","source":"<a href="http://twitter.com/" rel="nofollow">Twitter for iPhone</a>"},{"from_user_id_str":"149894545","profile_image_url":"http://a0.twimg.com/profile_images/1219334400/patrick_camo_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:24 +0000","from_user":"suburbpat","id_str":"41191040912330752","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I havent had a rib session on twitter in a while.......I wanna Rib with a nigga with alot followers lol","id":41191040912330752,"from_user_id":149894545,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"97749622","profile_image_url":"http://a0.twimg.com/profile_images/1254640781/IMG1633A_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:24 +0000","from_user":"jrricardosantos","id_str":"41191040547430400","metadata":{"result_type":"recent"},"to_user_id":101017476,"text":"@yofzs pq tirou o CAPS LOCK? kkkk tava bem legal...hehehe eu estou no t\u00e9dio...kkk tbm estou no twitter e msn..aff!!","id":41191040547430400,"from_user_id":97749622,"to_user":"yofzs","geo":null,"iso_language_code":"en","to_user_id_str":"101017476","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"94381536","profile_image_url":"http://a1.twimg.com/profile_images/1174777929/41509_100001699300415_3521888_n_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:23 +0000","from_user":"DonNieves","id_str":"41191038978752512","metadata":{"result_type":"recent"},"to_user_id":151926400,"text":"@TiiH13 cheira meu ovo, no email, no orkut, no msn, no facebook, no twitter, no skype, no spark, no ICQ e no google talk","id":41191038978752512,"from_user_id":94381536,"to_user":"TiiH13","geo":null,"iso_language_code":"en","to_user_id_str":"151926400","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"211858432","profile_image_url":"http://a3.twimg.com/profile_images/1231906180/JonasBrothers_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:23 +0000","from_user":"OriginalJBfans","id_str":"41191038328651776","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I literally forgot about this twitter... so whats occuring followers?","id":41191038328651776,"from_user_id":211858432,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"90317132","profile_image_url":"http://a3.twimg.com/profile_images/1230085056/254101635_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:23 +0000","from_user":"Just2smooth","id_str":"41191036936126464","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Wassup twitter","id":41191036936126464,"from_user_id":90317132,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://ubersocial.com" rel="nofollow">\u00dcberSocial</a>"},{"from_user_id_str":"144771504","profile_image_url":"http://a1.twimg.com/profile_images/1243343836/image_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:23 +0000","from_user":"ima_b_b_badman","id_str":"41191036638339072","metadata":{"result_type":"recent"},"to_user_id":null,"text":"i been M.I.A. all day twitter my bad","id":41191036638339072,"from_user_id":144771504,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"207922664","profile_image_url":"http://a2.twimg.com/profile_images/1237106108/258356167_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:22 +0000","from_user":"itsthecarter_","id_str":"41191036013391873","metadata":{"result_type":"recent"},"to_user_id":null,"text":"twitter is a stupid addiction.","id":41191036013391873,"from_user_id":207922664,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://ubersocial.com" rel="nofollow">\u00dcberSocial</a>"},{"from_user_id_str":"150000649","profile_image_url":"http://a0.twimg.com/profile_images/1237789059/belllaa_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:22 +0000","from_user":"angelicabrvo","id_str":"41191035707207680","metadata":{"result_type":"recent"},"to_user_id":null,"text":"que hubo twitter? huy que feoxd","id":41191035707207680,"from_user_id":150000649,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry\u00ae</a>"},{"from_user_id_str":"185713003","profile_image_url":"http://a2.twimg.com/profile_images/1191421540/163229_177015295661467_154687431227587_492170_2147500_n_normal.jpg","created_at":"Fri, 25 Feb 2011 17:41:22 +0000","from_user":"downbytheshores","id_str":"41191033945595905","metadata":{"result_type":"recent"},"to_user_id":null,"text":"follow us on twitter\nwww.twitter.com/downbytheshores http://fb.me/DhDS915e","id":41191033945595905,"from_user_id":185713003,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>"}],"max_id":41191052790603776,"since_id":38643906774044672,"refresh_url":"?since_id=41191052790603776&q=twitter","next_page":"?page=2&max_id=41191052790603776&rpp=20&lang=en&q=twitter","results_per_page":20,"page":1,"completed_in":0.128719,"warning":"adjusted since_id to 38643906774044672 (), requested since_id was older than allowed -- since_id removed for pagination.","since_id_str":"38643906774044672","max_id_str":"41191052790603776","query":"twitter"} -------------------------------------------------------------------------------- /Data/JsonStream/CLexer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE ForeignFunctionInterface #-} 4 | {-# LANGUAGE MultiWayIf #-} 5 | {-# LANGUAGE OverloadedStrings #-} 6 | {-# LANGUAGE RecordWildCards #-} 7 | 8 | module Data.JsonStream.CLexer ( 9 | tokenParser 10 | , unescapeText 11 | ) where 12 | 13 | #if !MIN_VERSION_bytestring(0,10,6) 14 | import Control.Applicative ((<$>)) 15 | #endif 16 | 17 | import Control.Monad (when) 18 | import qualified Data.Aeson as AE 19 | import qualified Data.ByteString as BSW 20 | import qualified Data.ByteString.Char8 as BS 21 | import Data.ByteString.Unsafe (unsafeUseAsCString) 22 | import Data.Scientific (Scientific, scientific) 23 | import Data.Text.Internal.Unsafe (inlinePerformIO) 24 | import Foreign 25 | import Foreign.C.Types 26 | import System.IO.Unsafe (unsafeDupablePerformIO) 27 | 28 | import Data.JsonStream.CLexType 29 | import Data.JsonStream.TokenParser (Element (..), TokenResult (..)) 30 | import Data.JsonStream.Unescape (unescapeText) 31 | 32 | -- | Limit for maximum size of a number; fail if larger number is found 33 | -- this is needed to make this constant-space, otherwise we would eat 34 | -- all memory just memoizing the number. The lexer fails if larger number 35 | -- is encountered. 36 | numberDigitLimit :: Int 37 | numberDigitLimit = 200000 38 | 39 | newtype ResultPtr = ResultPtr { unresPtr :: ForeignPtr () } 40 | 41 | -- | Header for the C routing for batch parsing 42 | data Header = Header { 43 | hdrCurrentState :: !CInt 44 | , hdrStateData :: !CInt 45 | , hdrStateSata2 :: !CInt 46 | 47 | , hdrPosition :: !CInt 48 | , hdrLength :: !CInt 49 | , hdrResultNum :: !CInt 50 | , hdrResultLimit :: !CInt 51 | } deriving (Show) 52 | 53 | defHeader :: Header 54 | defHeader = Header 0 0 0 0 0 0 0 55 | 56 | instance Storable Header where 57 | sizeOf _ = 7 * sizeOf (undefined :: CInt) 58 | alignment _ = sizeOf (undefined :: CInt) 59 | peek ptr = do 60 | state <- peekByteOff ptr 0 61 | sdata1 <- peekByteOff ptr (sizeOf state) 62 | sdata2 <- peekByteOff ptr (2 * sizeOf state) 63 | position <- peekByteOff ptr (3 * sizeOf state) 64 | slength <- peekByteOff ptr (4 * sizeOf state) 65 | sresultnum <- peekByteOff ptr (5 * sizeOf state) 66 | sresultlimit <- peekByteOff ptr (6 * sizeOf state) 67 | return $ Header state sdata1 sdata2 position slength sresultnum sresultlimit 68 | 69 | poke ptr (Header {..}) = do 70 | pokeByteOff ptr 0 hdrCurrentState 71 | pokeByteOff ptr (1 * sizeOf hdrCurrentState) hdrStateData 72 | pokeByteOff ptr (2 * sizeOf hdrCurrentState) hdrStateSata2 73 | pokeByteOff ptr (3 * sizeOf hdrCurrentState) hdrPosition 74 | pokeByteOff ptr (4 * sizeOf hdrCurrentState) hdrLength 75 | pokeByteOff ptr (5 * sizeOf hdrCurrentState) hdrResultNum 76 | pokeByteOff ptr (6 * sizeOf hdrCurrentState) hdrResultLimit 77 | 78 | -- | Hardcoded result record size (see lexer.h) 79 | resultRecSize :: Int 80 | resultRecSize = 4 * sizeOf (undefined :: CInt) + sizeOf (undefined :: CLong) 81 | 82 | peekResultField :: Int -> Int -> ResultPtr -> Int 83 | peekResultField n fieldno fptr = inlinePerformIO $ -- !! Using inlinePerformIO should be safe - we are just reading bytes from memory 84 | withForeignPtr (unresPtr fptr) $ \ptr -> 85 | fromIntegral <$> (peekByteOff ptr (resultRecSize * n + fieldno * isize) :: IO CInt) 86 | where 87 | isize = sizeOf (undefined :: CInt) 88 | 89 | peekResultAddData :: Int -> ResultPtr -> CLong 90 | peekResultAddData n fptr = inlinePerformIO $ -- !! Using inlinePerformIO should be safe - we are just reading bytes from memory 91 | withForeignPtr (unresPtr fptr) $ \ptr -> 92 | fromIntegral <$> (peekByteOff ptr (resultRecSize * n + 4 * isize) :: IO CLong) 93 | where 94 | isize = sizeOf (undefined :: CInt) 95 | 96 | peekResultType :: Int -> ResultPtr -> LexResultType 97 | peekResultType n fptr = inlinePerformIO $ -- !! Using inlinePerformIO should be safe - we are just reading bytes from memory 98 | withForeignPtr (unresPtr fptr) $ \ptr -> 99 | LexResultType <$> peekByteOff ptr (resultRecSize * n) 100 | 101 | foreign import ccall unsafe "lex_json" lexJson :: Ptr CChar -> Ptr Header -> Ptr () -> IO CInt 102 | 103 | -- Call the C lexer. Returns (Error code, Header, (result_count, result_count, ResultPointer)) 104 | callLex :: BS.ByteString -> Header -> (CInt, Header, Int, ResultPtr) 105 | callLex bs hdr = unsafeDupablePerformIO $ -- Using Dupable PerformIO should be safe - at the worst is is executed twice 106 | alloca $ \hdrptr -> do 107 | poke hdrptr (hdr{hdrResultNum=0, hdrLength=fromIntegral $ BS.length bs}) 108 | 109 | bsptr <- unsafeUseAsCString bs return 110 | resptr <- mallocForeignPtrBytes (fromIntegral (hdrResultLimit hdr) * resultRecSize) 111 | res <- withForeignPtr resptr $ \resptr' -> 112 | lexJson bsptr hdrptr resptr' 113 | 114 | hdrres <- peek hdrptr 115 | let !rescount = fromIntegral (hdrResultNum hdrres) 116 | return (res, hdrres, rescount, ResultPtr resptr) 117 | 118 | {-# INLINE substr #-} 119 | substr :: Int -> Int -> BS.ByteString -> BS.ByteString 120 | substr start len = BS.take len . BS.drop start 121 | 122 | data TempData = TempData { 123 | tmpBuffer :: BS.ByteString 124 | , tmpHeader :: Header 125 | , tmpError :: Bool 126 | , tmpNumbers :: [BS.ByteString] 127 | } 128 | 129 | -- | Parse number from bytestring to Scientific using JSON syntax rules 130 | parseNumber :: BS.ByteString -> Maybe Scientific 131 | parseNumber tnumber = do 132 | let 133 | (csign, r1) = parseSign tnumber :: (Int, BS.ByteString) 134 | ((num, numdigits), r2) = parseDecimal r1 :: ((Integer, Int), BS.ByteString) 135 | ((frac, frdigits), r3) = parseFract r2 :: ((Integer, Int), BS.ByteString) 136 | (texp, rest) = parseE r3 137 | when (numdigits == 0 || not (BS.null rest)) Nothing 138 | let dpart = fromIntegral csign * (num * (10 ^ frdigits) + fromIntegral frac) :: Integer 139 | e = texp - frdigits 140 | return $ scientific dpart e 141 | where 142 | parseFract txt 143 | | BS.null txt = ((0, 0), txt) 144 | | BS.head txt == '.' = parseDecimal (BS.tail txt) 145 | | otherwise = ((0,0), txt) 146 | 147 | parseE txt 148 | | BS.null txt = (0, txt) 149 | | firstc == 'e' || firstc == 'E' = 150 | let (sign, rest) = parseSign (BS.tail txt) 151 | ((dnum, _), trest) = parseDecimal rest :: ((Int, Int), BS.ByteString) 152 | in (dnum * sign, trest) 153 | | otherwise = (0, txt) 154 | where 155 | firstc = BS.head txt 156 | 157 | parseSign txt 158 | | BS.null txt = (1, txt) 159 | | BS.head txt == '+' = (1, BS.tail txt) 160 | | BS.head txt == '-' = (-1, BS.tail txt) 161 | | otherwise = (1, txt) 162 | 163 | parseDecimal txt 164 | | BS.null txt = ((0, 0), txt) 165 | | otherwise = parseNum txt (0,0) 166 | 167 | parseNum txt (!start, !digits) 168 | | BS.null txt = ((start, digits), txt) 169 | | dchr >= 48 && dchr <= 57 = parseNum (BS.tail txt) (start * 10 + fromIntegral (dchr - 48), digits + 1) 170 | | otherwise = ((start, digits), txt) 171 | where 172 | dchr = BSW.head txt 173 | 174 | -- | Parse particular result 175 | parseResults :: TempData -> (CInt, Header, Int, ResultPtr) -> TokenResult 176 | parseResults TempData{tmpNumbers=tmpNumbers, tmpBuffer=bs} (err, hdr, rescount, resptr) = parse 0 177 | where 178 | newtemp = TempData bs hdr (err /= 0) 179 | -- We iterate the items from CNT to 1, 1 is the last element, CNT is the first 180 | parse n 181 | | n >= rescount = getNextResult (newtemp tmpNumbers) 182 | | otherwise = 183 | let resType = peekResultType n resptr 184 | resStartPos = peekResultField n 1 resptr 185 | resLength = peekResultField n 2 resptr 186 | resAddData = peekResultAddData n resptr 187 | next = parse (n + 1) 188 | context = BS.drop (resStartPos + resLength) bs 189 | textSection = substr resStartPos resLength bs 190 | in case () of 191 | _| resType == resNumberPartial -> 192 | if | resAddData == 0 -> getNextResult (newtemp [textSection]) -- First part of number 193 | | sum (map BS.length tmpNumbers) > numberDigitLimit -> TokFailed -- Number too long 194 | | otherwise -> getNextResult (newtemp (textSection:tmpNumbers)) -- Middle part of number 195 | | resType == resTrue -> PartialResult (JValue (AE.Bool True)) next 196 | | resType == resFalse -> PartialResult (JValue (AE.Bool False)) next 197 | | resType == resNull -> PartialResult (JValue AE.Null) next 198 | | resType == resOpenBrace -> PartialResult ObjectBegin next 199 | | resType == resOpenBracket -> PartialResult ArrayBegin next 200 | -- ObjectEnd and ArrayEnd need pointer to data that wasn't parsed 201 | | resType == resCloseBrace -> PartialResult (ObjectEnd context) next 202 | | resType == resCloseBracket -> PartialResult (ArrayEnd context) next 203 | -- Number optimized - integer 204 | | resType == resNumberSmall -> 205 | if | resLength == 0 -> PartialResult (JInteger resAddData) next 206 | | otherwise -> PartialResult 207 | (JValue (AE.Number $ scientific (fromIntegral resAddData) ((-1) * resLength))) 208 | next 209 | -- Number optimized - floating 210 | | resType == resNumber -> 211 | if | resAddData == 0 -> -- Single one-part number 212 | case parseNumber textSection of 213 | Just num -> PartialResult (JValue (AE.Number num)) next 214 | Nothing -> TokFailed 215 | | otherwise -> -- Concatenate number from partial parts 216 | case parseNumber (BS.concat $ reverse (textSection:tmpNumbers)) of 217 | Just num -> PartialResult (JValue (AE.Number num)) next 218 | Nothing -> TokFailed 219 | | resType == resString -> 220 | if | resAddData == -1 || resAddData == 0 -> -- One-part string without escaped characters; with escaped 221 | PartialResult (StringRaw textSection (resAddData == -1) (BS.drop 1 context)) next -- (tail - skip the last '"') 222 | | otherwise -> PartialResult (StringContent textSection) -- Final part of partial strings 223 | (PartialResult (StringEnd (BS.drop 1 context)) next ) -- (tail - skip the last '"') 224 | | resType == resStringPartial -> 225 | PartialResult (StringContent textSection) next -- string section 226 | | otherwise -> error "Unsupported" 227 | 228 | -- | Estimate number of elements in a chunk 229 | estResultLimit :: BS.ByteString -> CInt 230 | estResultLimit dta = fromIntegral $ 20 + BS.length dta `quot` 5 231 | 232 | getNextResult :: TempData -> TokenResult 233 | getNextResult tmp@TempData{..} 234 | | tmpError = TokFailed 235 | | hdrPosition tmpHeader < hdrLength tmpHeader = parseResults tmp (callLex tmpBuffer tmpHeader) 236 | | otherwise = TokMoreData newdata 237 | where 238 | newdata dta = parseResults newtmp (callLex dta newhdr{hdrResultLimit=estResultLimit dta}) 239 | where 240 | newtmp = tmp{tmpBuffer=dta} 241 | newhdr = tmpHeader{hdrPosition=0, hdrLength=fromIntegral $ BS.length dta} 242 | 243 | 244 | tokenParser :: BS.ByteString -> TokenResult 245 | tokenParser dta = getNextResult (TempData dta newhdr False []) 246 | where 247 | newhdr = defHeader{hdrLength=fromIntegral (BS.length dta), hdrResultLimit=estResultLimit dta} 248 | -------------------------------------------------------------------------------- /benchmarks/json-data/numbers.json: -------------------------------------------------------------------------------- 1 | [1.15, 1.3224999999999998, 1.5208749999999998, 1.7490062499999994, 2.0113571874999994, 2.313060765624999, 2.6600198804687487, 3.0590228625390607, 3.5178762919199196, 4.045557735707907, 4.652391396064092, 5.350250105473706, 6.152787621294761, 7.075705764488975, 8.137061629162321, 9.357620873536668, 10.761264004567169, 12.375453605252241, 14.231771646040077, 16.36653739294609, 18.821518001888, 21.644745702171196, 24.891457557496874, 28.625176191121405, 32.918952619789614, 37.856795512758055, 43.535314839671756, 50.06561206562252, 57.57545387546589, 66.21177195678577, 76.14353775030362, 87.56506841284916, 100.69982867477653, 115.804802975993, 133.17552342239193, 153.15185193575073, 176.1246297261133, 202.5433241850303, 232.9248228127848, 267.86354623470254, 308.04307816990786, 354.24953989539404, 407.3869708797031, 468.49501651165855, 538.7692689884072, 619.5846593366683, 712.5223582371685, 819.4007119727437, 942.3108187686552, 1083.6574415839534, 1246.2060578215462, 1433.136966494778, 1648.1075114689947, 1895.323638189344, 2179.622183917745, 2506.565511505407, 2882.5503382312177, 3314.9328889659, 3812.172822310785, 4383.998745657402, 5041.598557506012, 5797.838341131914, 6667.5140923017, 7667.641206146955, 8817.787387068996, 10140.455495129345, 11661.523819398746, 13410.752392308557, 15422.365251154839, 17735.720038828065, 20396.078044652273, 23455.489751350113, 26973.813214052625, 31019.885196160518, 35672.867975584595, 41023.79817192228, 47177.36789771062, 54253.97308236721, 62392.06904472228, 71750.87940143062, 82513.5113116452, 94890.53800839197, 109124.11870965076, 125492.73651609836, 144316.6469935131, 165964.14404254008, 190858.76564892105, 219487.5804962592, 252410.71757069806, 290272.32520630275, 333813.17398724816, 383885.1500853353, 441467.9225981356, 507688.1109878559, 583841.3276360342, 671417.5267814393, 772130.1557986551, 887949.6791684533, 1021142.1310437213, 1174313.4507002793, 1350460.4683053212, 1553029.5385511192, 1785983.969333787, 2053881.564733855, 2361963.7994439327, 2716258.369360523, 3123697.124764601, 3592251.6934792907, 4131089.447501184, 4750752.864626361, 5463365.794320315, 6282870.663468362, 7225301.262988616, 8309096.452436907, 9555460.920302443, 10988780.058347808, 12637097.067099977, 14532661.627164973, 16712560.871239718, 19219445.001925673, 22102361.752214525, 25417716.0150467, 29230373.417303704, 33614929.42989926, 38657168.84438414, 44455744.17104176, 51124105.79669802, 58792721.66620272, 67611629.91613312, 77753374.40355308, 89416380.56408603, 102828837.64869894, 118253163.29600377, 135991137.79040432, 156389808.45896497, 179848279.7278097, 206825521.68698114, 237849349.94002828, 273526752.4310325, 314555765.2956874, 361739130.09004045, 415999999.60354644, 478399999.5440784, 550159999.4756901, 632683999.3970436, 727586599.3066001, 836724589.20259, 962233277.5829784, 1106568269.2204251, 1272553509.6034887, 1463436536.044012, 1682952016.4506137, 1935394818.9182055, 2225704041.755936, 2559559648.019326, 2943493595.222225, 3385017634.5055585, 3892770279.681392, 4476685821.6336, 5148188694.87864, 5920416999.1104355, 6808479548.977001, 7829751481.32355, 9004214203.522081, 10354846334.050394, 11908073284.157951, 13694284276.781643, 15748426918.29889, 18110690956.04372, 20827294599.450275, 23951388789.367817, 27544097107.772987, 31675711673.938934, 36427068425.02977, 41891128688.78423, 48174797992.10186, 55401017690.91714, 63711170344.5547, 73267845896.2379, 84258022780.67358, 96896726197.77461, 111431235127.4408, 128145920396.5569, 147367808456.04044, 169472979724.44647, 194893926683.11343, 224128015685.58044, 257747218038.41748, 296409300744.1801, 340870695855.80707, 392001300234.1781, 450801495269.3048, 518421719559.70044, 596184977493.6555, 685612724117.7037, 788454632735.3593, 906722827645.6631, 1042731251792.5125, 1199140939561.3892, 1379012080495.5974, 1585863892569.937, 1823743476455.4275, 2097304997923.7415, 2411900747612.3022, 2773685859754.1475, 3189738738717.2695, 3668199549524.8594, 4218429481953.5884, 4851193904246.626, 5578872989883.619, 6415703938366.162, 7378059529121.086, 8484768458489.248, 9757483727262.635, 11221106286352.03, 12904272229304.832, 14839913063700.555, 17065900023255.637, 19625785026743.98, 22569652780755.58, 25955100697868.91, 29848365802549.246, 34325620672931.63, 39474463773871.375, 45395633339952.07, 52204978340944.88, 60035725092086.61, 69041083855899.59, 79397246434284.53, 91306833399427.2, 105002858409341.27, 120753287170742.45, 138866280246353.81, 159696222283306.88, 183650655625802.88, 211198253969673.3, 242877992065124.28, 279309690874892.9, 321206144506126.8, 369387066182045.8, 424795126109352.6, 488514395025755.5, 561791554279618.75, 646060287421561.5, 742969330534795.8, 854414730115015.0, 982576939632267.1, 1129963480577107.2, 1299458002663673.2, 1494376703063224.0, 1718533208522707.5, 1976313189801113.5, 2272760168271280.5, 2613674193511972.0, 3005725322538767.5, 3456584120919582.5, 3975071739057519.5, 4571332499916147.0, 5257032374903569.0, 6045587231139104.0, 6952425315809969.0, 7995289113181464.0, 9194582480158682.0, 1.0573769852182484e+16, 1.2159835330009856e+16, 1.3983810629511332e+16, 1.6081382223938032e+16, 1.8493589557528736e+16, 2.1267627991158044e+16, 2.4457772189831748e+16, 2.8126438018306508e+16, 3.234540372105248e+16, 3.719721427921035e+16, 4.27767964210919e+16, 4.919331588425568e+16, 5.657231326689403e+16, 6.505816025692813e+16, 7.481688429546734e+16, 8.603941693978744e+16, 9.894532948075555e+16, 1.1378712890286886e+17, 1.3085519823829918e+17, 1.5048347797404406e+17, 1.7305599967015066e+17, 1.9901439962067325e+17, 2.288665595637742e+17, 2.6319654349834032e+17, 3.026760250230913e+17, 3.48077428776555e+17, 4.002890430930382e+17, 4.603323995569939e+17, 5.29382259490543e+17, 6.087895984141244e+17, 7.00108038176243e+17, 8.051242439026793e+17, 9.258928804880812e+17, 1.0647768125612933e+18, 1.224493334445487e+18, 1.4081673346123103e+18, 1.6193924348041567e+18, 1.8623013000247798e+18, 2.1416464950284966e+18, 2.462893469282771e+18, 2.8323274896751867e+18, 3.257176613126464e+18, 3.7457531050954337e+18, 4.3076160708597484e+18, 4.95375848148871e+18, 5.696822253712016e+18, 6.551345591768818e+18, 7.53404743053414e+18, 8.66415454511426e+18, 9.963777726881399e+18, 1.1458344385913608e+19, 1.3177096043800648e+19, 1.5153660450370744e+19, 1.7426709517926355e+19, 2.0040715945615307e+19, 2.30468233374576e+19, 2.650384683807624e+19, 3.047942386378767e+19, 3.505133744335582e+19, 4.030903805985919e+19, 4.635539376883806e+19, 5.330870283416377e+19, 6.130500825928833e+19, 7.0500759498181575e+19, 8.10758734229088e+19, 9.323725443634512e+19, 1.0722284260179688e+20, 1.233062689920664e+20, 1.4180220934087634e+20, 1.6307254074200778e+20, 1.8753342185330894e+20, 2.1566343513130526e+20, 2.4801295040100103e+20, 2.8521489296115116e+20, 3.2799712690532385e+20, 3.7719669594112236e+20, 4.337762003322907e+20, 4.988426303821342e+20, 5.7366902493945437e+20, 6.597193786803724e+20, 7.586772854824282e+20, 8.724788783047925e+20, 1.0033507100505113e+21, 1.1538533165580878e+21, 1.326931314041801e+21, 1.5259710111480708e+21, 1.7548666628202813e+21, 2.0180966622433234e+21, 2.3208111615798217e+21, 2.668932835816795e+21, 3.0692727611893135e+21, 3.529663675367711e+21, 4.059113226672867e+21, 4.667980210673796e+21, 5.368177242274866e+21, 6.173403828616095e+21, 7.099414402908508e+21, 8.164326563344784e+21, 9.388975547846502e+21, 1.0797321880023475e+22, 1.2416920162026996e+22, 1.4279458186331043e+22, 1.64213769142807e+22, 1.8884583451422802e+22, 2.171727096913622e+22, 2.497486161450665e+22, 2.872109085668265e+22, 3.3029254485185042e+22, 3.798364265796279e+22, 4.368118905665721e+22, 5.0233367415155795e+22, 5.7768372527429155e+22, 6.643362840654352e+22, 7.639867266752504e+22, 8.78584735676538e+22, 1.0103724460280185e+23, 1.1619283129322213e+23, 1.3362175598720545e+23, 1.5366501938528623e+23, 1.7671477229307915e+23, 2.0322198813704103e+23, 2.3370528635759717e+23, 2.687610793112367e+23, 3.090752412079222e+23, 3.554365273891105e+23, 4.0875200649747705e+23, 4.700648074720986e+23, 5.405745285929133e+23, 6.216607078818502e+23, 7.149098140641278e+23, 8.221462861737468e+23, 9.454682290998088e+23, 1.0872884634647801e+24, 1.250381732984497e+24, 1.4379389929321714e+24, 1.653629841871997e+24, 1.9016743181527962e+24, 2.1869254658757156e+24, 2.5149642857570727e+24, 2.8922089286206334e+24, 3.326040267913728e+24, 3.824946308100787e+24, 4.3986882543159046e+24, 5.05849149246329e+24, 5.817265216332783e+24, 6.6898549987827e+24, 7.693333248600105e+24, 8.84733323589012e+24, 1.0174433221273637e+25, 1.1700598204464681e+25, 1.3455687935134384e+25, 1.547404112540454e+25, 1.779514729421522e+25, 2.04644193883475e+25, 2.353408229659962e+25, 2.7064194641089563e+25, 3.1123823837253e+25, 3.5792397412840944e+25, 4.116125702476708e+25, 4.733544557848214e+25, 5.443576241525446e+25, 6.260112677754262e+25, 7.199129579417401e+25, 8.27899901633001e+25, 9.52084886877951e+25, 1.0948976199096437e+26, 1.25913226289609e+26, 1.4480021023305035e+26, 1.665202417680079e+26, 1.9149827803320907e+26, 2.202230197381904e+26, 2.5325647269891895e+26, 2.9124494360375676e+26, 3.349316851443203e+26, 3.8517143791596826e+26, 4.429471536033635e+26, 5.09389226643868e+26, 5.857976106404481e+26, 6.736672522365153e+26, 7.747173400719924e+26, 8.909249410827912e+26, 1.0245636822452099e+27, 1.1782482345819913e+27, 1.35498546976929e+27, 1.5582332902346833e+27, 1.7919682837698857e+27, 2.0607635263353683e+27, 2.3698780552856732e+27, 2.725359763578524e+27, 3.1341637281153025e+27, 3.6042882873325974e+27, 4.1449315304324867e+27, 4.7666712599973594e+27, 5.481671948996963e+27, 6.303922741346507e+27, 7.249511152548482e+27, 8.336937825430755e+27, 9.587478499245366e+27, 1.102560027413217e+28, 1.2679440315251995e+28, 1.4581356362539794e+28, 1.6768559816920761e+28, 1.9283843789458875e+28, 2.2176420357877703e+28, 2.5502883411559357e+28, 2.932831592329326e+28, 3.3727563311787245e+28, 3.8786697808555327e+28, 4.460470247983863e+28, 5.129540785181441e+28, 5.8989719029586575e+28, 6.783817688402455e+28, 7.801390341662823e+28, 8.971598892912247e+28, 1.0317338726849081e+29, 1.1864939535876443e+29, 1.3644680466257909e+29, 1.5691382536196593e+29, 1.8045089916626083e+29, 2.0751853404119993e+29, 2.3864631414737988e+29, 2.7444326126948684e+29, 3.1560975045990984e+29, 3.629512130288963e+29, 4.173938949832307e+29, 4.800029792307153e+29, 5.520034261153225e+29, 6.348039400326208e+29, 7.300245310375139e+29, 8.39528210693141e+29, 9.65457442297112e+29, 1.1102760586416787e+30, 1.2768174674379305e+30, 1.46834008755362e+30, 1.6885911006866628e+30, 1.941879765789662e+30, 2.2331617306581113e+30, 2.568135990256828e+30, 2.9533563887953516e+30, 3.396359847114654e+30, 3.905813824181852e+30, 4.491685897809129e+30, 5.165438782480498e+30, 5.940254599852573e+30, 6.831292789830458e+30, 7.855986708305026e+30, 9.034384714550779e+30, 1.0389542421733396e+31, 1.1947973784993405e+31, 1.3740169852742412e+31, 1.5801195330653773e+31, 1.817137463025184e+31, 2.0897080824789613e+31, 2.4031642948508052e+31, 2.7636389390784257e+31, 3.1781847799401893e+31, 3.6549124969312175e+31, 4.2031493714709e+31, 4.833621777191535e+31, 5.558665043770264e+31, 6.392464800335803e+31, 7.3513345203861735e+31, 8.454034698444098e+31, 9.722139903210713e+31, 1.1180460888692319e+32, 1.2857530021996165e+32, 1.478615952529559e+32, 1.7004083454089925e+32, 1.9554695972203414e+32, 2.2487900368033926e+32, 2.586108542323901e+32, 2.974024823672486e+32, 3.420128547223359e+32, 3.933147829306862e+32, 4.523120003702891e+32, 5.2015880042583246e+32, 5.981826204897073e+32, 6.879100135631634e+32, 7.910965155976377e+32, 9.097609929372833e+32, 1.0462251418778757e+33, 1.2031589131595571e+33, 1.3836327501334904e+33, 1.591177662653514e+33, 1.8298543120515409e+33, 2.104332458859272e+33, 2.4199823276881623e+33, 2.7829796768413863e+33, 3.200426628367594e+33, 3.6804906226227334e+33, 4.232564216016143e+33, 4.867448848418564e+33, 5.597566175681348e+33, 6.437201102033549e+33, 7.402781267338582e+33, 8.513198457439368e+33, 9.790178226055273e+33, 1.1258704959963564e+34, 1.2947510703958096e+34, 1.488963730955181e+34, 1.712308290598458e+34, 1.9691545341882266e+34, 2.2645277143164603e+34, 2.604206871463929e+34, 2.994837902183518e+34, 3.4440635875110457e+34, 3.960673125637702e+34, 4.554774094483357e+34, 5.237990208655861e+34, 6.023688739954239e+34, 6.927242050947375e+34, 7.96632835858948e+34, 9.161277612377902e+34, 1.0535469254234585e+35, 1.2115789642369772e+35, 1.3933158088725236e+35, 1.6023131802034022e+35, 1.8426601572339122e+35, 2.119059180818999e+35, 2.4369180579418488e+35, 2.8024557666331258e+35, 3.2228241316280944e+35, 3.706247751372308e+35, 4.262184914078154e+35, 4.901512651189877e+35, 5.636739548868358e+35, 6.482250481198611e+35, 7.454588053378403e+35, 8.572776261385162e+35, 9.858692700592935e+35, 1.1337496605681875e+36, 1.3038121096534156e+36, 1.499383926101428e+36, 1.7242915150166418e+36, 1.982935242269138e+36, 2.2803755286095084e+36] 2 | -------------------------------------------------------------------------------- /benchmarks/json-data/jp10.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"2458313","profile_image_url":"http://a2.twimg.com/profile_images/1203653060/fure091226_normal.png","created_at":"Thu, 27 Jan 2011 20:30:04 +0000","from_user":"19princess","id_str":"30724239224995840","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u6d77\u6674\u300c\u672c\u65e5\u306e\u6771\u4eac\u306e\u304a\u5929\u6c17\u306f\u6674\u6642\u3005\u66c7\u3067\u3057\u3087\u3046\u3002\u6700\u9ad8\u6c17\u6e29\u306f8\u5ea6\u3001\u6700\u4f4e\u6c17\u6e29\u306f1\u5ea6\u3067\u3059\u3002\u3042\u306a\u305f\u306e\u4eca\u65e5\u306e\u4eba\u751f\u306b\u3068\u3073\u3063\u304d\u308a\u306e\u304a\u5929\u6c17\u3092\u2665\u300d","id":30724239224995840,"from_user_id":2458313,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://babyprincess.sakura.ne.jp/about/" rel="nofollow">\u5929\u4f7f\u5bb6\u306e\u88cf\u5c71</a>"},{"from_user_id_str":"66578965","profile_image_url":"http://a0.twimg.com/profile_images/1183763267/fossetta_normal.png","created_at":"Thu, 27 Jan 2011 20:00:04 +0000","from_user":"Fossetta_Tokyo","id_str":"30716689779785729","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u304a\u306f\u3088\u3046\u3054\u3056\u3044\u307e\u3059!!\u6771\u4eac\u90fd\u3001\u672c\u65e5\u306e\u304a\u5929\u6c17\u306f\u6674\u6642\u3005\u66c7\u3002\u6700\u9ad8\u6c17\u6e298\u5ea6\u3001\u6700\u4f4e\u6c17\u6e291\u5ea6\u3002\u6771\u4eac\u5730\u65b9\u3067\u306f\u3001\u7a7a\u6c17\u306e\u4e7e\u71e5\u3057\u305f\u72b6\u614b\u304c\u7d9a\u3044\u3066\u3044\u307e\u3059\u3002\u706b\u306e\u53d6\u308a\u6271\u3044\u306b\u6ce8\u610f\u3057\u3066\u4e0b\u3055\u3044\u3002\u4f0a\u8c46\u8af8\u5cf6\u3068\u5c0f\u7b20\u539f\u8af8\u5cf6\u306b\u306f\u3001\u5f37\u98a8\u3001\u6ce2\u6d6a\u3001\u4e7e\u71e5\u3001\u971c\u306e\u6ce8\u610f\u5831\u3092\u767a\u8868\u4e2d\u3067\u3059\u3002","id":30716689779785729,"from_user_id":66578965,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://bit.ly/Fossetta" rel="nofollow">\u30d5\u30a9\u30bb\u30c3\u30bf ver.3.1.1</a>"},{"from_user_id_str":"104041146","profile_image_url":"http://a2.twimg.com/profile_images/863965118/001jwatokyo_normal.jpg","created_at":"Thu, 27 Jan 2011 19:44:33 +0000","from_user":"jwa_tokyo","id_str":"30712787537756160","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u6c17\u8c61\u5e81\u304b\u3089\u6771\u4eac\u306e\u5929\u6c17\u4e88\u5831\u304c\u767a\u8868\u3055\u308c\u307e\u3057\u305f\u3000\u305d\u306e\u5929\u6c17\u4e88\u5831\u3092\u3053\u3053\u3067\u3064\u3076\u3084\u3053\u3046\u304b\u306a\u3041\uff5e\uff1f\u3000\u8003\u3048\u4e2d\u3000\u307e\u3066\u6b21\u53f7\uff08\u7b11","id":30712787537756160,"from_user_id":104041146,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.docodemo.jp/twil/" rel="nofollow">Twil2 (Tweet Anytime, Anywhere by Mail)</a>"},{"from_user_id_str":"144500192","profile_image_url":"http://a3.twimg.com/a/1294874399/images/default_profile_3_normal.png","created_at":"Thu, 27 Jan 2011 18:59:33 +0000","from_user":"kyo4to4","id_str":"30701462774358016","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u6771\u4eac\u90fd \u516b\u4e08\u5cf6 - \u4eca\u65e5\u306e\u5929\u6c17\u306f\u30fb\u30fb\u30fb\u66c7\u306e\u3061\u6674\u3067\u3059\u306e\uff01","id":30701462774358016,"from_user_id":144500192,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://twitter.com/kyo4to4" rel="nofollow">lost-sheep-bot</a>"},{"from_user_id_str":"165724582","profile_image_url":"http://a0.twimg.com/profile_images/1222842347/163087_1256281064506_1753997852_484926_2761052_n_normal.jpg","created_at":"Thu, 27 Jan 2011 17:39:06 +0000","from_user":"Rifqi_19931020","id_str":"30681213748387840","metadata":{"result_type":"recent"},"to_user_id":113796067,"text":"@CHLionRagbaby \u30b1\u30f3\u3061\u3083\u3093\u3001\u671d\u4eca\u307e\u3067\u306e\u81ea\u5206\u306e\u30db\u30fc\u30e0\u30a8\u30ea\u30a2\u304b\u3089\u307e\u3060\u975e\u5e38\u306b\u5bd2\u3044\u3068\u96e8\u304c\u964d\u3063\u3066\u3044\u305f..\u3069\u306e\u3088\u3046\u306b\u73fe\u5728\u306e\u6771\u4eac\u306e\u5929\u6c17\uff1f","id":30681213748387840,"from_user_id":165724582,"to_user":"CHLionRagbaby","geo":null,"iso_language_code":"ja","to_user_id_str":"113796067","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"120911929","profile_image_url":"http://a0.twimg.com/profile_images/772435076/20100312___capture2_normal.png","created_at":"Thu, 27 Jan 2011 17:28:30 +0000","from_user":"Cirno_fan","id_str":"30678546020040705","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u4eca\u65e5\u306e\u6771\u4eac\u306e\u5929\u6c17\u306f\u3001\u66c7\u6642\u3005\u6674\u3067\u6700\u9ad8\u6c17\u6e29\u306f8\u2103\uff01 \u6700\u4f4e\u6c17\u6e29\u306f2\u2103\u3060\u3063\u305f\u3088\uff01 RT @mimi22999 \uff08\uff65\u2200\uff65\uff09\uff4c\u3001\u865a\u5f31\u306a\u8005\u306b\u3068\u3063\u3066\u3001\u6717\u3089\u304b\u306a\u9854\u306f\u4e0a\u5929\u6c17\u3068\u540c\u3058\u304f\u3089\u3044\u3046\u308c\u3057\u3044\u3082\u306e\u3060\u3002\u30d5\u30e9\u30f3\u30af\u30ea\u30f3 #tenki #bot","id":30678546020040705,"from_user_id":120911929,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://blog.livedoor.jp/fairycirno/archives/34686.html" rel="nofollow">\u5e7b\u60f3\u90f7 \u9727\u306e\u6e56</a>"},{"from_user_id_str":"65976527","profile_image_url":"http://a0.twimg.com/profile_images/452810990/little_italies_____normal.jpg","created_at":"Thu, 27 Jan 2011 17:00:03 +0000","from_user":"heta_weather01","id_str":"30671388754845696","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u3010\u30d8\u30bf\u5929\u3011Tere hommikust.\u30a8\u30b9\u30c8\u30cb\u30a2\u3067\u3059\u3002\u4eca\u65e5\u306e\u6771\u4eac\u306e\u5929\u6c17\u306f\u6674\u6642\u3005\u66c7\u3067\u660e\u65e5\u306f\u66c7\u308a\u3067\u3059\u3002\u3061\u306a\u307f\u306b\u50d5\u306e\u3068\u3053\u308d\u3067\u306f\u66c7\u308a\u3067\u6c17\u6e29-7\u2103\u3067\u3059\u3002\u3061\u306a\u307f\u306b\u30b9\u30ab\u30a4\u30d7\u306e\u958b\u767a\u672c\u90e8\u306f\u50d5\u306e\u5bb6\u306b\u3042\u308b\u3093\u3067\u3059\u3002","id":30671388754845696,"from_user_id":65976527,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www15.atpages.jp/~kageyanma/" rel="nofollow">\u5730\u7403\u306e\u4e2d</a>"},{"from_user_id_str":"7278433","profile_image_url":"http://a3.twimg.com/profile_images/1218965303/michael-1_normal.png","created_at":"Thu, 27 Jan 2011 16:50:16 +0000","from_user":"shimohiko","id_str":"30668926035689472","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u3042\u308a\u3002\n\u3010\u7f8e\u4eba\u5929\u6c17/\u6771\u4eac\u3011\u7f8e\u4eba\u5929\u6c17\u30ad\u30e3\u30b9\u30bf\u30fc\u306e"\u3055\u3042\u3084\u3093\u3055\u3093"\u306b\u3088\u308b\u3068\u300c1/29(\u571f)\u306f\u304f\u3082\u308a\u3067\u3001\u964d\u6c34\u78ba\u738740%\u3001\u6700\u9ad8\u6c17\u6e29\u306f6\u2103\u3067\u6700\u4f4e\u6c17\u6e29\u306f1\u2103\u3067\u3059\u300d\u7f8e\u4eba\u5929\u6c17\u21d2http://bit.ly/djB8th http://twitpic.com/3twnl0 #bt_tenki","id":30668926035689472,"from_user_id":7278433,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://bijintenki.jp" rel="nofollow">bijintenki.jp</a>"},{"from_user_id_str":"61770","profile_image_url":"http://a3.twimg.com/profile_images/1206955079/tw172a_normal.png","created_at":"Thu, 27 Jan 2011 16:40:52 +0000","from_user":"rsky","id_str":"30666561853333504","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u3075\u3068\u591c\u7a7a\u3092\u898b\u4e0a\u3052\u3066\u30aa\u30ea\u30aa\u30f3\u304c\u898b\u3048\u306a\u304f\u3066\u300c\u6771\u4eac\u306b\u306f\u7a7a\u304c\u306a\u3044\u300d\u3068\u667a\u6075\u5b50\u306e\u3088\u3046\u306a\u3053\u3068\u3092\u601d\u3063\u305f\u308f\u3051\u3060\u304c\u3001\u305f\u3076\u3093\u534a\u5206\u3050\u3089\u3044\u306f\u5929\u6c17\u306e\u305b\u3044\u3067\u3059","id":30666561853333504,"from_user_id":61770,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.hootsuite.com" rel="nofollow">HootSuite</a>"},{"from_user_id_str":"103259265","profile_image_url":"http://a0.twimg.com/profile_images/767857973/anime_icon_normal.gif","created_at":"Thu, 27 Jan 2011 16:37:42 +0000","from_user":"liveshowonly","id_str":"30665765698928640","metadata":{"result_type":"recent"},"to_user_id":null,"text":"iPhone\u5929\u6c17\u3002\u6771\u4eac4\u2103\u3063\u3066\u7d50\u69cb\u5bd2\u304f\u306a\u3044\u3058\u3083\u3093\u3002\uff08\u78ba\u304b\u306b\u3082\u306e\u51c4\u304f\u5bd2\u3044\u8a33\u3067\u306f\u7121\u3044\uff09\u3002\u798f\u5ca1\u5e02\u306f1\u2103\u3060\u3002\u52dd\u3061\u3060\u305c\u3002Rock'n'roll","id":30665765698928640,"from_user_id":103259265,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://twitter.com/" rel="nofollow">Twitter for iPhone</a>"},{"from_user_id_str":"165242761","profile_image_url":"http://a1.twimg.com/profile_images/1147334744/SN3E00600001_normal.jpg","created_at":"Thu, 27 Jan 2011 16:37:35 +0000","from_user":"deuxavril0502","id_str":"30665736556912640","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u6b8b\u5ff5\u3001\u79c1\u306f\u6c96\u7e04\u3067\u3059(>_<)\u3067\u3082\u304a\u5929\u6c17\u826f\u3055\u305d\u3046\u3067\u826f\u304b\u3063\u305f\u306d\u3002\u3053\u3061\u3089\u306f\u96e8\u3088\uff5eRT @rinandy2010: \u305d\u3046\u3067\u3059\u3063\u266a\u51fa\u5f35\u3067(^^)\u304a\u306d\u3048\u3055\u307e\u306f\u6771\u4eac\u3067\u3059\u304b\uff1f\u3081\u3063\u3061\u3083\u5929\u6c17\u826f\u304f\u3066\u3073\u3063\u304f\u308a\u3067\u3059\u3002\u624b\u888b\u3068\u304b\u5168\u7136\u3044\u3089\u306a\u3044\u3067\u3059\u306d\u30fc RT @deuxavril0502 \u6771\u4eac\u306a\u306e\u30fc\uff1f","id":30665736556912640,"from_user_id":165242761,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.echofon.com/" rel="nofollow">Echofon</a>"},{"from_user_id_str":"163900288","profile_image_url":"http://a1.twimg.com/profile_images/1210224823/icon12945064955238_normal.jpg","created_at":"Thu, 27 Jan 2011 16:24:37 +0000","from_user":"nyao_yurichan","id_str":"30662472734089216","metadata":{"result_type":"recent"},"to_user_id":100985873,"text":"@ray_ko302 \u767a\u898b\u3042\u308a\u304c\u3068\u3067\u3059\u3045\u3002\u3053\u3061\u3089\u3067\u3082\u3088\u308d\u3057\u304f\u306d\u3002\u6771\u4eac\u306f\u4eca\u65e5\u3082\u4e7e\u71e5\u3067\u5927\u5909\u3088\u3002\u305d\u3063\u3061\u3068\u771f\u9006\u306e\u5929\u6c17\u3060\u306d\u3002\u30a2\u30a4\u30b9\u30d0\u30fc\u30f3\u304d\u3092\u3064\u3051\u3066\u3088\u306d\u3002","id":30662472734089216,"from_user_id":163900288,"to_user":"ray_ko302","geo":null,"iso_language_code":"ja","to_user_id_str":"100985873","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"49227098","profile_image_url":"http://a3.twimg.com/profile_images/965007882/____normal.jpg","created_at":"Thu, 27 Jan 2011 15:52:26 +0000","from_user":"dosannko6","id_str":"30654371016478720","metadata":{"result_type":"recent"},"to_user_id":108570870,"text":"@peke_hajiP \u4ffa\u304c\u6771\u4eac \u6765\u3066\u6700\u521d\u306b\u9a5a\u3044\u305f\u306e\u304c\u3001\u5929\u6c17\u4e88\u5831\u3067\u82b1\u7c89\u60c5\u5831\u304c\u6d41\u308c\u308b\u3053\u3068\u3067\u3001\u6700\u521d\u306b\u8a66\u3057\u305f\u306e\u304c \u30b4\u30ad\u69d8 \u53ec\u559a\u306e\u5100\u5f0f\u3067\u3059\u304a\uff1f","id":30654371016478720,"from_user_id":49227098,"to_user":"peke_hajip","geo":null,"iso_language_code":"ja","to_user_id_str":"108570870","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"71148934","profile_image_url":"http://a0.twimg.com/profile_images/1209223630/image_normal.jpg","created_at":"Thu, 27 Jan 2011 15:46:49 +0000","from_user":"123keiko","id_str":"30652958311981058","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u9727\u5cf6\u306e\u964d\u7070\u306e\u30cb\u30e5\u30fc\u30b9\u3092\u898b\u3066\u601d\u3044\u51fa\u3057\u307e\u3057\u305f\u3002\u4e0a\u4eac\u3057\u305f\u59cb\u3081\u306e\u9803\u3001\u6771\u4eac\u306f1\u5e74\u4e2d\u3001\u7070\u304c\u964d\u3089\u306a\u3044\u304b\u3089\u7a7a\u6c17\u304c\u6f84\u3093\u3067\u3066\u904e\u3054\u3057\u3084\u3059\u3044\u306a\u3041\u3001\u3068\u601d\u3063\u305f\u306a\u3041\u3001\u3063\u3066\u3002\n\u5b9f\u5bb6\u306f\u51ac\u306e\u5b63\u7bc0\u98a8\u3067\u685c\u5cf6\u306e\u7070\u304c\u964d\u308b\u5730\u533a\u3067\u3057\u305f\u3002\u9e7f\u5150\u5cf6\u306e\u5929\u6c17\u4e88\u5831\u3067\u306f\u3001\u6bce\u65e5\u3001\u685c\u5cf6\u4e0a\u7a7a\u306e\u98a8\u5411\u304d\u4e88\u5831\u304c\u3067\u307e\u3059\u3002\u3053\u308c\u304b\u3089\u306f\u9727\u5cf6\u3082\u304b\u306a\uff1f","id":30652958311981058,"from_user_id":71148934,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://twitter.com/" rel="nofollow">Twitter for iPhone</a>"},{"from_user_id_str":"91124773","profile_image_url":"http://a3.twimg.com/profile_images/1139308356/prof101007_3-1_normal.jpg","created_at":"Thu, 27 Jan 2011 15:43:35 +0000","from_user":"sanposuruhito","id_str":"30652146277945345","metadata":{"result_type":"recent"},"to_user_id":null,"text":"\u4eca\u5bb5\u306e\u90fd\u5fc3\u306f\u3001\u8eab\u3082\u5f15\u304d\u7de0\u307e\u308b\u3068\u3044\u3046\u3088\u308a\u306f\u51cd\u3048\u308b\u7a0b\u306e\u51b7\u6c17\u3092\u611f\u3058\u308b\u5bd2\u3044\u591c\u3067\u3059\u3002\u660e\u65e5\u65e5\u4e2d\u306e\u6771\u4eac\u306e\u5929\u6c17\u306f\u3001\u5915\u65b9\u306b\u591a\u5c11\u96f2\u304c\u51fa\u308b\u5834\u6240\u304c\u3042\u308a\u305d\u3046\u306a\u3082\u306e\u306e\u6982\u306d\u6674\u308c\u7a7a\u304c\u5e83\u304c\u308a\u7d9a\u3051\u305d\u3046\u3067\u3059\u3002\u6700\u9ad8\u6c17\u6e29\u306f\u30015-6\u5ea6\u4f4d\u3068\u306a\u308a\u305d\u3046\u3067\u3059\u3002#weather_tokyo","id":30652146277945345,"from_user_id":91124773,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://projects.playwell.jp/go/Saezuri" rel="nofollow">Saezuri</a>"}],"max_id":30724239224995840,"since_id":28179956995457024,"refresh_url":"?since_id=30724239224995840&q=%E6%9D%B1%E4%BA%AC%E3%80%80%E5%A4%A9%E6%B0%97","next_page":"?page=2&max_id=30724239224995840&lang=ja&q=%E6%9D%B1%E4%BA%AC%E3%80%80%E5%A4%A9%E6%B0%97","results_per_page":15,"page":1,"completed_in":0.104827,"warning":"adjusted since_id to 28179956995457024 (), requested since_id was older than allowed -- since_id removed for pagination.","since_id_str":"28179956995457024","max_id_str":"30724239224995840","query":"%E6%9D%B1%E4%BA%AC%E3%80%80%E5%A4%A9%E6%B0%97"} -------------------------------------------------------------------------------- /Data/JsonStream/Unescape.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE ForeignFunctionInterface #-} 3 | {-# LANGUAGE MagicHash #-} 4 | {-# LANGUAGE UnliftedFFITypes #-} 5 | {-# LANGUAGE BangPatterns #-} 6 | {-# LANGUAGE MultiWayIf #-} 7 | 8 | {-# OPTIONS_GHC -Wno-unused-imports #-} 9 | 10 | module Data.JsonStream.Unescape ( 11 | unescapeText 12 | , unsafeDecodeASCII 13 | ) where 14 | 15 | import Data.ByteString as B 16 | import Data.ByteString.Internal as B hiding (c2w) 17 | import Data.Text.Encoding.Error (UnicodeException (..)) 18 | import Data.Text.Internal (Text (..)) 19 | import Data.Text.Unsafe (unsafeDupablePerformIO) 20 | import Data.Word (Word8, Word32) 21 | import Foreign.ForeignPtr (withForeignPtr) 22 | import Foreign.Ptr (Ptr, plusPtr) 23 | import Foreign.Storable (peek) 24 | import qualified Data.Text as T 25 | 26 | #if MIN_VERSION_text(2,0,0) 27 | 28 | import qualified Data.Primitive as P 29 | import qualified Data.Text.Array as TA 30 | import qualified Data.Text.Internal as T 31 | import Data.Bits (shiftL, shiftR, (.&.), (.|.)) 32 | import Control.Exception (try, throwIO) 33 | import Foreign.ForeignPtr (ForeignPtr) 34 | import qualified Data.ByteString.Short.Internal as SBS 35 | 36 | #else 37 | 38 | import Control.Exception (evaluate, throw, try) 39 | import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) 40 | import Data.Text.Internal.Private (runText) 41 | import Foreign.Marshal.Utils (with) 42 | import qualified Data.Text.Array as A 43 | import GHC.Base (MutableByteArray#) 44 | import Foreign.C.Types (CInt (..), CSize (..)) 45 | import qualified Data.Text.Encoding as TE 46 | 47 | #endif 48 | 49 | unsafeDecodeASCII :: ByteString -> T.Text 50 | 51 | #if MIN_VERSION_text(2,0,0) 52 | unsafeDecodeASCII bs = withBS bs $ \_fp len -> if len == 0 then T.empty else 53 | let !(SBS.SBS arr) = SBS.toShort bs in T.Text (TA.ByteArray arr) 0 len 54 | 55 | #else 56 | unsafeDecodeASCII = TE.decodeLatin1 57 | #endif 58 | 59 | 60 | #if !MIN_VERSION_text(2,0,0) 61 | 62 | foreign import ccall unsafe "_jstream_decode_string" c_js_decode 63 | :: MutableByteArray# s -> Ptr CSize 64 | -> Ptr Word8 -> Ptr Word8 -> IO CInt 65 | 66 | unescapeText' :: ByteString -> Text 67 | unescapeText' (PS fp off len) = runText $ \done -> do 68 | let go dest = withForeignPtr fp $ \ptr -> 69 | with (0::CSize) $ \destOffPtr -> do 70 | let end = ptr `plusPtr` (off + len) 71 | loop curPtr = do 72 | res <- c_js_decode (A.maBA dest) destOffPtr curPtr end 73 | case res of 74 | 0 -> do 75 | n <- peek destOffPtr 76 | unsafeSTToIO (done dest (fromIntegral n)) 77 | _ -> 78 | throw (DecodeError desc Nothing) 79 | loop (ptr `plusPtr` off) 80 | (unsafeIOToST . go) =<< A.new len 81 | where 82 | desc = "Data.JsonStream.Unescape.unescapeText': Invalid UTF-8 stream" 83 | {-# INLINE unescapeText' #-} 84 | 85 | unescapeText :: ByteString -> Either UnicodeException Text 86 | unescapeText = unsafeDupablePerformIO . try . evaluate . unescapeText' 87 | {-# INLINE unescapeText #-} 88 | 89 | #else 90 | 91 | withBS :: ByteString -> (ForeignPtr Word8 -> Int -> r) -> r 92 | #if MIN_VERSION_bytestring(0,11,0) 93 | withBS (BS !sfp !slen) kont = kont sfp slen 94 | #else 95 | withBS (PS !sfp !soff !slen) kont = kont (plusForeignPtr sfp soff) slen 96 | #endif 97 | {-# INLINE withBS #-} 98 | 99 | unescapeText :: ByteString -> Either UnicodeException Text 100 | unescapeText = unsafeDupablePerformIO . try . unescapeTextIO 101 | 102 | throwDecodeError :: IO a 103 | throwDecodeError = 104 | let desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream" 105 | in throwIO (DecodeError desc Nothing) 106 | 107 | -- The following is copied from aeson-2.0 108 | 109 | ------------------------------------------------------------------------------- 110 | -- unescapeTextIO 111 | ------------------------------------------------------------------------------- 112 | 113 | -- This function is generated using staged-streams 114 | -- See: https://github.com/phadej/staged/blob/master/staged-streams-unicode/src/Unicode/JSON.hs 115 | -- 116 | -- Because @aeson@ better to not use template-haskell itself, 117 | -- we dump the splice and prettify it by hand a bit. 118 | -- 119 | unescapeTextIO :: ByteString -> IO Text 120 | unescapeTextIO bs = withBS bs $ \fptr len -> 121 | withForeignPtr fptr $ \begin -> do 122 | let end :: Ptr Word8 123 | end = plusPtr begin len 124 | 125 | arr <- P.newPrimArray len 126 | 127 | let write3bytes :: Int -> Word8 -> Word8 -> Word8 -> Ptr Word8 -> IO Text 128 | write3bytes !out !b1 !b2 !b3 !inp = do 129 | P.writePrimArray arr out b1 130 | write2bytes (out + 1) b2 b3 inp 131 | 132 | write2bytes :: Int -> Word8 -> Word8 -> Ptr Word8 -> IO Text 133 | write2bytes !out !b1 !b2 !inp = do 134 | P.writePrimArray arr out b1 135 | write1byte (out + 1) b2 inp 136 | 137 | write1byte :: Int -> Word8 -> Ptr Word8 -> IO Text 138 | write1byte !out !b1 !inp = do 139 | P.writePrimArray arr out b1 140 | state_start (out + 1) inp 141 | 142 | writeCodePoint :: Int -> Ptr Word8 -> Word32 -> IO Text 143 | writeCodePoint !out !inp !acc 144 | | acc <= 127 = do 145 | P.writePrimArray arr out (fromIntegral acc :: Word8) 146 | state_start (out + 1) (plusPtr inp 1) 147 | 148 | | acc <= 2047 = do 149 | let b1 = fromIntegral (shiftR acc 6 .|. 192) :: Word8 150 | let b2 = fromIntegral ((acc .&. 63) .|. 128) :: Word8 151 | P.writePrimArray arr out b1 152 | write1byte (out + 1) b2 (plusPtr inp 1) 153 | 154 | | acc <= 65535 = do 155 | let b1 = fromIntegral (shiftR acc 12 .|. 224) :: Word8 156 | let b2 = fromIntegral ((shiftR acc 6 .&. 63) .|. 128) :: Word8 157 | let b3 = fromIntegral ((acc .&. 63) .|. 128) :: Word8 158 | P.writePrimArray arr out b1 159 | write2bytes (out + 1) b2 b3 (plusPtr inp 1) 160 | 161 | | otherwise = do 162 | let b1 = fromIntegral (shiftR acc 18 .|. 240) :: Word8 163 | let b2 = fromIntegral ((shiftR acc 12 .&. 63) .|. 128) :: Word8 164 | let b3 = fromIntegral ((shiftR acc 6 .&. 63) .|. 128) :: Word8 165 | let b4 = fromIntegral ((acc .&. 63) .|. 128) :: Word8 166 | P.writePrimArray arr out b1 167 | write3bytes (out + 1) b2 b3 b4 (plusPtr inp 1) 168 | 169 | state_sudone :: Int -> Ptr Word8 -> Word32 -> Word32 -> IO Text 170 | state_sudone !out !inp !hi !lo 171 | | 56320 <= lo, lo <= 57343 172 | = writeCodePoint out inp (65536 + (shiftL (hi - 55296) 10 .|. (lo - 56320))) 173 | 174 | | otherwise 175 | = throwDecodeError 176 | 177 | state_su4 :: Int -> Ptr Word8 -> Word32 -> Word32 -> IO Text 178 | state_su4 !out !inp !hi !acc 179 | | inp == end = throwDecodeError 180 | | otherwise = do 181 | w8 <- peek inp 182 | if | 48 <= w8, w8 <= 57 -> 183 | state_sudone out inp hi (shiftL acc 4 .|. fromIntegral (w8 - 48)) 184 | | 65 <= w8, w8 <= 70 -> 185 | state_sudone out inp hi (shiftL acc 4 .|. fromIntegral (w8 - 55)) 186 | | 97 <= w8, w8 <= 102 -> 187 | state_sudone out inp hi (shiftL acc 4 .|. fromIntegral (w8 - 87)) 188 | | otherwise -> 189 | throwDecodeError 190 | 191 | state_su3 :: Int -> Ptr Word8 -> Word32 -> Word32 -> IO Text 192 | state_su3 !out !inp !hi !acc 193 | | inp == end = throwDecodeError 194 | | otherwise = do 195 | w8 <- peek inp 196 | if | 48 <= w8, w8 <= 57 -> 197 | state_su4 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 48)) 198 | | 65 <= w8, w8 <= 70 -> 199 | state_su4 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 55)) 200 | | 97 <= w8, w8 <= 102 -> 201 | state_su4 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 87)) 202 | | otherwise -> 203 | throwDecodeError 204 | 205 | state_su2 :: Int -> Ptr Word8 -> Word32 -> Word32 -> IO Text 206 | state_su2 !out !inp !hi !acc 207 | | inp == end = throwDecodeError 208 | | otherwise = do 209 | w8 <- peek inp 210 | if | 48 <= w8, w8 <= 57 -> 211 | state_su3 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 48)) 212 | | 65 <= w8, w8 <= 70 -> 213 | state_su3 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 55)) 214 | | 97 <= w8, w8 <= 102 -> 215 | state_su3 out (plusPtr inp 1) hi (shiftL acc 4 .|. fromIntegral (w8 - 87)) 216 | | otherwise -> 217 | throwDecodeError 218 | 219 | state_su1 :: Int -> Ptr Word8 -> Word32 -> IO Text 220 | state_su1 !out !inp !hi 221 | | inp == end = throwDecodeError 222 | | otherwise = do 223 | w8 <- peek inp 224 | if | 48 <= w8, w8 <= 57 -> 225 | state_su2 out (plusPtr inp 1) hi (fromIntegral (w8 - 48)) 226 | | 65 <= w8, w8 <= 70 -> 227 | state_su2 out (plusPtr inp 1) hi (fromIntegral (w8 - 55)) 228 | | 97 <= w8, w8 <= 102 -> 229 | state_su2 out (plusPtr inp 1) hi (fromIntegral (w8 - 87)) 230 | | otherwise -> 231 | throwDecodeError 232 | 233 | state_su :: Int -> Ptr Word8 -> Word32 -> IO Text 234 | state_su !out !inp !hi 235 | | inp == end = throwDecodeError 236 | | otherwise = do 237 | w8 <- peek inp 238 | case w8 of 239 | 117 -> state_su1 out (plusPtr inp 1) hi 240 | _ -> throwDecodeError 241 | 242 | state_ss :: Int -> Ptr Word8 -> Word32 -> IO Text 243 | state_ss !out !inp !hi 244 | | inp == end = throwDecodeError 245 | | otherwise = do 246 | w8 <- peek inp 247 | case w8 of 248 | 92 -> state_su out (plusPtr inp 1) hi 249 | _ -> throwDecodeError 250 | 251 | state_udone :: Int -> Ptr Word8 -> Word32 -> IO Text 252 | state_udone !out !inp !acc 253 | | acc < 55296 || acc > 57343 = 254 | writeCodePoint out inp acc 255 | 256 | | acc < 56320 = 257 | state_ss out (plusPtr inp 1) acc 258 | 259 | | otherwise = 260 | throwDecodeError 261 | 262 | state_u4 :: Int -> Ptr Word8 -> Word32 -> IO Text 263 | state_u4 !out !inp !acc 264 | | inp == end = throwDecodeError 265 | | otherwise = do 266 | w8 <- peek inp 267 | if | 48 <= w8, w8 <= 57 -> 268 | state_udone out inp (shiftL acc 4 .|. fromIntegral (w8 - 48)) 269 | | 65 <= w8, w8 <= 70 -> 270 | state_udone out inp (shiftL acc 4 .|. fromIntegral (w8 - 55)) 271 | | 97 <= w8, w8 <= 102 -> 272 | state_udone out inp (shiftL acc 4 .|. fromIntegral (w8 - 87)) 273 | | otherwise -> 274 | throwDecodeError 275 | 276 | state_u3 :: Int -> Ptr Word8 -> Word32 -> IO Text 277 | state_u3 !out !inp !acc 278 | | inp == end = throwDecodeError 279 | | otherwise = do 280 | w8 <- peek inp 281 | if | 48 <= w8, w8 <= 57 -> 282 | state_u4 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 48)) 283 | | 65 <= w8, w8 <= 70 -> 284 | state_u4 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 55)) 285 | | 97 <= w8, w8 <= 102 -> 286 | state_u4 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 87)) 287 | | otherwise -> 288 | throwDecodeError 289 | 290 | state_u2 :: Int -> Ptr Word8 -> Word32 -> IO Text 291 | state_u2 !out !inp !acc 292 | | inp == end = throwDecodeError 293 | | otherwise = do 294 | w8 <- peek inp 295 | if | 48 <= w8, w8 <= 57 -> 296 | state_u3 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 48)) 297 | | 65 <= w8, w8 <= 70 -> 298 | state_u3 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 55)) 299 | | 97 <= w8, w8 <= 102 -> 300 | state_u3 out (plusPtr inp 1) (shiftL acc 4 .|. fromIntegral (w8 - 87)) 301 | | otherwise -> 302 | throwDecodeError 303 | 304 | state_u1 :: Int -> Ptr Word8 -> IO Text 305 | state_u1 !out !inp 306 | | inp == end = throwDecodeError 307 | | otherwise = do 308 | w8 <- peek inp 309 | if | 48 <= w8, w8 <= 57 -> 310 | state_u2 out (plusPtr inp 1) (fromIntegral (w8 - 48)) 311 | | 65 <= w8, w8 <= 70 -> 312 | state_u2 out (plusPtr inp 1) (fromIntegral (w8 - 55)) 313 | | 97 <= w8, w8 <= 102 -> 314 | state_u2 out (plusPtr inp 1) (fromIntegral (w8 - 87)) 315 | | otherwise -> 316 | throwDecodeError 317 | 318 | state_escape :: Int -> Ptr Word8 -> IO Text 319 | state_escape !out !inp 320 | | inp == end = throwDecodeError 321 | | otherwise = do 322 | w8 <- peek inp 323 | case w8 of 324 | 34 -> do 325 | P.writePrimArray arr out 34 326 | state_start (out + 1) (plusPtr inp 1) 327 | 328 | 92 -> do 329 | P.writePrimArray arr out 92 330 | state_start (out + 1) (plusPtr inp 1) 331 | 332 | 47 -> do 333 | P.writePrimArray arr out 47 334 | state_start (out + 1) (plusPtr inp 1) 335 | 336 | 98 -> do 337 | P.writePrimArray arr out 8 338 | state_start (out + 1) (plusPtr inp 1) 339 | 340 | 102 -> do 341 | P.writePrimArray arr out 12 342 | state_start (out + 1) (plusPtr inp 1) 343 | 344 | 110 -> do 345 | P.writePrimArray arr out 10 346 | state_start (out + 1) (plusPtr inp 1) 347 | 348 | 114 -> do 349 | P.writePrimArray arr out 13 350 | state_start (out + 1) (plusPtr inp 1) 351 | 352 | 116 -> do 353 | P.writePrimArray arr out 9 354 | state_start (out + 1) (plusPtr inp 1) 355 | 356 | 117 -> 357 | state_u1 out (plusPtr inp 1) 358 | 359 | _ -> throwDecodeError 360 | 361 | state_input4c :: Int -> Ptr Word8 -> Word8 -> Word8 -> Word8 -> IO Text 362 | state_input4c !out !inp !b1 !b2 !b3 363 | | inp == end = throwDecodeError 364 | | otherwise = do 365 | w8 <- peek inp 366 | if | (w8 .&. 192) == 128 367 | , let acc = shiftL (fromIntegral (b1 .&. 7)) 18 368 | , let acc' = acc .|. shiftL (fromIntegral (b2 .&. 63)) 12 369 | , let acc'' = acc' .|. shiftL (fromIntegral (b3 .&. 63)) 6 370 | , let acc''' = acc'' .|. fromIntegral (w8 .&. 63) :: Word32 371 | , acc''' >= 65536 && acc''' < 1114112 -> do 372 | P.writePrimArray arr out b1 373 | write3bytes (out + 1) b2 b3 w8 (plusPtr inp 1) 374 | 375 | | otherwise -> 376 | throwDecodeError 377 | 378 | state_input4b :: Int -> Ptr Word8 -> Word8 -> Word8 -> IO Text 379 | state_input4b !out !inp !b1 !b2 380 | | inp == end = throwDecodeError 381 | | otherwise = do 382 | w8 <- peek inp 383 | if | (w8 .&. 192) == 128 -> 384 | state_input4c out (plusPtr inp 1) b1 b2 w8 385 | 386 | | otherwise -> 387 | throwDecodeError 388 | 389 | state_input4 :: Int -> Ptr Word8 -> Word8 -> IO Text 390 | state_input4 !out !inp !b1 391 | | inp == end = throwDecodeError 392 | | otherwise = do 393 | w8 <- peek inp 394 | if | (w8 .&. 192) == 128 -> 395 | state_input4b out (plusPtr inp 1) b1 w8 396 | 397 | | otherwise -> 398 | throwDecodeError 399 | 400 | state_input3b :: Int -> Ptr Word8 -> Word8 -> Word8 -> IO Text 401 | state_input3b !out !inp !b1 !b2 402 | | inp == end = throwDecodeError 403 | | otherwise = do 404 | w8 <- peek inp 405 | if | (w8 .&. 192) == 128 406 | , let acc = shiftL (fromIntegral (b1 .&. 15)) 12 407 | , let acc' = acc .|. shiftL (fromIntegral (b2 .&. 63)) 6 408 | , let acc'' = acc' .|. fromIntegral (w8 .&. 63) :: Word32 409 | , (acc'' >= 2048 && acc'' < 55296) || acc'' > 57343 -> do 410 | P.writePrimArray arr out b1 411 | write2bytes (out + 1) b2 w8 (plusPtr inp 1) 412 | 413 | | otherwise -> 414 | throwDecodeError 415 | 416 | state_input3 :: Int -> Ptr Word8 -> Word8 -> IO Text 417 | state_input3 !out !inp !b1 418 | | inp == end = throwDecodeError 419 | | otherwise = do 420 | w8 <- peek inp 421 | if | (w8 .&. 192) == 128 -> 422 | state_input3b out (plusPtr inp 1) b1 w8 423 | 424 | | otherwise -> 425 | throwDecodeError 426 | 427 | state_input2 :: Int -> Ptr Word8 -> Word8 -> IO Text 428 | state_input2 !out !inp !b1 429 | | inp == end = throwDecodeError 430 | | otherwise = do 431 | w8 <- peek inp 432 | if | (w8 .&. 192) == 128, 433 | let acc = shiftL (fromIntegral (b1 .&. 63)) 6 :: Word32 434 | acc' = acc .|. fromIntegral (w8 .&. 63) :: Word32 435 | , acc' >= 128 -> do 436 | P.writePrimArray arr out b1 437 | write1byte (out + 1) w8 (plusPtr inp 1) 438 | 439 | | otherwise -> 440 | throwDecodeError 441 | 442 | state_start :: Int -> Ptr Word8 -> IO Text 443 | state_start !out !inp 444 | | inp == end = do 445 | P.shrinkMutablePrimArray arr out 446 | frozenArr <- P.unsafeFreezePrimArray arr 447 | return $ case frozenArr of 448 | P.PrimArray ba -> T.Text (TA.ByteArray ba) 0 out 449 | 450 | | otherwise = do 451 | w8 <- peek inp 452 | if | w8 == 92 -> state_escape out (plusPtr inp 1) 453 | | w8 < 128 -> do 454 | P.writePrimArray arr out w8 455 | state_start (out + 1) (plusPtr inp 1) 456 | 457 | | w8 < 192 -> throwDecodeError 458 | | w8 < 224 -> state_input2 out (plusPtr inp 1) w8 459 | | w8 < 240 -> state_input3 out (plusPtr inp 1) w8 460 | | w8 < 248 -> state_input4 out (plusPtr inp 1) w8 461 | 462 | | otherwise -> throwDecodeError 463 | 464 | -- start the state machine 465 | state_start (0 :: Int) begin 466 | 467 | #endif -------------------------------------------------------------------------------- /test/ParserSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE CPP #-} 3 | module ParserSpec where 4 | 5 | import Control.Applicative 6 | import Test.Hspec 7 | import Data.Monoid ((<>)) 8 | import qualified Data.Aeson as AE 9 | import Data.Aeson (Value(..)) 10 | import qualified Data.ByteString.Char8 as BS 11 | import qualified Data.ByteString.Lazy as BL 12 | import qualified Data.Text as T 13 | import Control.Monad (forM_) 14 | import Data.Text.Encoding (encodeUtf8) 15 | import qualified Data.Vector as Vec 16 | import System.Directory (getDirectoryContents) 17 | import Data.Int 18 | import Data.Word 19 | #if MIN_VERSION_aeson(2,0,0) 20 | import qualified Data.Aeson.KeyMap as AEK 21 | #else 22 | import qualified Data.HashMap.Strict as HMap 23 | #endif 24 | 25 | import Data.JsonStream.Parser 26 | 27 | -- During the tests the single quotes are replaced with double quotes in the strings as 28 | -- otherwise it would be unreadable in haskell 29 | 30 | todquotes :: BS.ByteString -> BS.ByteString 31 | todquotes = BS.map squotes 32 | where 33 | squotes '\'' = '"' 34 | squotes x = x 35 | 36 | parse :: Parser a -> BS.ByteString -> [a] 37 | parse parser text = parseByteString parser (todquotes text) 38 | 39 | testRemaining :: Parser a -> BS.ByteString -> BS.ByteString 40 | testRemaining parser startdata = loop (runParser' parser startdata) 41 | where 42 | loop (ParseNeedData _) = error "Not enough data." 43 | loop (ParseDone rem1) = rem1 44 | loop (ParseFailed err) = error err 45 | loop (ParseYield _ np) = loop np 46 | 47 | testRemainingChunks :: Parser a -> [BS.ByteString] -> BS.ByteString 48 | testRemainingChunks parser = loop (runParser parser) 49 | where 50 | loop (ParseNeedData _) [] = error "Not enough data." 51 | loop (ParseNeedData f) (x:rest) = loop (f x) rest 52 | loop (ParseDone rem1) chunks = rem1 <> BS.concat chunks 53 | loop (ParseFailed err) _ = error err 54 | loop (ParseYield _ np) chunks = loop np chunks 55 | 56 | 57 | specBase :: Spec 58 | specBase = describe "Basic parsing" $ do 59 | it "Parses null values" $ do 60 | let test = "[null,3]" 61 | res = parse (arrayOf value) test :: [Maybe Int] 62 | res `shouldBe` [Nothing, Just 3 ] 63 | 64 | it "Parses bool values" $ do 65 | let test = "[true,false]" 66 | res = parse (arrayOf value) test :: [Bool] 67 | res `shouldBe` [True, False] 68 | 69 | it "Parses string values with special chracters" $ do 70 | let test = "['" `BS.append` (encodeUtf8 "žluť") `BS.append` "', '\\n\\b\\r\\'', '\\u0041\\u0078\\u0161']" 71 | res = parse (arrayOf value) test :: [T.Text] 72 | res `shouldBe` ["\382lu\357","\n\b\r\"","Ax\353"] 73 | 74 | it "Parses fractional values with exponent" $ do 75 | let test = "[1, 2.5, -3.6, 6e1, -3.2e-2]" 76 | res = parse (arrayOf value) test :: [Double] 77 | show res `shouldBe` "[1.0,2.5,-3.6,60.0,-3.2e-2]" 78 | 79 | it "Parses objects 1" $ do 80 | let test = "{'key1': 'value2', 'key2': 'value2'}" 81 | res = parse (objectItems value) test :: [(T.Text, AE.Value)] 82 | res `shouldBe` [("key1",String "value2"),("key2",String "value2")] 83 | 84 | specObjComb :: Spec 85 | specObjComb = describe "Object accesors" $ do 86 | it "objectWithKey works" $ do 87 | let test = "[{'name': 'John', 'age': 20}, {'age': 30, 'name': 'Frank' } ]" 88 | msg = parse (arrayOf $ (,) <$> objectWithKey "name" value <*> objectWithKey "age" value) test :: [(T.Text,Int)] 89 | msg `shouldBe` [("John",20),("Frank",30)] 90 | 91 | it "objectKeyValues works" $ do 92 | let test = "[{'name': 'John', 'age': 20}, {'age': 30, 'name': 'Frank' } ]" 93 | item "name" = Left <$> string 94 | item "age" = Right <$> integer 95 | item _ = empty 96 | msg = parse (arrayOf $ objectKeyValues item) test :: [Either T.Text Int] 97 | msg `shouldBe` [Left "John",Right 20,Right 30,Left "Frank"] 98 | 99 | it "yield test 1" $ do 100 | let test = "[{'key1': [1,2,3], 'key2': [5,6,7]}]" 101 | msg1 = parse (arrayOf $ objectItems value) test :: [(T.Text, [Int])] 102 | msg2 = parse (arrayOf $ objectItems $ arrayOf value) test :: [(T.Text, Int)] 103 | msg1 `shouldBe` [("key1",[1,2,3]),("key2",[5,6,7])] 104 | msg2 `shouldBe` [("key1",1),("key1",2),("key1",3),("key2",5),("key2",6),("key2",7)] 105 | 106 | it "<*> test 1 reverse keys" $ do 107 | let test = "[{'key1': [1,2], 'key2': [5,6], 'key3': [8,9]}]" 108 | parser = arrayOf $ (,) <$> objectWithKey "key2" (arrayOf value) <*> objectWithKey "key1" (arrayOf value) 109 | msg = parse parser test :: [(Int, Int)] 110 | msg `shouldBe` [(5,1),(5,2),(6,1),(6,2)] 111 | 112 | it "<> test 1" $ do 113 | let test = "[{'key1': [1,2], 'key2': [5,6], 'key3': [8,9]}]" 114 | parser = arrayOf $ objectWithKey "key1" (arrayOf value) <> objectWithKey "key2" (arrayOf value) 115 | msg = parse parser test :: [Int] 116 | msg `shouldBe` [1,2,5,6] 117 | 118 | it "<|> returns first items even if second is in previous chunk" $ do 119 | let test = ["{\"error\":1, ", "\"values\":[2,3,4]}"] 120 | parser = "values" .: arrayOf integer 121 | <|> "error" .: integer 122 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 123 | msg `shouldBe` [2,3,4] 124 | it "<|> returns second item if first does not match" $ do 125 | let test = ["{\"error\":1, ", "\"values\":[true,null,false]}"] 126 | parser = "values" .: arrayOf integer 127 | <|> ("error" .: integer) 128 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 129 | msg `shouldBe` [1] 130 | 131 | it "objectOf <|> returns first items even if second is in previous chunk" $ do 132 | let test = ["{\"error\":1, ", "\"values\":[2,3,4]}"] 133 | parser = objectOf $ "values" .: arrayOf integer 134 | <|> "error" .: integer 135 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 136 | msg `shouldBe` [2,3,4] 137 | it "objectOf <|> returns second item if first does not match" $ do 138 | let test = ["{\"error\":1, ", "\"values\":[true,null,false]}"] 139 | parser = objectOf $ "values" .: arrayOf integer 140 | <|> ("error" .: integer) 141 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 142 | msg `shouldBe` [1] 143 | 144 | 145 | it "arrayFound generates events" $ do 146 | let test = ["[[1,2,3],true,[],false,{\"key\":1}]"] 147 | parser = arrayOf (arrayFound 10 20 (1 .! integer)) 148 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 149 | msg `shouldBe` [10,2,20,10,20] 150 | 151 | it "objectFound generates events" $ do 152 | let test = ["[[1,2,3],true,[],false,{\"key\":1}]"] 153 | parser = arrayOf (objectFound 10 20 ("key" .: integer)) 154 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 155 | msg `shouldBe` [10,1,20] 156 | 157 | it "objectOf objectFound generates events" $ do 158 | let test = ["[[1,2,3],true,[],false,{\"key\":1}]"] 159 | parser = arrayOf (objectFound 10 20 (objectOf $ "key" .: integer)) 160 | msg = parseLazyByteString parser (BL.fromChunks test) :: [Int] 161 | msg `shouldBe` [10,1,20] 162 | 163 | it "Has working byteString parser" $ do 164 | let test = ["[\"abcd\\n\\rxyz\"]"] 165 | parser = arrayOf byteString :: Parser BS.ByteString 166 | msg = parseLazyByteString parser (BL.fromChunks test) :: [BS.ByteString] 167 | msg `shouldBe` ["abcd\\n\\rxyz"] 168 | 169 | 170 | 171 | specEdge :: Spec 172 | specEdge = describe "Edge cases" $ do 173 | it "Correct incremental parsing 1" $ do 174 | let msg1 = "[ {\"test1\" :[1,true,false,null,-3.591e+1,[12,13]], \"test2\":\"123\\r\\n\\\"\\u0041\"}]" 175 | pmsg = BL.fromChunks $ map BS.singleton msg1 176 | res = parseLazyByteString value pmsg :: [AE.Value] 177 | #if MIN_VERSION_aeson(2,0,0) 178 | res `shouldBe` [Array (Vec.fromList [Object $ AEK.fromList [("test2",String "123\r\n\"A"),("test1",Array (Vec.fromList [Number 1.0,Bool True,Bool False,Null,Number (-35.91),Array (Vec.fromList [Number 12.0,Number 13.0])]))]])] 179 | #else 180 | res `shouldBe` [Array (Vec.fromList [Object $ HMap.fromList [("test2",String "123\r\n\"A"),("test1",Array (Vec.fromList [Number 1.0,Bool True,Bool False,Null,Number (-35.91),Array (Vec.fromList [Number 12.0,Number 13.0])]))]])] 181 | #endif 182 | 183 | it "Correct incremental parsing 2" $ do 184 | let msg1 = "{\"test1\" :[1,true,false,null,-3.591e+1,[12,13]], \"test2\":\"test2string\"}" 185 | pmsg = BL.fromChunks $ map BS.singleton msg1 186 | res = parseLazyByteString ("test2" .: string) pmsg :: [T.Text] 187 | res `shouldBe` ["test2string"] 188 | 189 | it "Correct incremental parsing 3" $ do 190 | let msg1 = "[\"Žluťoučký kůň\"]" :: T.Text 191 | pmsg = BL.fromChunks $ map BS.singleton $ BS.unpack $ encodeUtf8 msg1 192 | res = parseLazyByteString (0 .! string) pmsg :: [T.Text] 193 | res `shouldBe` ["Žluťoučký kůň"] 194 | 195 | 196 | it "Correctly skips data" $ do 197 | let msg1 = "[{\"123\":[1,2,[3,4]]},11]" 198 | res = parseByteString (arrayWithIndexOf 0 (objectValues (arrayOf $ pure "x")) <> arrayWithIndexOf 1 (pure "y") <> arrayOf (pure "z")) msg1 :: [String] 199 | res `shouldBe` ["x", "x", "x", "y", "z", "z"] 200 | 201 | it "Correctly returns unparsed data 1" $ do 202 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] " 203 | rem1 = testRemaining (pure "x" :: Parser String) msg1 204 | rem1 `shouldBe` " " 205 | it "Correctly returns unparsed data 2" $ do 206 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] !x!" 207 | rem1 = testRemaining (pure "x" :: Parser String) msg1 208 | rem1 `shouldBe` " !x!" 209 | it "Correctly returns unparsed data 3" $ do 210 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] 25 " 211 | rem1 = testRemaining (pure "x" :: Parser String) msg1 212 | rem1 `shouldBe` " 25 " 213 | it "Correctly returns unparsed data 4" $ do 214 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] 25" 215 | rem1 = testRemaining (pure "x" :: Parser String) msg1 216 | rem1 `shouldBe` " 25" 217 | it "Correctly returns unparsed data 4" $ do 218 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] \"" 219 | rem1 = testRemaining (pure "x" :: Parser String) msg1 220 | rem1 `shouldBe` " \"" 221 | it "Correctly returns unparsed data 4" $ do 222 | let msg1 = "[{\"123\":[1,2,[3,4]]},11] \"aa\"" 223 | rem1 = testRemaining (pure "x" :: Parser String) msg1 224 | rem1 `shouldBe` " \"aa\"" 225 | it "Correctly returns unparsed data 5" $ do 226 | let msg1 = "\"bb\"\"aa\"" 227 | rem1 = testRemaining (pure "x" :: Parser String) msg1 228 | rem1 `shouldBe` "\"aa\"" 229 | it "Correctly returns unparsed data 6" $ do 230 | let msg1 = ["\"bb", "\"\"aa\""] 231 | rem1 = testRemainingChunks (pure "x" :: Parser String) msg1 232 | rem1 `shouldBe` "\"aa\"" 233 | 234 | it "Handles values in interleaving order" $ do 235 | let msg1 = BL.fromChunks ["{\"err\":true,\"values\":[1,2,3", "4,5,6,7]}"] 236 | parser = (Right <$> objectWithKey "values" (arrayOf value)) 237 | <> (Left <$> objectWithKey "err" value) 238 | res = parseLazyByteString parser msg1 :: [Either Bool Int] 239 | res `shouldBe` [Right 1,Right 2,Left True,Right 34,Right 5,Right 6,Right 7] 240 | 241 | specControl :: Spec 242 | specControl = describe "Control parser" $ do 243 | -- it "many" $ do 244 | -- it "fileterI" $ do 245 | it "takeI limits number of values" $ do 246 | let test = "[[1,2,3], [4,5,6]]" 247 | parser = arrayOf $ takeI 2 $ arrayOf integer 248 | res = parse parser test :: [Int] 249 | res `shouldBe` [1,2,4,5] 250 | 251 | it "ignores non-match for array" $ do 252 | let test = "[1,2,[3,4,5]]" 253 | parser = arrayOf (arrayOf value) 254 | res = parse parser test :: [Int] 255 | res `shouldBe` [3,4,5] 256 | 257 | it "ignores non-match for object" $ do 258 | let test = "[1,2,{\"test\": 3}]" 259 | parser = arrayOf $ objectWithKey "test" value 260 | res = parse parser test :: [Int] 261 | res `shouldBe` [3] 262 | it "ignores non-match for string" $ do 263 | let test = "[1,2,[\"a\", 3, null], \"test\",{}, \"test2\"]" 264 | res = parse (arrayOf string) test :: [T.Text] 265 | res `shouldBe` ["test", "test2"] 266 | it "ignores non-match for number" $ do 267 | let test = "[{\"aa\":3},2,3,\"test\",4, \"test2\"]" 268 | res = parse (arrayOf integer) test :: [Int] 269 | res `shouldBe` [2,3,4] 270 | it "ignores non-match for bool" $ do 271 | let test = "[1,[],true,\"test\",{\"t\":true}, \"test2\",false]" 272 | res = parse (arrayOf bool) test :: [Bool] 273 | res `shouldBe` [True, False] 274 | it "nullable sets values correctly" $ do 275 | let test = "[1,2,null,\"test\",null,3,[],{}]" 276 | res = parse (arrayOf $ nullable integer) test :: [Maybe Int] 277 | res `shouldBe` [Just 1, Just 2, Nothing, Nothing, Just 3] 278 | it "matches null values" $ do 279 | let test = "[1,2,null,\"test\",null,3,[],{}]" 280 | res = parse (arrayOf jNull) test :: [()] 281 | length res `shouldBe` 2 282 | it "correctly ignores out-of-bounds values for bounded integer" $ do 283 | let test = "[-9999999999999999999999999,-999999999999,-9999999,-30000,-10000,0,10000,80000,9999,9999999, 999999999999, 18446744073709551000, 9999999999999999999999999, 4294967295]" 284 | let res1 = parse (arrayOf value) test :: [Integer] 285 | res1 `shouldBe` [-9999999999999999999999999,-999999999999,-9999999,-30000,-10000,0,10000,80000,9999,9999999, 999999999999, 18446744073709551000, 9999999999999999999999999, 4294967295] 286 | 287 | let res2 = parse (arrayOf integer) test :: [Int64] 288 | res2 `shouldBe` [-999999999999,-9999999,-30000,-10000,0,10000,80000,9999,9999999, 999999999999, 4294967295] 289 | 290 | let res3 = parse (arrayOf integer) test :: [Word64] 291 | res3 `shouldBe` [0,10000,80000,9999,9999999, 999999999999, 18446744073709551000, 4294967295] 292 | 293 | let res4 = parse (arrayOf integer) test :: [Int32] 294 | res4 `shouldBe` [-9999999,-30000,-10000,0,10000,80000,9999,9999999] 295 | 296 | let res5 = parse (arrayOf integer) test :: [Word32] 297 | res5 `shouldBe` [0,10000,80000,9999,9999999, 4294967295] 298 | 299 | 300 | -- Tests of things that were found to be buggy 301 | errTests :: Spec 302 | errTests = describe "Tests of previous errors" $ do 303 | it "arrayOf (pure True) should return only n*True, not (n+1)" $ do 304 | let test1 = "[]" 305 | res1 = parse (arrayOf (pure True)) test1 :: [Bool] 306 | length res1 `shouldBe` 0 307 | let test2 = "[{},2,3,[]]" 308 | res2 = parse (arrayOf (pure True)) test2 :: [Bool] 309 | length res2 `shouldBe` 4 310 | it "objectWithKey should return only first key with given name" $ do 311 | let test1 = "{\"test1\":1, \"test2\":2, \"test1\": 3}" 312 | res1 = parse (objectWithKey "test1" value) test1 :: [Int] 313 | res1 `shouldBe` [1] 314 | 315 | it "binds correctly convenience operators" $ do 316 | let test1 = "[{\"name\": \"test1\", \"value\": 1}, {\"name\": \"test2\", \"value\": null}, {\"name\": \"test3\"}, {\"name\": \"test4\", \"value\": true}]" 317 | parser = arrayOf $ (,) <$> "name" .: string 318 | <*> "value" .: integer .| (-1) 319 | res = parse parser test1 :: [(T.Text, Int)] 320 | res `shouldBe` [("test1",1),("test2",-1),("test3",-1),("test4",-1)] 321 | 322 | it "objectOf $ binds correctly convenience operators" $ do 323 | let test1 = "[{\"name\": \"test1\", \"value\": 1}, {\"name\": \"test2\", \"value\": null}, {\"name\": \"test3\"}, {\"name\": \"test4\", \"value\": true}]" 324 | parser = arrayOf $ objectOf $ (,) <$> "name" .: string 325 | <*> "value" .: integer .| (-1) 326 | res = parse parser test1 :: [(T.Text, Int)] 327 | res `shouldBe` [("test1",1),("test2",-1),("test3",-1),("test4",-1)] 328 | 329 | it "binds correctly convenience operators 2" $ do 330 | let test1 = "{\"key\":[{\"key2\":13}]}" 331 | parser = "key" .: 0 .! "key2" .: integer 332 | res = parse parser test1 :: [Int] 333 | res `shouldBe` [13] 334 | it "binds correctly .| at the last moment" $ do 335 | let test1 = "{\"key3\":{}}" 336 | parser = "key-none" .: "key2" .: integer .| 2 337 | res = parse parser test1 :: [Int] 338 | res `shouldBe` [2] 339 | it "binds correct .| 2" $ do 340 | let test1 = "{\"key3\":{\"key2\": null}}" 341 | parser = "key-none" .: "key2" .: integer .| 2 342 | res = parse parser test1 :: [Int] 343 | res `shouldBe` [2] 344 | 345 | 346 | it "objectOf $ binds correctly .| at the last moment" $ do 347 | let test1 = "{\"key3\":{}}" 348 | parser = objectOf $ "key-none" .: "key2" .: integer .| 2 349 | res = parse parser test1 :: [Int] 350 | res `shouldBe` [2] 351 | it "objectOf $ binds correct .| 2" $ do 352 | let test1 = "{\"key3\":{\"key2\": null}}" 353 | parser = objectOf $ "key-none" .: "key2" .: integer .| 2 354 | res = parse parser test1 :: [Int] 355 | res `shouldBe` [2] 356 | 357 | it "Parses correctly empty arrays:" $ do 358 | let test1 = "[]" 359 | parser = arrayOf $ many ("keys" .: arrayOf integer) 360 | res = parse parser test1 :: [[Int]] 361 | res `shouldBe` [] 362 | 363 | it "Parses correctly runs ignore parser on array:" $ do 364 | let test1 = "[{\"name\":\"x\",\"key\":20}]" 365 | onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1 366 | parser = arrayOf $ "key" .: integer 367 | res = parseLazyByteString parser onechar :: [Int] 368 | res `shouldBe` [20] 369 | 370 | it "objectOf $ Parses correctly runs ignore parser on array:" $ do 371 | let test1 = "[{\"name\":\"x\",\"key\":20}]" 372 | onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1 373 | parser = arrayOf $ objectOf $ "key" .: integer 374 | res = parseLazyByteString parser onechar :: [Int] 375 | res `shouldBe` [20] 376 | 377 | it "Parses correctly runs ignore parser on array:" $ do 378 | let test1 = "[\"abc\",123,\"def\"]" 379 | onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1 380 | parser = arrayOf integer 381 | res = parseLazyByteString parser onechar :: [Int] 382 | res `shouldBe` [123] 383 | 384 | it "Parses correctly handles empty strings when sliced:" $ do 385 | let test1 = "[\"\", \"\", true]" 386 | onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1 387 | parser = arrayOf bool 388 | res = parseLazyByteString parser onechar :: [Bool] 389 | res `shouldBe` [True] 390 | 391 | it "Correctly parses safeString when sliced" $ do 392 | let test1 = "[\"looooooooooong\", \"short\"]" 393 | onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1 394 | parser = arrayOf (safeString 6) 395 | res = parseLazyByteString parser onechar :: [T.Text] 396 | res `shouldBe` ["short"] 397 | 398 | 399 | -- testLexer (start:rest) = iter rest (tokenParser start) 400 | -- where 401 | -- iter [] (TokMoreData cont) = print "done" 402 | -- iter (dta:rest) (TokMoreData cont) = do 403 | -- print "more-data" 404 | -- iter rest (cont dta) 405 | -- iter dta (PartialResult el cont) = do 406 | -- print el 407 | -- iter dta cont 408 | -- iter _ TokFailed = print "tok failed" 409 | 410 | aeCompare :: Spec 411 | aeCompare = describe "Compare parsing of strings aeason vs json-stream" $ do 412 | let values = [ 413 | "{}" 414 | , "{ \"v\":\"1\"} " 415 | , "{ \"v\":\"1\"\r\n} " 416 | , "{ \"v\":1}" 417 | , "{ \"v\":\"ab'c\"}" 418 | , "{ \"PI\":3.141E-10}" 419 | , "{ \"PI\":3.141e-10}" 420 | , "{ \"v\":12345123456789} " 421 | , "{ \"v\":123456789123456789123456789}" 422 | , "[ 1,2,3,4] " 423 | , "[ \"1\",\"2\",\"3\",\"4\"] " 424 | , "[ { }, { },[]] " 425 | , "{ \"v\":\"\\u2000\\u20ff\"} " 426 | , "{ \"v\":\"\\u2000\\u20FF\"} " 427 | , "{ \"a\":\"hp://foo\"} " 428 | , "{ \"a\":null} " 429 | , "{ \"a\":true} " 430 | , " { \"a\" : true } " 431 | , "{ \"v\":1.7976931348623157E308} " 432 | ] 433 | 434 | forM_ values $ \test -> it ("Parses " ++ show test ++ " the same as aeson") $ do 435 | let resStream = head $ parseByteString value test :: AE.Value 436 | let Just resAeson = AE.decode (BL.fromChunks [test]) 437 | resStream `shouldBe` resAeson 438 | 439 | readBenchFiles :: FilePath -> IO [BS.ByteString] 440 | readBenchFiles dirname = 441 | getDirectoryContents dirname >>= return . (filter isJson) >>= mapM readFile' 442 | where 443 | readFile' fname = BS.readFile (dirname ++ "/" ++ fname) 444 | isJson fname = take 5 (reverse fname) == ("nosj." :: String) 445 | 446 | aeCompareBench :: Spec 447 | aeCompareBench = describe "Compare benchmark jsons" $ 448 | it "JSONs from benchamark directory are the same" $ do 449 | values <- readBenchFiles "benchmarks/json-data" 450 | forM_ values $ \test -> do 451 | let resStream = head $ parseByteString value test :: AE.Value 452 | let Just resAeson = AE.decode (BL.fromChunks [test]) 453 | resStream `shouldBe` resAeson 454 | 455 | spec :: Spec 456 | spec = do 457 | specBase 458 | specObjComb 459 | specEdge 460 | specControl 461 | errTests 462 | aeCompare 463 | aeCompareBench 464 | 465 | main :: IO () 466 | main = hspec spec 467 | -------------------------------------------------------------------------------- /benchmarks/json-data/twitter50.json: -------------------------------------------------------------------------------- 1 | {"results":[{"from_user_id_str":"207858021","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 04:30:38 +0000","from_user":"pboudarga","id_str":"30120402839666689","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I'm at Rolla Sushi Grill (27737 Bouquet Canyon Road, #106, Btw Haskell Canyon and Rosedell Drive, Saugus) http://4sq.com/gqqdhs","id":30120402839666689,"from_user_id":207858021,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://foursquare.com" rel="nofollow">foursquare</a>"},{"from_user_id_str":"69988683","profile_image_url":"http://a0.twimg.com/profile_images/1211955817/avatar_7888_normal.gif","created_at":"Wed, 26 Jan 2011 04:25:23 +0000","from_user":"YNK33","id_str":"30119083059978240","metadata":{"result_type":"recent"},"to_user_id":null,"text":"hsndfile 0.5.0: Free and open source Haskell bindings for libsndfile http://bit.ly/gHaBWG Mac Os","id":30119083059978240,"from_user_id":69988683,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"81492","profile_image_url":"http://a1.twimg.com/profile_images/423894208/Picture_7_normal.jpg","created_at":"Wed, 26 Jan 2011 04:24:28 +0000","from_user":"satzz","id_str":"30118851488251904","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Emacs\u306e\u30e2\u30fc\u30c9\u8868\u793a\u304c\u4eca(Ruby Controller Outputz RoR Flymake REl hs)\u3068\u306a\u3063\u3066\u3066\u3088\u304f\u308f\u304b\u3089\u306a\u3044\u3093\u3060\u3051\u3069\u6700\u5f8c\u306eREl\u3068\u304bhs\u3063\u3066\u4f55\u3060\u308d\u3046\u2026haskell\u3068\u304b2\u5e74\u4ee5\u4e0a\u66f8\u3044\u3066\u306a\u3044\u3051\u3069\u2026","id":30118851488251904,"from_user_id":81492,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://www.hootsuite.com" rel="nofollow">HootSuite</a>"},{"from_user_id_str":"9518356","profile_image_url":"http://a2.twimg.com/profile_images/119165723/ocaml-icon_normal.png","created_at":"Wed, 26 Jan 2011 04:19:19 +0000","from_user":"planet_ocaml","id_str":"30117557788741632","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I so miss #haskell type classes in #ocaml - i want to do something like refinement. Also why does ocaml not have... http://bit.ly/geYRwt","id":30117557788741632,"from_user_id":9518356,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"218059","profile_image_url":"http://a1.twimg.com/profile_images/1053837723/twitter-icon9_normal.jpg","created_at":"Wed, 26 Jan 2011 04:16:32 +0000","from_user":"aprikip","id_str":"30116854940835840","metadata":{"result_type":"recent"},"to_user_id":null,"text":"yatex-mode\u3084haskell-mode\u306e\u3053\u3068\u3067\u3059\u306d\u3001\u308f\u304b\u308a\u307e\u3059\u3002","id":30116854940835840,"from_user_id":218059,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://sites.google.com/site/yorufukurou/" rel="nofollow">YoruFukurou</a>"},{"from_user_id_str":"216363","profile_image_url":"http://a1.twimg.com/profile_images/72454310/Tim-Avatar_normal.png","created_at":"Wed, 26 Jan 2011 04:15:30 +0000","from_user":"dysinger","id_str":"30116594684264448","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell in Hawaii tonight for me... #fun","id":30116594684264448,"from_user_id":216363,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.nambu.com/" rel="nofollow">Nambu</a>"},{"from_user_id_str":"1774820","profile_image_url":"http://a2.twimg.com/profile_images/61169291/dan_desert_thumb_normal.jpg","created_at":"Wed, 26 Jan 2011 04:13:36 +0000","from_user":"DanMil","id_str":"30116117682851840","metadata":{"result_type":"recent"},"to_user_id":1594784,"text":"@ojrac @chewedwire @tomheon Haskell isn't a language, it's a belief system. A seductive one...","id":30116117682851840,"from_user_id":1774820,"to_user":"ojrac","geo":null,"iso_language_code":"en","to_user_id_str":"1594784","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"659256","profile_image_url":"http://a0.twimg.com/profile_images/746976711/angular-final_normal.jpg","created_at":"Wed, 26 Jan 2011 04:11:06 +0000","from_user":"djspiewak","id_str":"30115488931520512","metadata":{"result_type":"recent"},"to_user_id":null,"text":"One of the very nice things about Haskell as opposed to SML is the reduced proliferation of identifiers (e.g. andb, orb, etc). #typeclasses","id":30115488931520512,"from_user_id":659256,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 04:06:12 +0000","from_user":"listwarenet","id_str":"30114255890026496","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-cafe/84752-re-haskell-cafe-gpl-license-of-h-matrix-and-prelude-numeric.html Re: Haskell-c","id":30114255890026496,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://1e10.org/cloud/" rel="nofollow">1e10</a>"},{"from_user_id_str":"1594784","profile_image_url":"http://a2.twimg.com/profile_images/378515773/square-profile_normal.jpg","created_at":"Wed, 26 Jan 2011 04:01:29 +0000","from_user":"ojrac","id_str":"30113067333324800","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @tomheon: @ojrac @chewedwire Don't worry, learning Haskell will not give you any clear idea what monad means.","id":30113067333324800,"from_user_id":1594784,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"207589736","profile_image_url":"http://a3.twimg.com/profile_images/1225527428/headshot_1_normal.jpg","created_at":"Wed, 26 Jan 2011 04:00:13 +0000","from_user":"ashleevelazq101","id_str":"30112747555397632","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Federal investigation finds safety violations at The Acadia Hospital: By Meg Haskell, BDN Staff The investigatio... http://bit.ly/dONnpn","id":30112747555397632,"from_user_id":207589736,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"17671137","profile_image_url":"http://a2.twimg.com/profile_images/290264834/Haskell-logo-outer-glow_normal.png","created_at":"Wed, 26 Jan 2011 03:58:00 +0000","from_user":"Hackage","id_str":"30112192346984448","metadata":{"result_type":"recent"},"to_user_id":null,"text":"streams 0.4, added by EdwardKmett: Various Haskell 2010 stream comonads http://bit.ly/idBkPe","id":30112192346984448,"from_user_id":17671137,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"17489366","profile_image_url":"http://a3.twimg.com/sticky/default_profile_images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 03:58:00 +0000","from_user":"aapnoot","id_str":"30112191881420800","metadata":{"result_type":"recent"},"to_user_id":null,"text":"streams 0.4, added by EdwardKmett: Various Haskell 2010 stream comonads http://bit.ly/idBkPe","id":30112191881420800,"from_user_id":17489366,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"8530482","profile_image_url":"http://a2.twimg.com/profile_images/137867266/n608671563_7396_normal.jpg","created_at":"Wed, 26 Jan 2011 03:50:12 +0000","from_user":"jeffmclamb","id_str":"30110229207187456","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Angel - daemon to run and monitor processes like daemontools or god, written in Haskell http://ff.im/-wNyLk","id":30110229207187456,"from_user_id":8530482,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://friendfeed.com" rel="nofollow">FriendFeed</a>"},{"from_user_id_str":"177539201","profile_image_url":"http://a0.twimg.com/profile_images/1178368800/img_normal.jpeg","created_at":"Wed, 26 Jan 2011 03:46:01 +0000","from_user":"tomheon","id_str":"30109174645919744","metadata":{"result_type":"recent"},"to_user_id":1594784,"text":"@ojrac @chewedwire Don't worry, learning Haskell will not give you any clear idea what monad means.","id":30109174645919744,"from_user_id":177539201,"to_user":"ojrac","geo":null,"iso_language_code":"en","to_user_id_str":"1594784","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"1594784","profile_image_url":"http://a2.twimg.com/profile_images/378515773/square-profile_normal.jpg","created_at":"Wed, 26 Jan 2011 03:44:34 +0000","from_user":"ojrac","id_str":"30108808684503040","metadata":{"result_type":"recent"},"to_user_id":128028225,"text":"@chewedwire @tomheon Why are you making me curious about Haskell? I LIKE not knowing what monad means!!","id":30108808684503040,"from_user_id":1594784,"to_user":"chewedwire","geo":null,"iso_language_code":"en","to_user_id_str":"128028225","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"23750094","profile_image_url":"http://a0.twimg.com/profile_images/951373780/Moeinthecar_normal.jpg","created_at":"Wed, 26 Jan 2011 03:41:54 +0000","from_user":"shokalshab","id_str":"30108140443795456","metadata":{"result_type":"recent"},"to_user_id":null,"text":"I'm at Magnitude Cheer @ Gymnastics Olympica USA (7735 Haskell Ave., btw Saticoy & Strathern, Van Nuys) http://4sq.com/gmXfaL","id":30108140443795456,"from_user_id":23750094,"geo":null,"iso_language_code":"en","place":{"id":"4e4a2a2f86cb2946","type":"poi","full_name":"Gymnastics Olympica USA, Van Nuys"},"to_user_id_str":null,"source":"<a href="http://foursquare.com" rel="nofollow">foursquare</a>"},{"from_user_id_str":"8135112","profile_image_url":"http://a0.twimg.com/profile_images/1165240350/LIMITED_normal.jpg","created_at":"Wed, 26 Jan 2011 03:41:35 +0000","from_user":"Claricei","id_str":"30108059208515584","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @kristenmchugh22: @KeithOlbermann Ryan looks like Jughead w/o the hat. And sounds as mealy-mouthed as Eddie Haskell.","id":30108059208515584,"from_user_id":8135112,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"128028225","profile_image_url":"http://a1.twimg.com/profile_images/1224994593/Coding_Drunk_normal.jpg","created_at":"Wed, 26 Jan 2011 03:40:02 +0000","from_user":"chewedwire","id_str":"30107670367182848","metadata":{"result_type":"recent"},"to_user_id":177539201,"text":"@tomheon Cool, I'll take a look. I feel like I should mention this: http://bit.ly/hVstDM","id":30107670367182848,"from_user_id":128028225,"to_user":"tomheon","geo":null,"iso_language_code":"en","to_user_id_str":"177539201","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"8679778","profile_image_url":"http://a3.twimg.com/profile_images/1195318056/me_nyc_12_18_10_icon_normal.jpg","created_at":"Wed, 26 Jan 2011 03:38:42 +0000","from_user":"kristenmchugh22","id_str":"30107332381777920","metadata":{"result_type":"recent"},"to_user_id":756269,"text":"@KeithOlbermann Ryan looks like Jughead w/o the hat. And sounds as mealy-mouthed as Eddie Haskell.","id":30107332381777920,"from_user_id":8679778,"to_user":"KeithOlbermann","geo":null,"iso_language_code":"en","to_user_id_str":"756269","source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"103316559","profile_image_url":"http://a1.twimg.com/profile_images/1179458751/bc3beab4-d59d-4e78-b13b-50747986cfa2_normal.png","created_at":"Wed, 26 Jan 2011 03:36:15 +0000","from_user":"cityslikr","id_str":"30106719153557504","metadata":{"result_type":"recent"},"to_user_id":null,"text":""Social safety net into a hammock." So says Eddie Haskell with the GOP response. #SOTU","id":30106719153557504,"from_user_id":103316559,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry\u00ae</a>"},{"from_user_id_str":"169063143","profile_image_url":"http://a0.twimg.com/profile_images/1160506212/9697_1_normal.gif","created_at":"Wed, 26 Jan 2011 03:31:52 +0000","from_user":"wrkforce_safety","id_str":"30105614919143424","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Federal investigation finds safety violations at The Acadia Hospital: By Meg Haskell, BDN Staff The investigatio... http://bit.ly/gA60C1","id":30105614919143424,"from_user_id":169063143,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"177539201","profile_image_url":"http://a0.twimg.com/profile_images/1178368800/img_normal.jpeg","created_at":"Wed, 26 Jan 2011 03:29:40 +0000","from_user":"tomheon","id_str":"30105060960632832","metadata":{"result_type":"recent"},"to_user_id":128028225,"text":"@chewedwire Great book on Haskell: http://oreilly.com/catalog/9780596514983","id":30105060960632832,"from_user_id":177539201,"to_user":"chewedwire","geo":null,"iso_language_code":"en","to_user_id_str":"128028225","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"782692","profile_image_url":"http://a0.twimg.com/profile_images/1031304589/Profile.2007.1_normal.jpg","created_at":"Wed, 26 Jan 2011 03:29:19 +0000","from_user":"turnageb","id_str":"30104974591533057","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @ovillalon: Paul Ryan solves the mystery of whatever happened to Eddie Haskell. #sotu","id":30104974591533057,"from_user_id":782692,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"128028225","profile_image_url":"http://a1.twimg.com/profile_images/1224994593/Coding_Drunk_normal.jpg","created_at":"Wed, 26 Jan 2011 03:28:04 +0000","from_user":"chewedwire","id_str":"30104657267265536","metadata":{"result_type":"recent"},"to_user_id":177539201,"text":"@tomheon I always loved the pattern matching in SML and it looks like Haskell is MUCH better at it. I'm messing around now at tryhaskell.org","id":30104657267265536,"from_user_id":128028225,"to_user":"tomheon","geo":null,"iso_language_code":"en","to_user_id_str":"177539201","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"12834082","profile_image_url":"http://a3.twimg.com/profile_images/1083036140/mugshot_normal.png","created_at":"Wed, 26 Jan 2011 03:28:01 +0000","from_user":"ovillalon","id_str":"30104647213518848","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Paul Ryan solves the mystery of whatever happened to Eddie Haskell. #sotu","id":30104647213518848,"from_user_id":12834082,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"13540930","profile_image_url":"http://a1.twimg.com/profile_images/1205312219/23467_350321383101_821143101_5114147_3010041_n_normal.jpg","created_at":"Wed, 26 Jan 2011 03:26:02 +0000","from_user":"goodfox","id_str":"30104146455560192","metadata":{"result_type":"recent"},"to_user_id":10226179,"text":"@billykeene22 Bordeaux is one of my heroes. I was so excited when he accepted the invitation to campus. He's been a great friend to Haskell.","id":30104146455560192,"from_user_id":13540930,"to_user":"billykeene22","geo":null,"iso_language_code":"en","to_user_id_str":"10226179","source":"<a href="http://www.ubertwitter.com/bb/download.php" rel="nofollow">\u00dcberTwitter</a>"},{"from_user_id_str":"18616016","profile_image_url":"http://a2.twimg.com/profile_images/1173522726/214397343_normal.jpg","created_at":"Wed, 26 Jan 2011 03:25:18 +0000","from_user":"josej30","id_str":"30103962313031681","metadata":{"result_type":"recent"},"to_user_id":14870909,"text":"@cris7ian Ahh bueno multiparadigma ya es respetable :) Empezar\u00e9 a explotar la parte funcional de los lenguajes ahora #Haskell","id":30103962313031681,"from_user_id":18616016,"to_user":"Cris7ian","geo":null,"iso_language_code":"es","to_user_id_str":"14870909","source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"14870909","profile_image_url":"http://a2.twimg.com/profile_images/1176930429/oso_yo_normal.png","created_at":"Wed, 26 Jan 2011 03:23:43 +0000","from_user":"Cris7ian","id_str":"30103562360983553","metadata":{"result_type":"recent"},"to_user_id":18616016,"text":"@josej30 hahaha no, es multiparadigma y es bastante lazy. Nothing like haskell, pero s\u00ed, el de Flash","id":30103562360983553,"from_user_id":14870909,"to_user":"josej30","geo":null,"iso_language_code":"es","to_user_id_str":"18616016","source":"<a href="http://www.echofon.com/" rel="nofollow">Echofon</a>"},{"from_user_id_str":"2421643","profile_image_url":"http://a0.twimg.com/profile_images/1190361665/ernestgrumbles-17_normal.jpg","created_at":"Wed, 26 Jan 2011 03:20:24 +0000","from_user":"ernestgrumbles","id_str":"30102730756333568","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Wow... WolframAlpha did not know who Eddie Haskell is. Guess I'll never use that "knowledge engine" again.","id":30102730756333568,"from_user_id":2421643,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"128028225","profile_image_url":"http://a1.twimg.com/profile_images/1224994593/Coding_Drunk_normal.jpg","created_at":"Wed, 26 Jan 2011 03:14:21 +0000","from_user":"chewedwire","id_str":"30101204428132352","metadata":{"result_type":"recent"},"to_user_id":177539201,"text":"@tomheon How is Haskell better/different from CL or Scheme? I honestly don't know, although I'm becoming more curious.","id":30101204428132352,"from_user_id":128028225,"to_user":"tomheon","geo":null,"iso_language_code":"en","to_user_id_str":"177539201","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"13540930","profile_image_url":"http://a1.twimg.com/profile_images/1205312219/23467_350321383101_821143101_5114147_3010041_n_normal.jpg","created_at":"Wed, 26 Jan 2011 03:06:54 +0000","from_user":"goodfox","id_str":"30099329809129473","metadata":{"result_type":"recent"},"to_user_id":null,"text":"A day of vision & speeches. #SOTU now. And a wonderful Haskell Convocation address earlier today by Sinte Gleske President Lionel Bordeaux.","id":30099329809129473,"from_user_id":13540930,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.ubertwitter.com/bb/download.php" rel="nofollow">\u00dcberTwitter</a>"},{"from_user_id_str":"119185220","profile_image_url":"http://a0.twimg.com/profile_images/1089027228/dfg_normal.jpg","created_at":"Wed, 26 Jan 2011 03:03:38 +0000","from_user":"LaLiciouz_03","id_str":"30098510623805440","metadata":{"result_type":"recent"},"to_user_id":null,"text":"The Game with the girl room 330 Haskell follow us...","id":30098510623805440,"from_user_id":119185220,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"18616016","profile_image_url":"http://a2.twimg.com/profile_images/1173522726/214397343_normal.jpg","created_at":"Wed, 26 Jan 2011 02:57:25 +0000","from_user":"josej30","id_str":"30096946144215040","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Voy a extra\u00f1ar Haskell cuando regrese al mundo imperativo. Hay alg\u00fan lenguaje imperativo que tenga este poder funcional? #ci3661","id":30096946144215040,"from_user_id":18616016,"geo":null,"iso_language_code":"es","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"17671137","profile_image_url":"http://a2.twimg.com/profile_images/290264834/Haskell-logo-outer-glow_normal.png","created_at":"Wed, 26 Jan 2011 02:55:32 +0000","from_user":"Hackage","id_str":"30096471814574080","metadata":{"result_type":"recent"},"to_user_id":null,"text":"comonad-transformers 0.9.0, added by EdwardKmett: Haskell 98 comonad transformers http://bit.ly/h6xIsf","id":30096471814574080,"from_user_id":17671137,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>"},{"from_user_id_str":"177539201","profile_image_url":"http://a0.twimg.com/profile_images/1178368800/img_normal.jpeg","created_at":"Wed, 26 Jan 2011 02:48:12 +0000","from_user":"tomheon","id_str":"30094626920603649","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Every time I look at Haskell I love it more.","id":30094626920603649,"from_user_id":177539201,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"60792568","profile_image_url":"http://a0.twimg.com/profile_images/1203647517/glenda-flash_normal.jpg","created_at":"Wed, 26 Jan 2011 02:40:42 +0000","from_user":"r_takaishi","id_str":"30092735935422464","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell\u3088\u308aD\u8a00\u8a9e\u304c\u4e0a\u3068\u306f\u601d\u308f\u306a\u304b\u3063\u305f\uff0e http://www.tiobe.com/index.php/content/paperinfo/tpci/index.html","id":30092735935422464,"from_user_id":60792568,"geo":null,"iso_language_code":"ja","to_user_id_str":null,"source":"<a href="http://twmode.sf.net/" rel="nofollow">twmode</a>"},{"from_user_id_str":"160145510","profile_image_url":"http://a0.twimg.com/profile_images/1218108166/going_galt_normal.jpg","created_at":"Wed, 26 Jan 2011 02:39:31 +0000","from_user":"wtp1787","id_str":"30092439653974018","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @KLSouth: Eddie Haskell Goes to Washington... "You look really nice tonight, Ms Cleaver"","id":30092439653974018,"from_user_id":160145510,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"96616016","profile_image_url":"http://a1.twimg.com/profile_images/1196978169/Picture0002_normal.jpg","created_at":"Wed, 26 Jan 2011 02:39:20 +0000","from_user":"MelissaRNMBA","id_str":"30092392203816960","metadata":{"result_type":"recent"},"to_user_id":14862975,"text":"@KLSouth At least Eddie Haskell was entertaining.","id":30092392203816960,"from_user_id":96616016,"to_user":"KLSouth","geo":null,"iso_language_code":"en","to_user_id_str":"14862975","source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"14862975","profile_image_url":"http://a0.twimg.com/profile_images/421596393/kls_4_normal.JPG","created_at":"Wed, 26 Jan 2011 02:38:29 +0000","from_user":"KLSouth","id_str":"30092178327871489","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Eddie Haskell Goes to Washington... "You look really nice tonight, Ms Cleaver"","id":30092178327871489,"from_user_id":14862975,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 02:36:17 +0000","from_user":"listwarenet","id_str":"30091626869161984","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-beginners/84641-haskell-beginners-wildcards-in-expressions.html Haskell-beginners - Wild","id":30091626869161984,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://1e10.org/cloud/" rel="nofollow">1e10</a>"},{"from_user_id_str":"24538048","profile_image_url":"http://a2.twimg.com/profile_images/1117267605/rope_normal.jpg","created_at":"Wed, 26 Jan 2011 02:23:14 +0000","from_user":"dbph","id_str":"30088341030440960","metadata":{"result_type":"recent"},"to_user_id":null,"text":"RT @dnene: Skilled Calisthenics. Haskell code that outputs python which spits ruby which emits the haskell source. http://j.mp/YlQUL via @mfeathers","id":30088341030440960,"from_user_id":24538048,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"},{"from_user_id_str":"158951567","profile_image_url":"http://a3.twimg.com/profile_images/962721419/Logo_3_normal.jpg","created_at":"Wed, 26 Jan 2011 01:58:31 +0000","from_user":"YubaVetTech","id_str":"30082124207886336","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Dr. Haskell has just been awarded the prestigious Hayward Award for \u2018Excellence in Education\u2019. This award honors... http://fb.me/QgQCxd74","id":30082124207886336,"from_user_id":158951567,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>"},{"from_user_id_str":"158951567","profile_image_url":"http://a3.twimg.com/profile_images/962721419/Logo_3_normal.jpg","created_at":"Wed, 26 Jan 2011 01:56:04 +0000","from_user":"YubaVetTech","id_str":"30081505476743168","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Dr. Haskell has just been awarded the prestigous Haward Award for Excellence in Education. This award honors... http://fb.me/PC9mYmCR","id":30081505476743168,"from_user_id":158951567,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>"},{"from_user_id_str":"2331498","profile_image_url":"http://a3.twimg.com/profile_images/494632120/me_normal.jpg","created_at":"Wed, 26 Jan 2011 01:43:50 +0000","from_user":"Verus","id_str":"30078427155406848","metadata":{"result_type":"recent"},"to_user_id":79273052,"text":"@kami_joe \u3044\u3084\uff0c\u8ab2\u984c\u306f\u89e3\u6c7a\u5bfe\u8c61(\u30d1\u30ba\u30eb\u3068\u304b)\u3092\u4e0e\u3048\u3089\u308c\u3066\uff0c\u554f\u984c\u5b9a\u7fa9\u3068\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0(\u6307\u5b9a\u8a00\u8a9e\u306fC++\u3082\u3057\u304f\u306fJava)\u3068\u3044\u3046\u3044\u308f\u3070\u666e\u901a\u306a\u8ab2\u984c\u3067\u306f\u3042\u308b\u3093\u3060\u3051\u3069\uff0e\u95a2\u6570\u578b\u8a00\u8a9e\u306fHaskell\u306e\u6388\u696d\u304c\u307e\u305f\u5225\u306b\u3042\u308b\u306e\uff0e","id":30078427155406848,"from_user_id":2331498,"to_user":"kami_joe","geo":null,"iso_language_code":"ja","to_user_id_str":"79273052","source":"<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>"},{"from_user_id_str":"79151233","profile_image_url":"http://a2.twimg.com/a/1295051201/images/default_profile_2_normal.png","created_at":"Wed, 26 Jan 2011 01:40:37 +0000","from_user":"cz_newdrafts","id_str":"30077617361125376","metadata":{"result_type":"recent"},"to_user_id":null,"text":"Haskell programming language http://bit.ly/gpPAwB","id":30077617361125376,"from_user_id":79151233,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://tommorris.org/" rel="nofollow">tommorris' hacksample</a>"},{"from_user_id_str":"2331498","profile_image_url":"http://a3.twimg.com/profile_images/494632120/me_normal.jpg","created_at":"Wed, 26 Jan 2011 01:02:57 +0000","from_user":"Verus","id_str":"30068139416879104","metadata":{"result_type":"recent"},"to_user_id":9252720,"text":"@shukukei Java\u3068C\u306f\u3042\u308b\u7a0b\u5ea6\u66f8\u3051\u3066\u3042\u305f\u308a\u307e\u3048\u306a\u3068\u3053\u308d\u304c\u3042\u308b\u304b\u3089\u306a\u30fc\uff0ePython\u306f\u500b\u4eba\u7684\u306b\u611f\u899a\u304c\u5408\u308f\u306a\u3044\uff0e\u611f\u899a\u306a\u306e\u3067\uff0c\u3082\u3046\u3069\u3046\u3057\u3088\u3046\u3082\u306a\u3044\uff57 \u3044\u307e\u306fScala\u3068Haskell\u3092\u3082\u3063\u3068\u6975\u3081\u305f\u3044\u3068\u3053\u308d\uff0e","id":30068139416879104,"from_user_id":2331498,"to_user":"shukukei","geo":null,"iso_language_code":"ja","to_user_id_str":"9252720","source":"<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>"},{"from_user_id_str":"2781460","profile_image_url":"http://a0.twimg.com/profile_images/82526625/.joeyicon_normal.jpg","created_at":"Wed, 26 Jan 2011 01:02:35 +0000","from_user":"joeyhess","id_str":"30068046538219520","metadata":{"result_type":"recent"},"to_user_id":null,"text":"just figured out that I can use parameterized types to remove a dependency loop in git-annex's type definitions. whee #haskell","id":30068046538219520,"from_user_id":2781460,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://identi.ca" rel="nofollow">identica</a>"},{"from_user_id_str":"144546280","profile_image_url":"http://a1.twimg.com/a/1295051201/images/default_profile_1_normal.png","created_at":"Wed, 26 Jan 2011 00:54:22 +0000","from_user":"listwarenet","id_str":"30065977920061440","metadata":{"result_type":"recent"},"to_user_id":null,"text":"http://www.listware.net/201101/haskell-beginners/84466-haskell-beginners-bytestring-question.html Haskell-beginners - Bytestrin","id":30065977920061440,"from_user_id":144546280,"geo":null,"iso_language_code":"no","to_user_id_str":null,"source":"<a href="http://1e10.org/cloud/" rel="nofollow">1e10</a>"},{"from_user_id_str":"1291845","profile_image_url":"http://a0.twimg.com/profile_images/1225743404/ThinOxygen-small-opaque-solidarity_normal.png","created_at":"Wed, 26 Jan 2011 00:52:07 +0000","from_user":"_aaron_","id_str":"30065412242669568","metadata":{"result_type":"recent"},"to_user_id":null,"text":"wanted: librly licensed high level native compiled lang with min runtime (otherwise cobra/mono would be perfect) for win. lua? haskell? ooc?","id":30065412242669568,"from_user_id":1291845,"geo":null,"iso_language_code":"en","to_user_id_str":null,"source":"<a href="http://twitter.com/">web</a>"}],"max_id":30120402839666689,"since_id":0,"refresh_url":"?since_id=30120402839666689&q=haskell","next_page":"?page=2&max_id=30120402839666689&rpp=50&q=haskell","results_per_page":50,"page":1,"completed_in":0.291696,"since_id_str":"0","max_id_str":"30120402839666689","query":"haskell"} --------------------------------------------------------------------------------