├── .gitignore ├── freebase ├── LICENSE └── freebase.tsv ├── README.md ├── Makefile ├── csv2csv.ml ├── LICENSE ├── json2csv.ml └── csv2json.ml /.gitignore: -------------------------------------------------------------------------------- 1 | *.cm* 2 | *.annot 3 | *.[oa] 4 | *~ 5 | csv2json 6 | -------------------------------------------------------------------------------- /freebase/LICENSE: -------------------------------------------------------------------------------- 1 | Source: Freebase, licensed under CC-BY 2 | http://creativecommons.org/licenses/by/2.5/ 3 | 4 | Other content from Wikipedia, licensed under CC BY-SA 5 | http://creativecommons.org/licenses/by-sa/2.5/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | csv2json 2 | ======== 3 | 4 | Convert a CSV file with a header containing the field names into 5 | JSON records, one per line, omitting empty fields. 6 | 7 | Installation 8 | ------------ 9 | 10 | Requires OCaml, ocamlfind (Findlib), 11 | [csv](https://forge.ocamlcore.org/projects/csv/) 12 | and [yojson](https://github.com/mjambon/yojson). 13 | 14 | ``` 15 | $ make 16 | $ make install 17 | ``` 18 | 19 | Also supports `make PREFIX=... install` and `make BINDIR=... install`. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: csv2json json2csv csv2csv 3 | 4 | csv2json: csv2json.ml 5 | ocamlfind opt -o $@ -package csv,yojson -linkpkg csv2json.ml 6 | 7 | json2csv: json2csv.ml 8 | ocamlfind opt -o $@ -package csv,yojson -linkpkg json2csv.ml 9 | 10 | csv2csv: csv2csv.ml 11 | ocamlfind opt -o $@ -package csv -linkpkg csv2csv.ml 12 | 13 | .PHONY: demo 14 | demo: csv2json json2csv csv2csv 15 | ./csv2csv -din TAB < freebase/freebase.tsv > freebase.csv 16 | ./csv2json -d TAB < freebase/freebase.tsv | ydump > freebase.json 17 | cat freebase.json 18 | ./json2csv name nationality < freebase.json 19 | 20 | ifndef PREFIX 21 | PREFIX = $(HOME) 22 | endif 23 | 24 | ifndef BINDIR 25 | BINDIR = $(PREFIX)/bin 26 | endif 27 | 28 | .PHONY: install 29 | install: 30 | cp csv2json json2csv csv2csv $(BINDIR) 31 | 32 | .PHONY: clean 33 | clean: 34 | rm -f *.o *.cm* *~ 35 | rm -f csv2json json2csv csv2csv freebase.csv freebase.json 36 | -------------------------------------------------------------------------------- /csv2csv.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | let set_delim r s = 4 | match s with 5 | | "TAB" -> r := '\t' 6 | | s when String.length s = 1 -> r := s.[0] 7 | | s -> 8 | raise 9 | (Arg.Bad "Delimiter must be a single ascii character or TAB") 10 | 11 | let main () = 12 | let delim_in = ref ',' in 13 | let delim_out = ref ',' in 14 | let options = [ 15 | "-din", Arg.String (set_delim delim_in), 16 | " 17 | Input field delimiter (byte or the string 'TAB'; default: ',')"; 18 | "-dout", Arg.String (set_delim delim_out), 19 | " 20 | Output field delimiter (byte or the string 'TAB'; default: ',')"; 21 | ] 22 | in 23 | let anon_fun s = raise (Arg.Bad ("Don't know what to do with " ^ s)) in 24 | let usage_msg = "Usage: csv2json [options]\nOptions:\n" in 25 | Arg.parse options anon_fun usage_msg; 26 | let ic = stdin in 27 | let oc = stdout in 28 | let input = Csv.of_channel ~separator: !delim_in ic in 29 | let output = Csv.to_channel ~separator: !delim_out oc in 30 | try 31 | while true do 32 | Csv.output_record output (Csv.next input) 33 | done; 34 | assert false 35 | 36 | with End_of_file -> 37 | flush oc 38 | 39 | let () = main () 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Mr. Number 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. The name of the author may not be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /json2csv.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | let string_of_json (x : Yojson.Basic.json) = 4 | match x with 5 | | `String s -> s 6 | | `Int i -> string_of_int i 7 | | `Float f -> sprintf "%f" f 8 | | `Bool b -> if b then "1" else "0" 9 | | `Null -> "" 10 | | `List _ 11 | | `Assoc _ -> Yojson.Basic.to_string x 12 | 13 | let get_field k json = 14 | match json with 15 | `Assoc l -> 16 | (try string_of_json (List.assoc k l) 17 | with Not_found -> "") 18 | | _ -> "" 19 | 20 | let translate_record oc colnames json = 21 | let row = List.map (fun k -> get_field k json) colnames in 22 | Csv.output_record oc row 23 | 24 | let main () = 25 | let delim = ref ',' in 26 | let no_header = ref false in 27 | let colnames = ref [] in 28 | let options = [ 29 | "-d", Arg.String ( 30 | function 31 | | "TAB" -> delim := '\t' 32 | | s when String.length s = 1 -> delim := s.[0] 33 | | s -> 34 | raise 35 | (Arg.Bad "Delimiter must be a single ascii character or TAB") 36 | ), 37 | " 38 | Field delimiter (single byte or the string 'TAB'; default: ',')"; 39 | 40 | "-n", Arg.Set no_header, 41 | " 42 | Omit header row containing field names." 43 | ] 44 | in 45 | let anon_fun s = colnames := s :: !colnames in 46 | let usage_msg = "\ 47 | Usage: json2csv [options] COLNAME1 [COLNAME2 ...] 48 | Options: 49 | " 50 | in 51 | Arg.parse options anon_fun usage_msg; 52 | let colnames = List.rev !colnames in 53 | let ic = stdin in 54 | let oc = stdout in 55 | let stream = Yojson.Basic.stream_from_channel ic in 56 | let csv_out = Csv.to_channel ~separator: !delim oc in 57 | if not !no_header then 58 | Csv.output_record csv_out colnames; 59 | Stream.iter (translate_record csv_out colnames) stream; 60 | flush oc 61 | 62 | let () = main () 63 | -------------------------------------------------------------------------------- /csv2json.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | type header_kind = First_line | Numbered of int 4 | 5 | let make_numbered_labels first l = 6 | let _, acc = 7 | List.fold_left 8 | (fun (i, acc) x -> (i+1, (string_of_int i, x) :: acc)) 9 | (first, []) l 10 | in 11 | List.rev acc 12 | 13 | let main () = 14 | let header_kind = ref First_line in 15 | let delim = ref ',' in 16 | let options = [ 17 | "-d", Arg.String ( 18 | function 19 | | "TAB" -> delim := '\t' 20 | | s when String.length s = 1 -> delim := s.[0] 21 | | s -> 22 | raise 23 | (Arg.Bad "Delimiter must be a single ascii character or TAB") 24 | ), 25 | " 26 | Field delimiter (single byte or the string 'TAB'; default: ',')"; 27 | 28 | "-n", Arg.Unit (fun () -> header_kind := Numbered 1), 29 | " 30 | Use numbers for field labels, treat first row as data"; 31 | 32 | "-n0", Arg.Unit (fun () -> header_kind := Numbered 0), 33 | " 34 | Use numbers for field labels starting from 0, 35 | treat first row as data"; 36 | 37 | "-n1", Arg.Unit (fun () -> header_kind := Numbered 0), 38 | " 39 | Use numbers for field labels starting from 1, 40 | treat first row as data (same as -n)"; 41 | ] 42 | in 43 | let anon_fun s = raise (Arg.Bad ("Don't know what to do with " ^ s)) in 44 | let usage_msg = "Usage: csv2json [options]\nOptions:\n" in 45 | Arg.parse options anon_fun usage_msg; 46 | let ic = stdin in 47 | let oc = stdout in 48 | let stream = Csv.of_channel ~separator: !delim ic in 49 | let errors = ref 0 in 50 | let header_kind = !header_kind in 51 | try 52 | let head = 53 | match header_kind with 54 | First_line -> Csv.next stream 55 | | Numbered first -> [] 56 | in 57 | while true do 58 | try 59 | let pairs = 60 | let row = Csv.next stream in 61 | match header_kind with 62 | First_line -> 63 | (try List.combine head row 64 | with _ -> incr errors; raise Exit) 65 | | Numbered first -> 66 | make_numbered_labels first row 67 | in 68 | let fields = 69 | List.fold_right ( 70 | fun (k, v) acc -> 71 | match v with 72 | "" -> acc 73 | | v -> (k, `String v) :: acc 74 | ) pairs [] 75 | in 76 | fprintf oc "%s\n" (Yojson.Basic.to_string (`Assoc fields)) 77 | with Exit -> () 78 | done; 79 | assert false 80 | 81 | with End_of_file -> 82 | flush oc; 83 | if !errors <> 0 then ( 84 | eprintf "%i errors\n%!" !errors; 85 | false 86 | ) 87 | else 88 | true 89 | 90 | let () = 91 | if main () then exit 0 92 | else exit 1 93 | -------------------------------------------------------------------------------- /freebase/freebase.tsv: -------------------------------------------------------------------------------- 1 | name id date_of_birth place_of_birth nationality religion gender metaweb_user_s parents children employment_history signature spouse_s sibling_s weight_kg height_meters education profession quotations places_lived ethnicity quotationsbook_id age tvrage_id notable_professions languages 2 | Charles Gyamfi /m/03d8c1t 1929 Accra Ghana Male 1.83 /m/03pmxqz 3 | Fabián Santana /m/03d8_8g 1985-07-29 General Rodríguez Argentina 1.81 /m/03pt51b 4 | Alfred Jay Moran Jr /m/03ws3hy Male 5 | Francisco de Rioja /m/0413fcb 1583 Male 6 | John Fullarton /m/03wdspz 1645 7 | Khin Aung Myint /m/03d88cz Male Politician 8 | Robert P Stiller /m/03wxwky Male /m/03w_5mf 9 | Georgi Nikolov /m/04n5zc6 1983-06-05 Sofia Bulgaria Male 1.89 10 | Michael E Morrill /m/03wxw1c Male /m/03x65p9 11 | Jamie Anderson /m/0413jt_ 1990-09-13 South Lake Tahoe United States of America Female Snowboarder 12 | Gérard Dionne /m/04n5xmw 1919-06-19 Saint-Basile, New Brunswick Canada Roman Catholicism Male 13 | Stephen Lazarus /m/03ws34_ Male /m/03x8bz0,/m/03x8byv 14 | Takuya Honda /m/03yf3pv 1985-04-17 Sagamihara Japan 1.77 15 | Herman Foster /m/03xns5f Male 16 | Peter Z Horvath /m/03wxvvh Male /m/03x3w8p 17 | Saúl Mendoza /m/03wzkp 1967-01-06 Male 18 | Eric Anderson /m/04dyv3 1915-09-15 Bradford Male 1.88 19 | Pedro Zape /m/04n5wb8 1949-06-03 Colombia Colombia Male 20 | Tony Bird /m/03wfcks 1974-09-01 Cardiff Wales Male 21 | Kaushik Bagchi /m/03d7yrk 22 | Earl W Cole III /m/03wxvpl Male /m/03x6cgv 23 | Michael J Hannon /m/03wxvc8 Male /m/03x7m9r 24 | Rayner Heppenstall /m/04h2h9 1911-07-27 England Male Novelist 25 | Dedo I Merseburg /m/03wdrq3 Male Dietrich I von Wettin Marquis Dietrich II of Niederlausitz 26 | Chris Ayers /m/03wxv8s Male /m/03xdhcx 27 | John Mumford /m/03d7g5_ 1957 United States of America Male 28 | Renée Faure /m/04n5qmp 1919-11-04 Paris Female 29 | Pablo Bastianini /m/047r4s8 1982-11-09 Zárate Argentina Male 1.87 30 | Gregory Hansen /m/03wxtmy Male /m/03x81pv 31 | Karl Grossman /m/04ngw4 1863-12-13 Neuruppin Germany Male 32 | Karen J May /m/03wxt_d Lancaster United States of America Female /m/04g689c,/m/04j8kv4,/m/04j8kvj,/m/04j8kvx,/m/04g8b6w,/m/04j8kts,/m/04g8b76 /m/04g8bjk Businessperson,Certified Public Accountant /m/04j74l9 Caucasian race 33 | Herman Koehler /m/03d7f29 1859-12-14 Wisconsin Male 34 | Juan Lembeye /m/04h2fw 1816 Male 35 | Lyman F. Sheats Jr. /m/04hf3dp 36 | Fuchsia Dunlop /m/04jf5fs Chef 37 | Sten Hagander /m/03d7d7_ 1891-11-22 Sweden Male 38 | Lee Ann Raz /m/04142kf 1979-10-03 Israel Female Singer 39 | Eric Alterman /m/03h912r Male /m/03h912p 40 | William Chapple /m/047r44b 1864-07-14 Alexandra 41 | Tamlyn Tomita /m/04nhnb 1966-01-27 Okinawa Prefecture United States of America,Japan Female 1.57 /m/03yw55l Actor /m/03pj8k9,/m/03yw55v Japanese American 42 | Barbara Reskin /m/03d77k2 Saint Paul Female /m/04hh0sq 43 | Robert Bilheimer /m/0414928 44 | John Merton /m/04njyfy 1901-02-18 Washington, D.C. United States of America Male Lane Bradford,Bob LaVarre /m/04njykx Actor 45 | Julie H Edwards /m/03ws2sh Female 46 | Brenda Carty /m/04g37hs Female 47 | Jasmine /m/04g370z 48 | Joseph S. Fruton /m/0414g7g 1912-05-14 Częstochowa Poland,United States of America Male /m/04fsb35 49 | Robert L Bauman /m/03wxt3c Male /m/03x0vf_,/m/03x0vg5 50 | Aroti Dutt /m/0414hf3 1924 Kolkata India Satyendra Chandra Mitra 51 | Georges Dionne /m/0414hjm Male 52 | Paul Harrison /m/03xnr1_ 53 | Agnès Humbert /m/04n5pb1 1894-10-12 Dieppe Female 54 | John Lecompte /m/0414khm 1914-07-28 Male 55 | Larry Lacewell /m/03d73pc 1937-02-12 Fordyce Male Coach /m/03pdw7l 56 | James G Andress /m/03wqsj0 Male /m/04jrkh2,/m/04jrkgq,/m/04g9_fg,/m/04g9_f2,/m/04g9_dy,/m/04j75vf,/m/04j75vp,/m/04j75v4,/m/04jrkgb,/m/04j75w3 /m/04g9_bb,/m/04g9_bk Businessperson /m/04g9_ft Caucasian race 57 | Keith Arkell /m/03d7312 1961-01-08 Birmingham Male /m/07y4jf3 58 | Kevin Mcleod /m/03wxsxh Male 59 | Alexander Wilson /m/03c2_h7 Male 60 | Jean Paul Richard /m/03wxsrl 61 | Steven Markheim /m/03wtt_6 Male /m/03xbdzs 62 | Patrick F Daly /m/03wxsq0 Male 63 | François Louis Thomas Francia /m/04dxl0 1772 France Male 64 | Boris Berman /m/03d6x7n 1948-04-03 Moscow United States of America Male Pianist /m/03ph41x 65 | M.R. Dhar /m/04nkl4t 66 | Nicolas Lupot /m/03wdkll 1758 Stuttgart Male 67 | David Leroy Nickens /m/04ctgzt 1794 Virginia United States of America Male African American 68 | Ashutosh Roy /m/03ws2pj /m/03x63zm,/m/03x63zf 69 | Frank D'Amelio /m/04dxf75 Male /m/04kcbry,/m/04kcbsz,/m/04kcbsq,/m/04kcbtd,/m/04kcbsg,/m/04dxf72,/m/04kcbs5 /m/04dy3yn,/m/04dy3yy 70 | Henry S. Randall /m/0414z8h 1811-05-03 Male 71 | Henryk Chmielewski /m/04dxdr 1923-06-07 Warsaw Male 72 | Hans Geyer /m/03ws2c6 Male 73 | James R Grover Jr /m/03wxr3n Male 74 | Annie Armstrong /m/0415635 1850-07-11 Baltimore Female /m/04hfjxc 75 | Jean-Pascal Chaigne /m/041575z 1977 France 76 | Zdeňka Veřmiřovská /m/03d6l3v 1913-06-27 Kopřivnice Female 77 | Giosuè Carducci /m/03dsx 1835-07-27 Pietrasanta Italy Male Teacher,Poet 78 | Eva Baeza /m/04nl2ly Female 79 | Lene Maria Christensen /m/04nl2xq Female 80 | Demetrios Pieridis /m/0415943 1811 81 | Charles W. Young /m/03m2hgb Male /m/03m2hg8 82 | Pablo Herrera Barrantes /m/04n5lw9 1987-02-14 Alajuela Costa Rica Male 1.68 83 | George A Cope /m/03wxr22 /m/05zppz 84 | Youra Guller /m/03d6hcl 1895 Marseille Female Pianist 85 | Elena Hill /m/04nl4ct Female 86 | Jack Esch /m/03xnqjl 87 | Yvon Bilodeau /m/03d6gsv 1951-01-18 Vimy, Alberta Canada Male 95.2543977 88 | Gregory K Bell /m/03wxr0p Male /m/03x7btb 89 | Crawford Baptie /m/0415_37 1959-02-24 Glasgow Scotland Male 90 | Michael Burke /m/047qyk_ 1977-02-25 Jacksonville United States of America 91 | Francis L. Dade /m/0415b1 1793 Male /m/03pl1ss 92 | Tomáš Hubáček /m/03xp4p5 93 | James J. Cline /m/03d6dxt Pomona Male 94 | Kim Bo-young /m/04nl8kw 95 | Paul Wiseman /m/03wxqz5 Male /m/03w_ms9 96 | Vivek Paul /m/03g1h7v 97 | Emil Schoenemann /m/0468c3y 1882-04-18 Berlin Germany Male Cinematographer 98 | Steven M Chapman /m/03wxqxs Male /m/03x82r5 99 | Heinrich Julian Schmidt /m/04nldq 1818-03-07 Kwidzyn Germany Male Journalist 100 | Julián Bourges /m/03d6d5d Buenos Aires Argentina Male /m/04hh1bb 101 | --------------------------------------------------------------------------------