├── ebin └── .gitignore ├── vsn.mk ├── rebar ├── doc ├── erlsom.doc └── image001.gif ├── examples ├── continuation │ ├── itunes_example_be.xml │ ├── BookStore.xml │ ├── itunes_example.xml │ └── continuation_example.erl ├── example1 │ ├── valid.xml │ ├── abb4.xsd │ ├── example1.hrl │ ├── abb1.xsd │ ├── abb3.xsd │ ├── abb7.xsd │ ├── abb2.xsd │ ├── abb11.xsd │ ├── abb5.xsd │ ├── abb6.xsd │ ├── abb8.xsd │ ├── abb9.xsd │ ├── example1.xsd │ ├── abb10.xsd │ ├── example1.xml │ └── example1.erl ├── complex_form │ ├── foo.xml │ ├── complex_form_example.erl │ └── erlsom_complex_form.erl ├── erlsom_example │ ├── example_in.xml │ ├── example_in.xsd │ ├── example_out.xsd │ ├── erlsom.hrl │ └── erlsom_example.erl ├── book_store │ ├── BookStore.hrl │ ├── BookStore.xsd │ ├── book_store.erl │ └── BookStore.xml ├── soap_example │ ├── example_in.xml │ ├── example_in.xsd │ ├── example_out.xsd │ ├── erlsom.hrl │ ├── soap_example.erl │ └── soap-envelope.xsd └── erlsom_sax_example │ ├── erlsom_sax_example.erl │ ├── tpp_auth.xml │ ├── sax_example.xml │ ├── search_request.xsd │ └── erlsom_simple_form.erl ├── priv ├── extension │ ├── simpleContentExtension.xml │ ├── Makefile │ ├── extension.xml │ ├── extension.xsd │ └── simpleContentExtension.xsd ├── choice_complex │ ├── choice_complex.xml │ └── choice_complex.xsd ├── all │ ├── all.xml │ └── all.xsd ├── xsi_type │ ├── ext.xml │ ├── base.xsd │ └── ext.xsd ├── xsi_type_no_prefix │ ├── test.xml │ └── test.xsd └── gexf │ ├── schema │ ├── hierarchy.xsd │ ├── phylogenics.xsd │ ├── dynamics.xsd │ ├── gexf.xsd │ ├── viz.xsd │ └── data.xsd │ └── data │ ├── basic.gexf │ ├── data.gexf │ ├── test.gexf │ └── dynamics.gexf ├── .gitignore ├── .project ├── include ├── exception.hrl └── erlsom.hrl ├── rebar.config ├── CHANGELOG.md ├── src ├── erlsom.app.src ├── erlsom.hrl ├── erlsom_add.erl ├── erlsom_sax.hrl ├── erlsom_parse.hrl ├── erlsom_compile.hrl ├── erlsom_simple_form.erl ├── erlsom_sax.erl ├── erlsom_sax_lib.erl ├── erlsom_ucs.erl ├── 
erlsom_writeHrl.erl └── erlsom_example_value.erl ├── make-upload ├── make-release ├── Makefile ├── test ├── erlsom_gexf_tests.erl └── erlsom_tests.erl └── COPYING.LESSER /ebin/.gitignore: -------------------------------------------------------------------------------- 1 | erlsom.app 2 | *.beam 3 | -------------------------------------------------------------------------------- /vsn.mk: -------------------------------------------------------------------------------- 1 | ERLSOM_VSN=1.4.1 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willemdj/erlsom/HEAD/rebar -------------------------------------------------------------------------------- /doc/erlsom.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willemdj/erlsom/HEAD/doc/erlsom.doc -------------------------------------------------------------------------------- /doc/image001.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willemdj/erlsom/HEAD/doc/image001.gif -------------------------------------------------------------------------------- /examples/continuation/itunes_example_be.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willemdj/erlsom/HEAD/examples/continuation/itunes_example_be.xml -------------------------------------------------------------------------------- /examples/example1/valid.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 123 4 | value 1 5 | value 2 6 | 7 | -------------------------------------------------------------------------------- /priv/extension/simpleContentExtension.xml: -------------------------------------------------------------------------------- 1 | 2 
| myuri 3 | -------------------------------------------------------------------------------- /priv/choice_complex/choice_complex.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | text 4 | 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.beam 3 | config.log 4 | config.status 5 | include.mk 6 | .rebar 7 | .eunit 8 | .test 9 | _build/ 10 | erlsom.plt 11 | rebar.lock 12 | -------------------------------------------------------------------------------- /priv/extension/Makefile: -------------------------------------------------------------------------------- 1 | all: validate 2 | 3 | validate: 4 | @xmllint --noout --schema extension.xsd extension.xml 5 | @xmllint --noout --schema simpleContentExtension.xsd simpleContentExtension.xml 6 | -------------------------------------------------------------------------------- /examples/complex_form/foo.xml: -------------------------------------------------------------------------------- 1 | 2 | x 3 | x 4 | y 5 | 6 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | erlsom 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /priv/extension/extension.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Bart 4 | Simpson 5 | Springfield 6 | U.S.A. 7 |
Evergreen Terrace
8 |
9 | -------------------------------------------------------------------------------- /include/exception.hrl: -------------------------------------------------------------------------------- 1 | -ifdef(OTP_RELEASE). %% this implies 21 or higher 2 | -define(EXCEPTION(Class, Reason, Stacktrace), Class:Reason:Stacktrace). 3 | -define(GET_STACK(Stacktrace), Stacktrace). 4 | -else. 5 | -define(EXCEPTION(Class, Reason, _), Class:Reason). 6 | -define(GET_STACK(_), erlang:get_stacktrace()). 7 | -endif. 8 | -------------------------------------------------------------------------------- /priv/all/all.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | some name 5 | some name 6 | 7 | 8 | some name1 9 | some name2 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/erlsom_example/example_in.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 3 4 | 7 5 | 1 6 | 123 7 | 17 8 | 2 9 | 10 | -------------------------------------------------------------------------------- /priv/xsi_type/ext.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | a 8 | b 9 | 10 | -------------------------------------------------------------------------------- /examples/example1/abb4.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /examples/example1/example1.hrl: -------------------------------------------------------------------------------- 1 | %% HRL file generated by ERLSOM 2 | %% 3 | %% It is possible to change the name of the record fields. 4 | %% 5 | %% It is possible to add default values, but be aware that these will 6 | %% only be used when *writing* an xml document. 7 | 8 | -record('case', {anyAttribs, 'name', 'result', 'xsd', 'xml'}). 
9 | -record('testConfig', {anyAttribs, 'path', 'case'}). 10 | -------------------------------------------------------------------------------- /examples/book_store/BookStore.hrl: -------------------------------------------------------------------------------- 1 | %% HRL file generated by ERLSOM 2 | %% 3 | %% It is possible to change the name of the record fields. 4 | %% 5 | %% It is possible to add default values, but be aware that these will 6 | %% only be used when *writing* an xml document. 7 | 8 | -record('book_type', {anyAttribs, 'ISBN', 'title', 'author', 'date', 'publisher'}). 9 | -record('book_store', {anyAttribs, 'book'}). 10 | -------------------------------------------------------------------------------- /examples/example1/abb1.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/example1/abb3.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /priv/xsi_type_no_prefix/test.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | base 8 | ext 9 | 10 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [debug_info]}. 2 | 3 | %% Options for running dialyzer 4 | %% {plt, PltFile} 5 | %% 'src': run Dialyzer on the source files as in 'dialyzer --src' 6 | %% {warnings, [WarnOpts]}: turn on/off Dialyzer warnings 7 | {dialyzer_opts, [{plt, "erlsom.plt"}, 8 | {warnings, [race_conditions]}, 9 | src]}. 10 | {eunit_opts, [verbose]}. 11 | {cover_enabled, true}. 
12 | -------------------------------------------------------------------------------- /priv/gexf/schema/hierarchy.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /priv/gexf/data/basic.gexf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/example1/abb7.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /examples/example1/abb2.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/soap_example/example_in.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 3 6 | 7 7 | 1 8 | 123 9 | 17 10 | 3 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /priv/xsi_type/base.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | CHANGELOG 2 | ========= 3 | 4 | 1.4.0 5 | ----- 6 | 7 | * New option `strict` to enforce additional type conversion and checking. 8 | 9 | * Several modifications to allow better embedding of the parser (not 10 | documented). 11 | 12 | * :warning: `models` that were compiled with earlier versions are no 13 | longer supported. 
If you have stored a `model` and you want to upgrade 14 | to the new version, the model must be recompiled. 15 | 16 | 1.3.1 17 | ----- 18 | 19 | * First tagged version. 20 | -------------------------------------------------------------------------------- /examples/example1/abb11.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/example1/abb5.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/erlsom.app.src: -------------------------------------------------------------------------------- 1 | {application,erlsom, 2 | [{description,"XML parser. Supports SAX style parsing as well as XML Schema based data mapping: create records from XML (and vice versa)"}, 3 | {vsn,"1.5.2"}, 4 | {modules,[]}, 5 | {maintainers,["Willem de Jong"]}, 6 | {licenses,["GNU Lesser GPL, Version 3"]}, 7 | {links,[{"Github","https://github.com/willemdj/erlsom"}]}, 8 | {registered,[]}, 9 | {env,[]}, 10 | {applications,[kernel,stdlib,inets]}]}. 
11 | -------------------------------------------------------------------------------- /make-upload: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This is to make it a wee bit more convenient for 4 | # me to publish the tar balls 5 | # set -x 6 | 7 | if [ $# != 1 ]; then 8 | echo 'usage: make-upload ' 9 | exit 1 10 | fi 11 | 12 | 13 | echo "cd incoming " > /tmp/cmds.$$ 14 | echo "put $1 " >> /tmp/cmds.$$ 15 | ncftp -u anonymous -p klacke@hyber.org upload.sourceforge.net < /tmp/cmds.$$ 16 | rm /tmp/cmds.$$ 17 | 18 | 19 | 20 | echo "Now login at sourceforge at go the page" 21 | echo "http://sourceforge.net/project/admin/editpackages.php?group_id=45637" 22 | echo "and create the new package ... " 23 | -------------------------------------------------------------------------------- /examples/example1/abb6.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/erlsom_sax_example/erlsom_sax_example.erl: -------------------------------------------------------------------------------- 1 | %%% a simple example of the use of erlsom. 2 | %%% 3 | -module(erlsom_sax_example). 4 | 5 | %% user interface 6 | -export([run/0]). 7 | 8 | run() -> 9 | case file:read_file(xml()) of 10 | {ok, Bin} -> 11 | {ok, _, _} = erlsom:parse_sax(Bin, ok, fun callback/2); 12 | Error -> 13 | Error 14 | end, 15 | ok. 16 | 17 | callback(Event, State) -> 18 | io:format("~p\n", [Event]), 19 | State. 20 | 21 | %% this is just to make it easier to test this little example 22 | xml() -> filename:join([codeDir(), "sax_example.xml"]). 23 | codeDir() -> filename:dirname(code:which(?MODULE)). 
24 | -------------------------------------------------------------------------------- /examples/erlsom_example/example_in.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/example1/abb8.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/soap_example/example_in.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /examples/complex_form/complex_form_example.erl: -------------------------------------------------------------------------------- 1 | %% shows how you could (in theory, at least) apply xpath to the 2 | %% result of erlsom_complex_form. 3 | -module(complex_form_example). 4 | -include_lib("xmerl/include/xmerl.hrl"). 5 | 6 | -export([run/0]). 7 | 8 | run() -> 9 | {ok, ParsedDoc, _} = erlsom_complex_form:scan_file(xml()), 10 | Result = xmerl_xpath:string("//myelement[. = 'x']/text()", ParsedDoc), 11 | io:format("result of Xpath query \"//myelement[. = 'x']/text()\"~n"), 12 | io:format("~p~n", [Result]). 13 | 14 | %% this is just to make it easier to test this little example 15 | xml() -> filename:join([codeDir(), "foo.xml"]). 16 | codeDir() -> filename:dirname(code:which(?MODULE)). 
17 | -------------------------------------------------------------------------------- /examples/example1/abb9.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /priv/xsi_type/ext.xsd: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /make-release: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -x 4 | . ./vsn.mk 5 | 6 | echo tagging and packing release ${ERLSOM_VSN} 7 | 8 | CVS_RSH=ssh 9 | Y=`echo ${ERLSOM_VSN} | sed 's/\./-/g'` 10 | 11 | if [ ! -f ./make-release ]; then 12 | echo "need to be in top dir"; exit 1; 13 | fi 14 | 15 | cd .. 16 | rm -rf downloads 2> /dev/null 17 | rm -rf erlsom-0.0.1 2> /dev/null 18 | 19 | cvs tag -F erlsom-${Y} 20 | 21 | rm -rf tmp 22 | mkdir tmp 23 | 24 | cvs export -d tmp -r erlsom-$Y . 
25 | cd tmp 26 | rm -rf erlsom/CVSROOT 27 | rm erlsom/make-release 28 | rm erlsom/make-upload 29 | mv erlsom erlsom-${ERLSOM_VSN} 30 | tar cfz erlsom-${ERLSOM_VSN}.tar.gz erlsom-${ERLSOM_VSN} 31 | 32 | 33 | echo new release resides in `pwd`/erlsom-${ERLSOM_VSN}.tar.gz 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APPLICATION := erlsom 2 | 3 | ERL := erl 4 | EPATH := -pa ebin 5 | TEST_EPATH := -pa .eunit 6 | 7 | DIALYZER=dialyzer 8 | DIALYZER_OPTS=-Wno_return -Wrace_conditions -Wunderspecs -Wbehaviours 9 | PLT_FILE=.erlsom_plt 10 | APPS=kernel stdlib 11 | 12 | .PHONY: all clean test 13 | 14 | all: compile 15 | 16 | compile: 17 | @./rebar compile 18 | 19 | doc: 20 | @./rebar doc 21 | 22 | clean: 23 | @./rebar clean 24 | 25 | build-plt: compile 26 | @./rebar build-plt 27 | 28 | check-plt: compile 29 | @./rebar check-plt 30 | 31 | dialyze: 32 | @./rebar dialyze 33 | 34 | eunit: 35 | @./rebar eunit 36 | 37 | shell: compile 38 | $(ERL) -sname $(APPLICATION) $(EPATH) 39 | 40 | touch: 41 | find . -name '*' -print | xargs touch -m 42 | find . 
-name '*.erl' -print | xargs touch -m 43 | -------------------------------------------------------------------------------- /examples/example1/example1.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /priv/all/all.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /priv/choice_complex/choice_complex.xsd: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/example1/abb10.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /priv/extension/extension.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /priv/xsi_type_no_prefix/test.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /examples/soap_example/example_out.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 12 
| 13 | 14 | 15 | 16 | 17 | 18 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /priv/gexf/schema/phylogenics.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /examples/erlsom_example/example_out.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /examples/book_store/BookStore.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /examples/erlsom_sax_example/tpp_auth.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | Field 1 8 | Text 1 9 | 10 | 11 | 12 | testnumber 13 | 14 | 15 | 491703434123 16 | 17 | 18 | 1 19 | 2 20 | 21 | 22 | 2004-04-28T10:46:34.797Z 23 | Pizza 24 | 25 | 26 | 2004-04-28T10:46:34.796Z 27 | 28 | 29 | test001 30 | 31 | 32 | 116 33 | 100 34 | 16 35 | FULL 36 | 2 37 | EUR 38 | 39 | 40 | -------------------------------------------------------------------------------- /examples/book_store/book_store.erl: -------------------------------------------------------------------------------- 1 | %%% a simple example of the use of erlsom. 2 | %%% 3 | -module(book_store). 4 | -include("BookStore.hrl"). 5 | 6 | %% user interface 7 | -export([run/0]). 
8 | 9 | run() -> 10 | %% compile xsd 11 | {ok, Model} = erlsom:compile_xsd_file(xsd()), 12 | %% parse xml 13 | {ok, #book_store{book=Books}, _} = erlsom:scan_file(xml(), Model), 14 | %% do something with the content 15 | lists:foreach(fun process_book/1, Books). 16 | 17 | process_book(#book_type{'ISBN' = ISBN, title = Title, author = Author, 18 | date = Date, publisher = Publisher}) -> 19 | io:format("Title : ~s~n" 20 | "Author : ~s~n" 21 | "Publisher: ~s~n" 22 | "ISBN : ~s~n" 23 | "Date : ~s~n~n", 24 | [Title, Author, Publisher, ISBN, formatDate(Date)]). 25 | 26 | formatDate(undefined) -> ""; 27 | formatDate(Date) -> Date. 28 | 29 | %% this is just to make it easier to test this little example 30 | xsd() -> filename:join([codeDir(), "BookStore.xsd"]). 31 | xml() -> filename:join([codeDir(), "BookStore.xml"]). 32 | codeDir() -> filename:dirname(code:which(?MODULE)). 33 | -------------------------------------------------------------------------------- /examples/erlsom_sax_example/sax_example.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 11 | 12 | 13 | '> 14 | %xx; 15 | ]> 16 | 17 | 18 | 3 19 | 7 20 | ]]> and some more text 21 | and now an entity: < and some more text 22 | and now defined entity: &zz; and some more text 23 | and now the weird case: &aa; and some more text 24 | 123 25 | 17 26 | 27 | 28 | 2 29 | 30 | -------------------------------------------------------------------------------- /priv/extension/simpleContentExtension.xsd: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /examples/erlsom_example/erlsom.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 
2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% Header file for erlsom 23 | %%% ==================================================================== 24 | 25 | %% prefix=the prefix that will be used in the result 26 | 27 | -ifndef(_ERLSOM_HRL_). 28 | -define(_ERLSOM_HRL_, true). 29 | 30 | -record(ns, {uri, prefix}). 31 | -record(qname, {uri, localPart, prefix, mappedPrefix}). 32 | 33 | -endif. 34 | -------------------------------------------------------------------------------- /src/erlsom.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 
9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% Header file for erlsom 23 | %%% ==================================================================== 24 | 25 | %% prefix=the prefix that will be used in the result 26 | -record(ns, {uri, 27 | prefix, 28 | efd = unqualified :: qualified | unqualified % elementFormDefault 29 | }). 30 | -record(qname, {uri, localPart, prefix, mappedPrefix}). 31 | -------------------------------------------------------------------------------- /examples/soap_example/erlsom.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 
18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% Header file for erlsom 23 | %%% ==================================================================== 24 | 25 | %% prefix=the prefix that will be used in the result 26 | 27 | -ifndef(_ERLSOM_HRL_). 28 | -define(_ERLSOM_HRL_, true). 29 | 30 | 31 | -record(ns, {uri, prefix}). 32 | -record(qname, {uri, localPart, prefix, mappedPrefix}). 33 | 34 | -endif. 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /examples/book_store/BookStore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Het zijn net mensen 5 | Joris Luyendijk 6 | 2006 7 | Podium 8 | 9 | 10 | Portnoy's Complaint 11 | Philip Roth 12 | 1993 13 | Vintage 14 | 15 | 16 | My Life and Times 17 | Paul McCartney 18 | 1998 19 | McMillin publishing 20 | 21 | 22 | Illusions The Adventures of a Reluctant Messiah 23 | Richard Bach 24 | 1977 25 | Dell publishing Co. 26 | 27 | 28 | The First and Last Freedom 29 | J. Krishnamurti 30 | Harper & Row 31 | 32 | 33 | -------------------------------------------------------------------------------- /include/erlsom.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% Header file for erlsom 23 | %%% ==================================================================== 24 | 25 | %% prefix=the prefix that will be used in the result 26 | 27 | -ifndef(_ERLSOM_HRL_). 28 | -define(_ERLSOM_HRL_, true). 29 | 30 | 31 | -record(ns, {uri, 32 | prefix, 33 | efd = unqualified :: qualified | unqualified % elementFormDefault 34 | }). 35 | -record(qname, {uri, localPart, prefix, mappedPrefix}). 36 | 37 | -endif. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /examples/example1/example1.xml: -------------------------------------------------------------------------------- 1 | 2 | . 3 | 4 | 5 | abb1 6 | OK 7 | abb1.xsd 8 | valid.xml 9 | 10 | 11 | 12 | abb2 13 | OK 14 | abb2.xsd 15 | valid.xml 16 | 17 | 18 | 19 | abb3 20 | OK 21 | abb3.xsd 22 | valid.xml 23 | 24 | 25 | 26 | abb4 27 | OK 28 | abb4.xsd 29 | valid.xml 30 | 31 | 32 | 33 | abb5 34 | OK 35 | abb5.xsd 36 | valid.xml 37 | 38 | 39 | 40 | abb6 41 | OK 42 | abb6.xsd 43 | valid.xml 44 | 45 | 46 | 47 | abb7 48 | OK 49 | abb7.xsd 50 | valid.xml 51 | 52 | 53 | 54 | abb8 55 | OK 56 | abb8.xsd 57 | valid.xml 58 | 59 | 60 | 61 | abb9 62 | OK 63 | abb9.xsd 64 | valid.xml 65 | 66 | 67 | 68 | abb10 69 | OK 70 | abb10.xsd 71 | valid.xml 72 | 73 | 74 | 75 | abb11 76 | OK 77 | abb11.xsd 78 | valid.xml 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /test/erlsom_gexf_tests.erl: -------------------------------------------------------------------------------- 1 | -module(erlsom_gexf_tests). 

%% ------------------------------------------------------------------
%% EUnit tests for the GEXF schema and its sample documents
%% ------------------------------------------------------------------

-include_lib("eunit/include/eunit.hrl").
-include_lib("erlsom/src/erlsom.hrl").
-include_lib("erlsom/src/erlsom_parse.hrl").

%% Arguments handed to the erlsom_tests helpers: the schema file and the
%% include paths, each written as a list of path components.
-define(XSD_FILE, ["gexf", "schema", "gexf.xsd"]).
-define(INCLUDE_PATHS, [["gexf", "schema"]]).

%% The GEXF schema must compile without errors.
compile_schema_test() ->
    {ok, _} = erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS).

%% The namespace list of the compiled model must be equal to its own
%% lists:usort/1 result (i.e. sorted and free of duplicates).
unique_namespaces_test() ->
    {ok, Compiled} = erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS),
    #model{nss = Nss} = Compiled,
    ?assertEqual(lists:usort(Nss), Nss),
    ok.

%% A sample document must parse against the compiled model.
parse_file_test() ->
    {ok, Compiled} = erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS),
    {ok, _} = erlsom_tests:parse_file(["gexf", "data", "test.gexf"], Compiled),
    ok.


%% @doc Guards against makeAttrRef returning ":parent-content" (with a
%% leading namespace delimiter) when the namespace is mapped to the empty
%% prefix; the expected reference is plain "parent-content".
leading_ns_delimeter_test_() ->
    Uri = "http://www.gexf.net/1.2draft",
    Namespaces = [#ns{uri = Uri, prefix = ""}],
    AttrRef = #qname{uri = Uri,
                     localPart = "parent-content",
                     prefix = "ns1",
                     mappedPrefix = []},
    [?_assertEqual("parent-content",
                   erlsom_lib:makeAttrRef(AttrRef, Namespaces))].


%% One {Title, Test} entry per sample document, built by
%% erlsom_tests:verify_stability_/3.
stability_test_() ->
    Documents = ["test.gexf", "basic.gexf", "data.gexf", "dynamics.gexf"],
    lists:map(
      fun(Doc) ->
              {Doc,
               erlsom_tests:verify_stability_(
                 ?XSD_FILE, ["gexf", "data", Doc], ?INCLUDE_PATHS)}
      end,
      Documents).
44 | -------------------------------------------------------------------------------- /priv/gexf/schema/dynamics.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /priv/gexf/data/data.gexf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Gephi.org 5 | A Web network 6 | 7 | 8 | 9 | 10 | 11 | 12 | true 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /priv/gexf/data/test.gexf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Gephi.org 5 | A Web network 6 | 7 | 8 | 9 | 10 | 11 | 12 | true 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /examples/erlsom_example/erlsom_example.erl: -------------------------------------------------------------------------------- 1 | %%% a simple example of the use of erlsom. 2 | %%% 3 | -module(erlsom_example). 4 | 5 | %% user interface 6 | -export([run/0]). 7 | 8 | %% define records 9 | -record('in:arguments', {anyAttribs, values, precision}). 10 | -record('out:resultType', {anyAttribs, result}). 11 | -record('out:resultType-error', {anyAttribs, error}). 12 | -record('out:resultType-okResult', {anyAttribs, value}). 13 | -record('out:errorType', {anyAttribs, errorCode, errorDescription}). 
14 | 15 | run() -> 16 | %% compile xsd 17 | {ok, ModelIn} = erlsom:compile_xsd_file(example_in_xsd(), [{prefix, "in"}, 18 | {strict, false}]), 19 | {ok, ModelOut} = erlsom:compile_xsd_file(example_out_xsd(),[{prefix, "out"}, 20 | {strict, false}]), 21 | 22 | %% parse xml 23 | {ok, Input, _} = erlsom:scan_file(example_in_xml(), ModelIn), 24 | 25 | %% do something with the content 26 | case Input of 27 | #'in:arguments'{values = undefined} -> 28 | Error = #'out:errorType'{errorCode = "01", 29 | errorDescription = "No arguments provided"}, 30 | Result = #'out:resultType-error'{error = Error}; 31 | #'in:arguments'{values = List, precision = Precision} -> 32 | Result = #'out:resultType-okResult'{value = calcAverage(List, Precision)} 33 | end, 34 | 35 | %% generate xml. 36 | Response = #'out:resultType'{result=Result}, 37 | XmlResult = erlsom:write(Response, ModelOut), 38 | io:format("Result: ~p~n", [XmlResult]), 39 | ok. 40 | 41 | calcAverage(List, Precision) -> 42 | calcAverage(List, Precision, 0, 0). 43 | calcAverage([], Precision, Acc, NrOfElements) -> 44 | lists:flatten(io_lib:format("~.*f", [Precision, Acc/NrOfElements])); 45 | calcAverage([Head|Tail], Precision, Acc, NrOfElements) -> 46 | calcAverage(Tail, Precision, Acc + Head, NrOfElements + 1). 47 | 48 | %% this is just to make it easier to test this little example 49 | example_in_xsd() -> filename:join([codeDir(), "example_in.xsd"]). 50 | example_out_xsd() -> filename:join([codeDir(), "example_out.xsd"]). 51 | example_in_xml() -> filename:join([codeDir(), "example_in.xml"]). 52 | codeDir() -> filename:dirname(code:which(?MODULE)). 53 | 54 | -------------------------------------------------------------------------------- /examples/example1/example1.erl: -------------------------------------------------------------------------------- 1 | -module(example1). 2 | -export([test_erlsom/1]). 3 | -export([run/0]). 
%% This example has 2 purposes:
%%
%% - It shows how easy Erlsom makes it for you to use an XML configuration
%%   file. The configuration file ("example1.xml") describes a set of 11 test
%%   cases, which are run by this example. The structure of the configuration
%%   file is described by "example1.xsd". Compiling this XSD and then parsing
%%   the configuration file gives you access to an Erlang structure of records
%%   that corresponds with the XML schema.
%%
%% - It shows how 11 different schemas (named "abb1.xsd" through "abb11.xsd")
%%   can describe the same XML document ("valid.xml" in the shipped
%%   configuration), and it shows the output that results from running Erlsom
%%   on this file using each of these schemas. To run the example for XSD
%%   abb1.xsd, use the command example1:test_erlsom("abb1").


%% example1.hrl contains the record definitions.
%% It was generated using erlsom:writeHrl
-include("example1.hrl").

%% Runs the case named "abb11".
run() ->
  test_erlsom("abb11").

%% Compiles example1.xsd, parses example1.xml with the resulting model and
%% executes the case whose name equals Test. Prints "Case not found" when
%% no case with that name is configured.
test_erlsom(Test) ->
  Dir = codeDir(),
  {ok, Model} = erlsom:compile_xsd_file(filename:join([Dir, "example1.xsd"])),
  {ok, TestSuite, _} = erlsom:scan_file(filename:join([Dir, "example1.xml"]),
                                        Model),
  case findCase(TestSuite#testConfig.'case', Test) of
    {ok, Case} -> execute_case(Case, Dir);
    _NotFound -> io:format("Case not found\n", [])
  end.

%% Compiles the XSD of one case (file names are relative to Path) and, when
%% that succeeds, parses the XML file of the case with the compiled model.
%% Returns the result of erlsom:parse_file/2, or the result of io:format/2
%% when the XSD could not be compiled.
execute_case(#'case'{name = Name, xsd = XSD, xml = XML}, Path) ->
  io:format("example: ~p\n", [Name]),
  XsdFile = filename:join([Path, XSD]),
  io:format("compiling xsd ~p...\n", [XsdFile]),
  case erlsom:compile_file(XsdFile, []) of
    {ok, Model} ->
      erlsom:parse_file(filename:join([Path, XML]), Model);
    {error, Message} ->
      io:format("XSD error: ~p\n", [Message])
  end.
49 | 50 | findCase([], _Name) -> 51 | false; 52 | findCase([Case = #'case'{name=Name}| _], Name) -> 53 | {ok, Case}; 54 | findCase([_| Tail], Name) -> 55 | findCase(Tail, Name). 56 | 57 | %% this is just to make it easier to test this little example 58 | codeDir() -> filename:dirname(code:which(?MODULE)). 59 | -------------------------------------------------------------------------------- /examples/continuation/BookStore.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | My Life and Times 5 | Paul McCartney 6 | 1998 7 | McMillin publishing 8 | 9 | 10 | Het zijn net mensen 11 | Joris Luyendijk 12 | 2006 13 | Podium 14 | 15 | 16 | Tannöd 17 | Andrea Maria Schenkel 18 | 2006 19 | btb 20 | 21 | 22 | 33 Augenblicke des Glücks 23 | Ingo Schulze 24 | 1995 25 | Süddeutsche Zeitung 26 | 27 | 28 | Portnoy's Complaint 29 | Philip Roth 30 | 1993 31 | Vintage 32 | 33 | 34 | Der Schwimmer 35 | Zsusza Bánk 36 | 2002 37 | Fischer Verlag 38 | 39 | 40 | My Life and Times 41 | Paul McCartney 42 | 1998 43 | McMillin publishing 44 | 45 | 46 | Illusions The Adventures of a Reluctant Messiah 47 | Richard Bach 48 | 1977 49 | Dell publishing Co. 50 | 51 | 52 | The First and Last Freedom 53 | J. 
Krishnamurti 54 | Harper & Row 55 | 56 | 57 | -------------------------------------------------------------------------------- /priv/gexf/data/dynamics.gexf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Gexf.net 5 | A Web network changing over time 6 | 7 | 8 | 9 | 10 | 11 | true 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /examples/continuation/itunes_example.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | Major Version1 8 | Minor Version1 9 | Application Version4.6 10 | Music Folder 11 | file://localhost/Users/niel/Music/iTunes/iTunes%20Music/ 12 | Library Persistent IDŒŒŒŒŒŒŒŒŒŒ 13 | Tracks 14 | 15 | 16 | 17 | 35 18 | 19 | Track ID35 20 | NameGula Gula 21 | ArtistJan Garbarek 22 | ComposerMari Boine Persen, arr Jan Garbarek 23 | AlbumI Took Up The Runes & look what I got & look where I am now 24 | GenreJazz 25 | KindAAC audio file 26 | Size5892093 27 | Total Time363578 28 | Disc Number1 29 | Disc Count1 30 | Track Number1 31 | Track Count10 32 | Year1990 33 | Date Modified2005-06-06T04:11:43Z 34 | Date Added2005-06-06T04:11:18Z 35 | Bit Rate128 36 | Sample Rate44100 37 | Track TypeFile 38 | Locationfile://localhost/C:/Documents%20and%20Settings/User1/My%20Documents/My%20Music/iTunes/iTunes%20Music/Jan%20Garbarek/I%20Took%20Up%20The%20Runes/01%20Gula%20Gula.m4a/ 39 | File Folder Count4 40 | Library Folder Count1 41 | 42 | 43 | 35 44 | 45 | Track ID35 46 | NameGula Gula 47 | ArtistJan Garbarek 48 | ComposerMari Boine Persen, arr Jan Garbarek 49 | AlbumI Took Up The Runes & look what I got & look where I am now 50 | File Folder Count4 51 | Library Folder Count1 52 | 53 | 54 | 36 55 
| 56 | Track ID36 57 | NameGula Gula 58 | ArtistJan Garbarek 59 | ComposerMari Boine Persen, arr Jan Garbarek 60 | AlbumI Took Up The Runes & look what I got & look where I am now 61 | File Folder Count4 62 | Library Folder Count1 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/erlsom_add.erl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% adds an XSD to an existing Erlsom Model 23 | %%% ==================================================================== 24 | 25 | %%% Adds an XSD/namespace to an existing model. This is useful only if the 26 | %%% existing model contains 'any' elements that have to be parsed. A typical 27 | %%% example is the soap envelope. In order to parse the body, the parser needs 28 | %%% to know it's 'model'. 

%%% Compiles the model for the imported xsd, adds the types to the existing
%%% model, adds all the alternatives from the _document element to the
%%% _document element of the existing model, adds the namespaces, and finally
%%% updates the alternatives for all 'any' types in the model.

-module(erlsom_add).
-export([add/3]).
-export([add_xsd_model/1]).
-export([add_model/2]).

-include("erlsom_parse.hrl").
-include("erlsom_compile.hrl").

%% debug(Text) ->
%%   io:format("~p\n", [Text]).


%% Abbreviated reminder of the records involved (the actual definitions
%% are included from erlsom_parse.hrl):
%% -record(model, {tps, nss, tns}).
%% -record(type, {nm, tp = sequence, els, atts = [], anyAttr, nillable, nr, mn = 1, mx = 1}).
%% -record(el, {alts, mn = 1, mx = 1, nr}).

%% Compiles Xsd with Options and merges the resulting model into Model1.
%% Returns the new #model.
add(Xsd, Options, Model1) ->
  {ok, Compiled} = erlsom:compile_xsd(Xsd, Options),
  add_model(Model1, Compiled).

%% Merges the model returned by erlsom_parseXsd:xsdModel/0 into Model1.
add_xsd_model(Model1) ->
  XsdModel = erlsom_parseXsd:xsdModel(),
  add_model(Model1, XsdModel).
58 | 59 | add_model(Model1 = #model{tps = Tps, nss = Nss, tns = Tns, th = Th}, 60 | _Model2 = #model{tps = NewTps, nss = NewNss, th = NewTh}) -> 61 | [Document | OtherTypes] = Tps, 62 | #type{nm = '_document', els = [Element]} = Document, 63 | #el{alts = Alts} = Element, 64 | 65 | [New_Document | OtherNewTypes] = NewTps, 66 | #type{nm = '_document', els = [NewElement]} = New_Document, 67 | #el{alts = NewAlts} = NewElement, 68 | 69 | CombinedAlts = lists:umerge(lists:usort(Alts), lists:usort(NewAlts)), 70 | CombinedElement = Element#el{alts = CombinedAlts}, 71 | CombinedDocument = Document#type{els = [CombinedElement]}, 72 | CombinedTypes = [CombinedDocument | lists:umerge(lists:usort(OtherTypes), lists:usort(OtherNewTypes))], 73 | CombinedNss = lists:umerge(lists:usort(Nss), lists:usort(NewNss)), 74 | CombinedTh = lists:umerge(lists:usort(Th), lists:usort(NewTh)), 75 | 76 | Info = #schemaInfo{namespaces = CombinedNss, targetNamespace = Tns}, 77 | 78 | UpdatedTypes = erlsom_pass2:pass5(CombinedTypes, Info), 79 | 80 | Model1#model{tps = UpdatedTypes, nss = CombinedNss, th = CombinedTh}. 81 | 82 | -------------------------------------------------------------------------------- /examples/soap_example/soap_example.erl: -------------------------------------------------------------------------------- 1 | %%% a simple example of the use of erlsom. 2 | %%% 3 | -module(soap_example). 4 | -include("erlsom.hrl"). 5 | 6 | %% user interface 7 | -export([run/0]). 8 | 9 | %% define records (generated by writeHrl) 10 | -record('in:arguments', {anyAttribs, values, precision}). 11 | -record('out:resultType', {anyAttribs, result}). 12 | -record('out:resultType-error', {anyAttribs, error}). 13 | -record('out:resultType-okResult', {anyAttribs, value}). 14 | -record('out:errorType', {anyAttribs, errorCode, errorDescription}). 15 | 16 | -record('sp:UpgradeType', {anyAttribs, 'SupportedEnvelope'}). 17 | -record('sp:SupportedEnvType', {anyAttribs, 'qname'}). 
-record('sp:NotUnderstoodType', {anyAttribs, 'qname'}).
-record('sp:detail', {anyAttribs, choice}).
-record('sp:subcode', {anyAttribs, 'Value', 'Subcode'}).
-record('sp:faultcode', {anyAttribs, 'Value', 'Subcode'}).
-record('sp:reasontext', {anyAttribs, 'xml:lang', '#text'}).
-record('sp:faultreason', {anyAttribs, 'Text'}).
-record('sp:Fault', {anyAttribs, 'Code', 'Reason', 'Node', 'Role', 'Detail'}).
-record('sp:Body', {anyAttribs, choice}).
-record('sp:Header', {anyAttribs, choice}).
-record('sp:Envelope', {anyAttribs, 'Header', 'Body'}).

%% Parses the SOAP request in "example_in.xml", processes the content of
%% its body and returns the result of erlsom:write/2 for the response
%% envelope. A request that cannot be parsed is answered with a fault.
run() ->
  {ModelIn, ModelOut} = compileXSDs(),

  %% parse xml
  Xml = filename:join([codeDir(), "example_in.xml"]),
  Result = case erlsom:scan_file(Xml, ModelIn) of
    {ok, #'sp:Envelope'{'Body' = #'sp:Body'{choice = Content}}, _} ->
      processContent(Content);
    {error, _} ->
      %% use the same prefixed fault code as the faults that are
      %% generated by processContent/1
      soapError("sp:Sender", "Incorrect message")
  end,

  %% add envelope
  Response = #'sp:Envelope'{'Body' = #'sp:Body'{choice = Result}},
  %% generate xml.
  erlsom:write(Response, ModelOut).


%% Translates the content of the request body into the content of the
%% response body: a list holding either an 'out:resultType' record with
%% the calculated average, or an 'sp:Fault' record.
processContent(Content) ->
  %% do something with the content
  case Content of
    [#'in:arguments'{values = undefined}] ->
      soapError("sp:Sender", "No arguments provided");
    [#'in:arguments'{values = List, precision = Precision}] ->
      Result = #'out:resultType-okResult'{value = calcAverage(List, Precision)},
      [#'out:resultType'{result=Result}];
    _ ->
      soapError("sp:Sender", "Unexpected error")
  end.


%% Builds the body content for a fault: a one-element list with an
%% 'sp:Fault' record that carries Code as the fault code value and Reason
%% as the text of a single reason (language "EN").
soapError(Code, Reason) ->
  FaultCode = #'sp:faultcode'{'Value' = Code},
  ReasonText = #'sp:reasontext'{'xml:lang' = "EN", '#text' = Reason},
  ReasonRec = #'sp:faultreason'{'Text' = [ReasonText]},
  [#'sp:Fault'{'Code' = FaultCode, 'Reason' = ReasonRec}].
64 | 65 | 66 | compileXSDs() -> 67 | EnvelopeXsd = filename:join([codeDir(), "soap-envelope.xsd"]), 68 | BodyXsd = filename:join([codeDir(), "example_in.xsd"]), 69 | ResultXsd = filename:join([codeDir(), "example_out.xsd"]), 70 | {ok, SoapModel} = erlsom:compile_xsd_file(EnvelopeXsd, [{prefix, "sp"}, {strict, false}]), 71 | {ok, ModelIn} = erlsom:add_xsd_file(BodyXsd, [{prefix, "in"}, {strict, false}], SoapModel), 72 | {ok, ModelOut} = erlsom:add_xsd_file(ResultXsd, [{prefix, "out"}, {strict, false}], SoapModel), 73 | {ModelIn, ModelOut}. 74 | 75 | calcAverage(List, Precision) -> 76 | calcAverage(List, Precision, 0, 0). 77 | calcAverage([], Precision, Acc, NrOfElements) -> 78 | lists:flatten(io_lib:format("~.*f", [Precision, Acc/NrOfElements])); 79 | calcAverage([Head|Tail], Precision, Acc, NrOfElements) -> 80 | calcAverage(Tail, Precision, Acc + Head, NrOfElements + 1). 81 | 82 | %% this is just to make it easier to test this little example 83 | codeDir() -> filename:dirname(code:which(?MODULE)). 84 | -------------------------------------------------------------------------------- /priv/gexf/schema/gexf.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | Tree 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | Datatypes 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/erlsom_sax.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 
%%%
%%% Erlsom is free software: you can redistribute it and/or modify
%%% it under the terms of the GNU Lesser General Public License as
%%% published by the Free Software Foundation, either version 3 of
%%% the License, or (at your option) any later version.
%%%
%%% Erlsom is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%%% GNU Lesser General Public License for more details.
%%%
%%% You should have received a copy of the GNU Lesser General Public
%%% License along with Erlsom. If not, see
%%% <http://www.gnu.org/licenses/>.
%%%
%%% Author contact: w.a.de.jong@gmail.com

%%% ====================================================================
%%% data structures produced by erlsom_sax
%%% ====================================================================

%% data structures produced by sax.erl.

-record(attribute, {localName, prefix = [], uri = [], value}).

-record(erlsom_sax_state, {
  user_state,
  callback,
  %% encoding of the input document
  encoding,
  continuation_state,
  entities = [],
  par_entities = [],
  current_entity = '__top',
  namespaces = [],
  endtags = [],
  %% determines the encoding of text and attribute values
  output,
  %% if false, user defined entities will be ignored in the DTD, and use
  %% of entities will fail.
  expand_entities = true,
  %% Maximum level of nesting of entities. 2 means: an entity can refer
  %% to 1 or more other entities, but none of those can contain entity
  %% references.
  max_entity_depth = 2,
  %% Maximum size of a single entity
  max_entity_size = 2000,
  %% Maximum number of entities that can be defined. Note that a large
  %% number can lead to long processing to find cycles, unless max depth
  %% has been set to a small number.
  max_nr_of_entities = 100,
  %% Maximum total number of bytes of all expanded entities together.
  max_expanded_entity_size = 10000000,
  %% accumulated size of entities
  entity_size_acc = 0,
  continuation_fun,
  %% entity_relations is used to check on circular definitions
  entity_relations = []
}).

%% useful macro approach copied from xmerl
-define(space, 32).
-define(cr, 13).
-define(lf, 10).
-define(tab, 9).

%% whitespace consists of 'space', 'carriage return', 'line feed' or 'tab'
-define(is_whitespace(C),
        C =:= ?space; C =:= ?cr ; C =:= ?lf; C =:= ?tab).

%% a-z, A-Z or underscore
-define(is_namestart_char(C),
        C > 96, C < 123; C > 64, C < 91; C =:= $_).

%% as is_namestart_char, but also for characters <> 7 bit ascii
-define(is_namestart_char2(C),
        C > 96, C < 123; C > 64, C < 91; C =:= $_;
        C > 191, C =/= 215, C =/= 247). %% this check is far from complete!

%% a-z, A-Z, 0-9, underscore, hyphen or dot
-define(is_name_char(C),
        C > 96, C < 123;
        C > 64, C < 91;
        C > 47, C < 58;
        C =:= $_;
        C =:= $-;
        C =:= $.).

%% as is_name_char, but also for characters <> 7 bit ascii
-define(is_name_char2(C),
        C > 96, C < 123;
        C > 64, C < 91;
        C > 47, C < 58;
        C =:= $_;
        C =:= $-;
        C =:= $.;
        C > 191, C /= 215, C /= 247). %% this check is far from complete!
91 | -------------------------------------------------------------------------------- /examples/erlsom_sax_example/search_request.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/erlsom_parse.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% header file for erslom_parse 23 | %%% ==================================================================== 24 | 25 | %% header file for erlsom_parse. Contains the record definitions. 

%% the records that form the model - see erlsom_parse.erl for a
%% description.

-record(model, {
  tps,
  nss,
  %% target namespace (the URI, a string)
  tns,
  %% type hierarchy, see 'tree'-functions in erlsom_lib
  th,
  %% Include "any attributes" (i.e. attributes that have not been
  %% explicitly declared in the XSD) in the result. If set to true these
  %% will be in the second element of the record.
  any_attribs,
  %% Function that is called after the parsing of a complex type
  %% (resulting in creation of a record) has been completed. Can be used
  %% to modify the value (or for side effects).
  value_fun
}).

%% mn & mx are only used by erlsom_compile.
%% typeName is the 'real' name, to be used in xsi:type attributes
%% for derived types. The 'nm' field is actually a key, which may
%% include an additional prefix to differentiate between elements, types
%% and groups.
-record(type, {
  nm,
  tp = sequence,
  els,
  atts = [],
  anyAttr,
  nillable,
  nr,
  mn = 1,
  mx = 1,
  mxd = false,
  typeName
}).

%% nr is actually the position of the value in the result record.
-record(el, {
  alts,
  mn = 1,
  mx = 1,
  nillable,
  nr
}).

-record(alt, {
  tag,
  tp,
  nxt = [],
  mn = 1,
  mx = 1,
  rl = true,
  anyInfo
}).

-record(att, {nm, nr, opt, tp}).
%% -record(ns, {uri, pf}).
-record(elInfo, {anyAttr}).
%% for anyAttributes
-record(anyAttr, {prCont, ns, tns}).
%% for any elements
-record(anyInfo, {prCont, ns, tns}).

-record(state, {
  currentState,
  resultSoFar,
  model,
  namespaces,
  allNamespaces,
  continuationState,
  value_acc = [],
  value_fun
}).

-record(cs, {
  %% remaining elements
  re,
  %% nr of elements of the current type received so far
  sf,
  %% element record: the result (so far) for this type
  er,
  %% 'real element': do we expect an end-tag?
  rl,
  %% is this a mixed type?
  mxd
}).
68 | 69 | -record(all, {re, %% remaining elements 70 | nr, %% the sequence number of the current element 71 | er}). %% element record: the result (so far) for this type 72 | 73 | %% altState is used for parsing alternatives within 74 | %% a choice that can occur more than once 75 | -record(altState, {name, %% the tag we are processing 76 | type, %% the type of this element 77 | real, %% is this a 'real' element or a group ref 78 | receivedSoFar, %% number of elements received 79 | acc, %% values of elements already processed 80 | min, %% minOccurs 81 | max}). %% maxOccurs 82 | 83 | -record(anyState, {anyInfo}). 84 | -------------------------------------------------------------------------------- /examples/continuation/continuation_example.erl: -------------------------------------------------------------------------------- 1 | -module(continuation_example). 2 | %% Example to show how the Erlsom Sax parser can be used in combination 3 | %% with a 'continuation function'. This enables parsing of very big documents 4 | %% in a sort of streaming mode. 5 | %% 6 | %% When the sax parser reaches the end of a block of data, it calls the 7 | %% continuation function. This should return the next block of data. 8 | %% 9 | %% the continuation function is a function that takes 2 arguments: Tail and 10 | %% State. 11 | %% - Tail is the (short) list of characters that could not yet be parsed 12 | %% because it might be a special token or not. Since this still has to 13 | %% be parsed, it should be put in front of the next block of data. 14 | %% - State is information that is passed by the parser to the callback 15 | %% functions transparently. This can be used to keep track of the 16 | %% location in the file etc. 17 | %% The function returns {NewData, NewState}, where NewData is a list of 18 | %% characters/unicode code points, and NewState the new value for the State. 19 | 20 | -export([run/0]). 21 | 22 | %% 'chunk' is the number of characters that is read at a time. 
%% should be tuned for the best result. (109 is obviously not a good value,
%% it should be bigger than that - try it out).
-define(chunk, 109).

%% Parses the example document chunk by chunk and prints the number of
%% books per date. Returns ok.
run() ->
  SaxCallback = fun count_books/2,      %% handles the sax events
  Continuation = fun continue_file/2,   %% returns the next chunk of data
  %% open file
  {ok, Handle} = file:open(xml(), [read, raw, binary]),
  Result =
    try
      %% continuation state: {Handle, Offset of the next read, chunk size}
      CState = {Handle, 0, ?chunk},
      SaxCallbackState = undefined,
      %% erlsom:parse_sax() returns {ok, FinalState, TrailingBytes},
      %% where TrailingBytes is the rest of the input-document
      %% that follows after the last closing tag of the XML, and FinalState
      %% is the value of the State after processing the last SAX event.
      {ok, FinalState, _TrailingBytes} =
        erlsom:parse_sax(<<>>, SaxCallbackState, SaxCallback,
                         [{continuation_function, Continuation, CState}]),
      FinalState
    after
      %% close file, also when the parser raises an exception
      _ = file:close(Handle)
    end,

  %% Result is a list [{Date, Count}, ...]
  lists:foreach(fun({Date, Count}) ->
                    io:format("Date: ~p - count: ~p~n", [Date, Count])
                end, Result),
  ok.

%% this is a continuation function that reads chunks of data
%% from a file.
%%
%% Tail is the binary that the parser could not process yet; the newly read
%% chunk is appended to it. At end of file Tail is returned unchanged.
%% Returns {NewData, NewState}.
continue_file(Tail, {Handle, Offset, Chunk}) ->
  %% read the next chunk
  case file:pread(Handle, Offset, Chunk) of
    {ok, Data} ->
      {<<Tail/binary, Data/binary>>, {Handle, Offset + Chunk, Chunk}};
    eof ->
      {Tail, {Handle, Offset, Chunk}}
  end.

%% This function is specific for the example. It counts the number
%% of books per year.
%%
%% The input is the sax-event and the state.
%% The output is the new state.
%%
%% The state consists of a stack that corresponds to
%% the level in the XML, and an accumulator for the result: [{Date, Count}].
%% Additionally there is a field 'element_acc' which contains
%% an intermediate result while parsing character data,
%% because there can be more than 1 character event per element (in theory).
-record(state, {stack = [], acc = [], element_acc = ""}).

count_books(startDocument, _) ->
  #state{};
count_books({startElement, _, Tag, _, _}, #state{stack = Stack} = State) ->
  %% one level deeper
  State#state{stack = [Tag | Stack]};
count_books({characters, Value},
            #state{stack = ["date", "book", "book_store"],
                   element_acc = ElementAcc} = State) ->
  %% only the text of book_store/book/date is collected
  State#state{element_acc = ElementAcc ++ Value};
count_books({endElement, _, _, _},
            #state{stack = ["date" | Tail],
                   acc = Acc,
                   element_acc = Value} = State) ->
  %% end of a date element: count the collected value
  State#state{stack = Tail, acc = processBook(Acc, Value), element_acc = ""};
count_books({endElement, _, _, _}, #state{stack = [_ | Tail]} = State) ->
  State#state{stack = Tail};
count_books(endDocument, #state{acc = Acc}) ->
  %% the final state is just the accumulated result
  Acc;
count_books(_, S) -> S.

%% Increments the count for Date in List ([{Date, Count}]); a date that is
%% not yet in the list is added in front with count 1.
processBook(List, Date) ->
  case lists:keyfind(Date, 1, List) of
    false ->
      [{Date, 1} | List];
    {_, Count} ->
      lists:keyreplace(Date, 1, List, {Date, Count + 1})
  end.

%% this is just to make it easier to test this little example
xml() -> filename:join([codeDir(), "BookStore.xml"]).
codeDir() -> filename:dirname(code:which(?MODULE)).
105 | -------------------------------------------------------------------------------- /priv/gexf/schema/viz.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /src/erlsom_compile.hrl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 
%%%
%%% Author contact: w.a.de.jong@gmail.com

%%% ====================================================================
%%% Header file for erlsom_compile
%%% ====================================================================


%% records for the structures as found in the XSD
%%
%% All of these records carry 'elInfo' as their first field. Where a
%% construct has a 'global...' and a 'local...' record, the two are separate
%% records with different field sets.
%% NOTE(review): presumably each record is filled from the XSD element of the
%% same name and its fields mirror that element's attributes and children -
%% confirm against erlsom_compile.
-record(schemaType,
        {elInfo, targetNamespace, elementFormDefault, attributeFormDefault, blockDefault, finalDefault,
         version, id, imports, elements}).
-record(importType, {elInfo, id, namespace, schemaLocation, annotation}).
-record(includeType, {elInfo, id, schemaLocation, annotation}).
-record(redefineType, {elInfo, id, schemaLocation, elements}).
-record(globalElementType, {elInfo, name, type, default, fixed, id, abstract, substitutionGroup, final, nillable,
                            annotation, simpleOrComplex, unique}).
-record(localElementType, {elInfo, name, type, default, fixed, form, ref, minOccurs, maxOccurs, nillable, annotation,
                           simpleOrComplex, unique}).
-record(globalComplexTypeType, {elInfo, name, final, abstract, block, mixed, id, annotation, model, attributes, anyAttribute}).
-record(localComplexTypeType, {elInfo, mixed, annotation, model, attributes, anyAttribute}).
-record(globalSimpleTypeType, {elInfo, name, id, final, annotation, model}).
-record(localSimpleTypeType, {elInfo, annotation, model}).
-record(simpleContentType, {elInfo, annotation, model, id}).
-record(groupDefType, {elInfo, name, annotation, model}).
-record(groupRefType, {elInfo, ref, minOccurs, maxOccurs}).
-record(annotationType, {elInfo, annotation}).
-record(globalAttributeType, {elInfo, name, type, use, fixed, default, id, model}).
-record(localAttributeType, {elInfo, name, type, use, ref, fixed, form, default, model}).
-record(choiceType, {elInfo, id, minOccurs, maxOccurs, annotation, alternatives}).
-record(sequenceType, {elInfo, annotation, elements, minOccurs, maxOccurs}).
-record(allType, {elInfo, annotation, elements, minOccurs, maxOccurs}).
-record(attributeGroupDefType, {elInfo, id, name, annotation, attributes, anyAttribute}).
-record(attributeGroupRefType, {elInfo, ref, id}).
-record(anyType, {elInfo, any, minOccurs, maxOccurs, namespace, processContents}).
-record(anyAttributeType, {elInfo, id, namespace, processContents, annotation}).
%% extensionType/restrictionType and their '...C' variants differ in their
%% field sets: extensionTypeC and restrictionTypeC have a 'model' field,
%% restrictionType has 'any' instead, extensionType has neither.
%% NOTE(review): presumably the '...C' records are the complexContent
%% flavours - confirm against erlsom_compile.
-record(extensionType, {elInfo, base, annotation, attributes, anyAttribute}).
-record(extensionTypeC, {elInfo, base, annotation, model, attributes, anyAttribute}).
-record(restrictionType, {elInfo, annotation, any, attributes, anyAttribute, base}).
-record(restrictionTypeC, {elInfo, base, annotation, model, attributes, anyAttribute}).
-record(complexContentType, {elInfo, annotation, model, mixed}).

%% This is added to the XSD to allow generation of an XML document
-record(namespaceType, {prefix, 'URI'}).

%% the rest is for internal use in the translation of the XSD to the
%% format used by the parser
%% path is used to give local elements a unique name (the 'path' to the element)
-record(schemaInfo, {targetNamespace, elementFormDefault, namespacePrefix, namespaces,
                     path=[], attGrps, atts, th,
                     strict = false :: boolean(), %% enforce additional type checks/conversions
                     include_any_attrs = false, %% if true, the second element in the result
                                                %% types will be used for
                                                %% attributes that were not explicitly declared
                     value_fun %% Function that is called after a complex type
                               %% has been parsed (that is, after the record for
                               %% it has been created). Can be used
                               %% to modify the value (or for side effects).
                    }).

%% typeInfo - the intermediate format.
%% global (true or false): we need to find out in the
%% end whether this type should be available as 'top level' element in the
%% xml document.
%% One entry of the intermediate format. 'attributes' defaults to the empty
%% list, 'min' and 'max' default to 1; all other fields have no default.
-record(typeInfo, {typeName,
                   global,
                   typeType,
                   typeRef,
                   elements,
                   attributes = [],
                   anyAttr,
                   seqOrAll,
                   extends,
                   restricts,
                   mixed,
                   base,
                   substitutionGroup,
                   min = 1,
                   max = 1}).

%% NOTE(review): presumably elementInfo/alternative describe the entries of
%% typeInfo's 'elements' and attrib/attGrp those of its 'attributes' -
%% confirm against erlsom_compile. 'min' and 'max' default to 1 here as well.
-record(elementInfo, {alternatives, min = 1, max = 1, nillable}).
-record(alternative, {tag, type, real, min = 1, max = 1, anyInfo}).
-record(attrib, {name, optional, type, ref}).
-record(attGrp, {name, atts, anyAttr}).
102 | -------------------------------------------------------------------------------- /examples/soap_example/soap-envelope.xsd: -------------------------------------------------------------------------------- 1 | 17 | 18 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | Elements replacing the wildcard MUST be namespace qualified, but can be in the targetNamespace 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Fault reporting structure 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 105 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 125 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /test/erlsom_tests.erl: -------------------------------------------------------------------------------- 1 | -module(erlsom_tests). 2 | 3 | %% ------------------------------------------------------------------ 4 | %% Tests 5 | %% ------------------------------------------------------------------ 6 | 7 | -include_lib("eunit/include/eunit.hrl").
-include("src/erlsom_parse.hrl").
-compile([nowarn_export_all, export_all]).

%% Runs the tests of the erlsom_gexf_tests module under one title.
gexf_test_() ->
  Title = "Test XML/XSD in GEXF format.",
  {Title, {module, erlsom_gexf_tests}}.

%% Round trip for a schema that uses xs:all.
all_test_() ->
  Xsd = ["all", "all.xsd"],
  Xml = ["all", "all.xml"],
  {"Test XSD with the xs:all tag.", verify_stability_(Xsd, Xml, [])}.

%% Round trips for type extensions, plus a check on the compiled model:
%% no type may contain more than one '#text' element.
extension_test_() ->
  Extension =
    {"Test XSD type extensions.",
     verify_stability_(["extension", "extension.xsd"],
                       ["extension", "extension.xml"],
                       [])},
  SimpleContent =
    {"Test simpleContent extension stable",
     verify_stability_(["extension", "simpleContentExtension.xsd"],
                       ["extension", "simpleContentExtension.xml"],
                       [])},
  NoDuplicateText =
    {"Test simpleContent #text not duplicated", fun () ->
       {ok, #model{tps = Types}} = compile_xsd(["extension", "simpleContentExtension.xsd"], []),
       IsTextEl = fun
                    (#el{alts = [#alt{tag = '#text'}]}) -> true;
                    (_) -> false
                  end,
       CheckType = fun (Type = #type{els = Els}) ->
                     TextEls = [El || El <- Els, IsTextEl(El)],
                     % No duplicate entries for the #text.
                     % Type is only part of the tuple to get a more informative failure.
                     case {Type, TextEls} of
                       {_, []} -> ok;
                       {_, [_]} -> ok
                     end
                   end,
       ok = lists:foreach(CheckType, Types)
     end},
  [Extension, SimpleContent, NoDuplicateText].

%% @doc
%% compile the XSD schema file with the given relative path, example:
%% compile_xsd(["all", "all.xsd"], [])
%% Both the schema path and the include paths are resolved with priv_path/1.
compile_xsd(Path, IncludePaths) ->
  Options = [{include_dirs, lists:map(fun priv_path/1, IncludePaths)}],
  erlsom:compile_xsd_file(priv_path(Path), Options).

%% @doc
%% parse an xml document with a compiled XSD model, example:
%% parse_file(["all", "all.xml"], Model)
parse_file(Path, Model) ->
  File = priv_path(Path),
  erlsom:parse_file(File, Model).

%% @doc
%% verify the parser/generator stability, example:
%% verify_stability(["all", "all.xsd"], ["all", "all.xml"], [])
%%
%% Compiles the XSD, parses the XML file, writes the parsed tree back to XML
%% and parses that again; the two trees must be equal.
%%
%% The comparison is asserted right here. verify_stability_/3 wraps this call
%% in a plain test fun, and EUnit ignores the return value of such a fun, so
%% the lazy ?_assertEqual test object in the returned tuple was never
%% executed on that path. The return value itself is unchanged.
verify_stability(XsdPath, XmlPath, IncludeDirs) ->
  {ok, Model} = compile_xsd(XsdPath, IncludeDirs),
  {ok, Tree1} = parse_file(XmlPath, Model),
  {ok, XML} = erlsom:write(Tree1, Model),
  {ok, Tree2} = erlsom:parse(XML, Model),
  ?assertEqual(Tree1, Tree2),
  {lists:last(XmlPath), ?_assertEqual(Tree1, Tree2)}.

%% @doc
%% test generator function for verify_stability tests
%% Returns a fun of arity 0 that runs verify_stability/3 when called.
verify_stability_(XsdPath, XmlPath, IncludeDirs) ->
  fun() -> verify_stability(XsdPath, XmlPath, IncludeDirs) end.

%% Joins the path components in Path onto the priv directory of erlsom.
priv_path(Path) ->
  filename:join([code:priv_dir(erlsom) | Path]).


%%
%% Check if xsi:type is parsed and written correctly.
%%
%% This test was introduced to check/fix the following bugs:
%%   * The ext namespace was not added to the corresponding element when writing model to the XSD.
%%   * The xsi namespace was duplicated with different prefix if the model had xsi namespace defined with other prefix.
%%
%% Before the fix, the `Written' XML was looking like this:
%% ```
%%
%% a
%% b
%%
%% '''
xsi_type_write_test() ->
  %
  % Parse the XSD model.
  {ok, Base} = erlsom:compile_xsd_file(
    priv_path(["xsi_type", "base.xsd"]),
    [
      {include_any_attribs, true},
      {prefix, "b"}
    ]
  ),
  {ok, Ext} = erlsom:compile_xsd_file(
    priv_path(["xsi_type", "ext.xsd"]),
    [
      {include_any_attribs, true},
      {prefix, "e"},
      {include_dirs, [priv_path(["xsi_type"])]},
      {include_files, [{"urn:erlsom/xsi_type/base", "b", priv_path(["xsi_type", "base.xsd"])}]}
    ]
  ),
  Model = erlsom:add_model(Base, Ext),
  io:format("Model=~p~n", [Model]),
  %
  % Parse the XML.
  {ok, Xml} = file:read_file(priv_path(["xsi_type", "ext.xml"])),
  {ok, Parsed1} = erlsom:parse(Xml, Model), io:format("Parsed1=~p~n", [Parsed1]),
  {ok, Written} = erlsom:write(Parsed1, Model), io:format("Written=~p~n", [Written]),
  {ok, Parsed2} = erlsom:parse(Written, Model), io:format("Parsed2=~p~n", [Parsed2]),
  ?assertEqual(
    erlang:setelement(2, Parsed1, []), % Compare ignoring the extra attributes, because they
    erlang:setelement(2, Parsed2, [])  % have type names with prefixes, as defined in the XML.
  ).

% TODO: XSI:type and xsi:nil in one element.


%%
%% Check, if document can be parsed in the case, when an element is put to the
%% global namespace explicitly (`xmlns=""') and has derived type specified.
%%
xsi_type_no_prefix_read_test() ->
  %
  % Parse the XSD model.
  {ok, Model} = erlsom:compile_xsd_file(
    priv_path(["xsi_type_no_prefix", "test.xsd"]),
    [
      {include_any_attribs, true},
      {prefix, "t"}
    ]
  ),
  io:format("Model=~p~n", [Model]),
  %
  % Parse the XML.
  {ok, Xml} = file:read_file(priv_path(["xsi_type_no_prefix", "test.xml"])),
  {ok, Parsed} = erlsom:parse(Xml, Model),
  io:format("Parsed=~p~n", [Parsed]),
  ?assertMatch({'ExtType', _, "base", "ext"}, Parsed).


%% Compiles the choice_complex schema from its file contents and scans the
%% matching XML document; the test passes when both calls return 'ok' tuples.
choice_complex_test() ->
  {ok, Xsd} = file:read_file(priv_path(["choice_complex", "choice_complex.xsd"])),
  {ok, Xml} = file:read_file(priv_path(["choice_complex", "choice_complex.xml"])),
  {ok, Model} = erlsom:compile(Xsd),
  {ok, _Data, _} = erlsom:scan(Xml, Model).

160 | -------------------------------------------------------------------------------- /examples/erlsom_sax_example/erlsom_simple_form.erl: -------------------------------------------------------------------------------- 1 | %%% translate XML to the 'simple form' as used by XMERL. 2 | %%% 3 | -module(erlsom_simple_form).

%% user interface
-export([simple_form/1]).
%% with options
-export([simple_form/2]).

-include_lib("erlsom/src/erlsom_sax.hrl").
%% ?EXCEPTION/?GET_STACK: the same stack-trace macros that
%% src/erlsom_simple_form.erl uses in its callback/2.
-include_lib("erlsom/include/exception.hrl").

-export([callback/2]).
-export([nameFun/3]).

-record(sState, {stack, nameFun, options}).

%% simple_form(File) -> same as simple_form(File, []).
simple_form(File) ->
  simple_form(File, []).

%% simple_form(File, Options) -> result of erlsom:sax/3 | Error
%%
%% Reads File and feeds its contents (as a list) to erlsom:sax/3, with
%% callback/2 as the event function and nameFun/3 as the name function.
%% When the file cannot be read, the result of file:read_file/1 is returned
%% as it is.
simple_form(File, Options) ->
  case file:read_file(File) of
    {ok, Bin} ->
      erlsom:sax(binary_to_list(Bin),
                 #sState{stack = [], nameFun = fun erlsom_simple_form:nameFun/3, options = Options},
                 fun erlsom_simple_form:callback/2);
    Error ->
      Error
  end.

%% callback(Event, State) -> NewState | {ok, Root}
%%
%% SAX event function. Element and character events update the stack in
%% State; endDocument returns {ok, Root} when exactly one element is left on
%% the stack. Anything raised while handling an event is passed to
%% throwError/4 together with the event and the state.
callback(Event, State) ->

  %% debugState(State),
  %% debugEvent(Event),
  try
    case Event of
      startDocument ->
        State;
      {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
        %% debug(Event),
        startElement(Event, State);
      {endElement, _Uri, _LocalName, _Prefix} ->
        endElement(Event, State);
      {characters, _Characters} ->
        characters(Event, State);
      {ignorableWhitespace, _Characters} -> State;
      {processingInstruction, _Target, _Data} -> State;
      {startPrefixMapping, _Prefix, _URI} ->
        State;
      {endPrefixMapping, _Prefix} ->
        State;
      endDocument ->
        case State of
          #sState{stack = [Root]} ->
            %% debug(Result),
            {ok, Root};
          _Else ->
            %% debug(State),
            throw({error, "unexpected end"})
        end;
      {error, Message} ->
        throw(Message);
      {'EXIT', Message} ->
        exit(Message)
    end
  catch
    %% The stack trace is taken through the macros instead of
    %% erlang:get_stacktrace(), which is deprecated and no longer available
    %% in recent OTP releases.
    ?EXCEPTION(error, Reason, Stacktrace) ->
      throwError(error, {Reason, ?GET_STACK(Stacktrace)}, Event, State);
    Class:Exception -> throwError(Class, Exception, Event, State)
  end.

%% Stack contains the tree that is growing as the elements come in.
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)

%% When a startElement event comes in, add a new element to the stack:
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]},
%%  {element3.3.2, [attributes], []}]

%% When a characters event comes in, insert its text into the top element:
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]},
%%  {element3.3.2, [attributes], ["the text"]}]

%% When an endElement comes in, insert the top element of the stack in the
%% layer below it (its parent):
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1, element3.3.2]}]

%% Pushes {Name, Attributes, []} for the new element; both the element name
%% and the attribute names are produced by the state's name function.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             #sState{stack = Open, nameFun = ToName} = State) ->
  Tag = ToName(LocalName, Uri, Prefix),
  Attrs = processAttributes(Attributes, State),
  State#sState{stack = [{Tag, Attrs, []} | Open]}.

%% Closes the top element. Its children were collected newest-first, so they
%% are reversed here.
%% Only one element on the stack: it stays there, with its children in
%% document order.
endElement({endElement, _Uri, _LocalName, _Prefix},
           #sState{stack = [{Name, Attributes, Elements}]} = State) ->
  Root = {Name, Attributes, lists:reverse(Elements)},
  State#sState{stack = [Root]};

%% Otherwise the closed element becomes the newest child of its parent.
endElement({endElement, _Uri, _LocalName, _Prefix},
           State) ->
  #sState{stack = [{Name, Attributes, Elements},
                   {ParentName, ParentAttributes, ParentElements} | Tail]} = State,
  Closed = {Name, Attributes, lists:reverse(Elements)},
  Parent = {ParentName, ParentAttributes, [Closed | ParentElements]},
  State#sState{stack = [Parent | Tail]}.

%% Adds character data to the top element. When the newest child of that
%% element is a list (text that arrived earlier), the new characters are
%% appended to it; otherwise they become a new child.
characters({characters, Characters},
           #sState{stack = [{Name, Attributes, [Previous | Older]} | Tail]} = State)
  when is_list(Previous) ->
  Merged = Previous ++ Characters,
  State#sState{stack = [{Name, Attributes, [Merged | Older]} | Tail]};
characters({characters, Characters},
           #sState{stack = [{Name, Attributes, Elements} | Tail]} = State) ->
  State#sState{stack = [{Name, Attributes, [Characters | Elements]} | Tail]}.

%% Turns a list of #attribute{} records into {Name, Value} pairs, in the
%% original order. The names are produced by the state's name function.
processAttributes(Attributes, State) ->
  processAttributes(Attributes, State, []).
processAttributes([], _State, Pairs) ->
  lists:reverse(Pairs);
processAttributes([#attribute{localName = LocalName, uri = Uri, prefix = Prefix, value = Value} | Rest],
                  #sState{nameFun = ToName} = State,
                  Pairs) ->
  Pair = {ToName(LocalName, Uri, Prefix), Value},
  processAttributes(Rest, State, [Pair | Pairs]).

%% Default name function: the plain name when the namespace is empty,
%% otherwise "{Namespace}Name".
nameFun(Name, [], _Prefix) ->
  Name;
nameFun(Name, Namespace, _Prefix) ->
  "{" ++ Namespace ++ "}" ++ Name.


%% Re-raises something that was caught in callback/2. 'error' and 'exit'
%% both leave through exit/1, 'throw' through throw/1; the term is always
%% wrapped in {error, ...}.
%% With an #sState{} the term is a list that holds the exception, the names
%% of the elements on the stack and the event that was being handled.
throwError(Class, Exception, Event,
           #sState{stack = Stack}) ->
  Message = [{exception, Exception},
             {stack, printStackTrace(Stack)},
             {received, Event}],
  Wrapped = {error, Message},
  case Class of
    'error' -> exit(Wrapped);
    'throw' -> throw(Wrapped);
    'exit' -> exit(Wrapped)
  end;

%% Without an #sState{} only the exception itself is reported.
throwError(Class, Exception, _Event,
           _Something) ->
  Wrapped = {error, Exception},
  case Class of
    'error' -> exit(Wrapped);
    'throw' -> throw(Wrapped);
    'exit' -> exit(Wrapped)
  end.

%% Lists the names of the elements on the stack as {element, Name} tuples.
printStackTrace(Stack) ->
  printStackTrace(Stack, []).
%% Walks the stack and prepends {element, Name} for every entry to the
%% accumulator, so the result lists the entries in reverse stack order.
printStackTrace([{Name, _, _} | Rest], Trace) ->
  printStackTrace(Rest, [{element, Name} | Trace]);
printStackTrace([], Trace) ->
  Trace.

167 | -------------------------------------------------------------------------------- /priv/gexf/schema/data.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /COPYING.LESSER: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License.
18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /src/erlsom_simple_form.erl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% translate XML to the 'simple form' as used by XMERL. 22 | -module(erlsom_simple_form). 23 | 24 | %% user interface 25 | -export([scan/2]). 26 | -export([callback/2]). 27 | -export([new_state/1]). 28 | 29 | -include("erlsom_sax.hrl"). 30 | -include("exception.hrl"). 31 | 32 | -record(sState, {stack, nameFun, options}). 

%% scan(Xml, Options) -> result of erlsom:parse_sax/4
%%
%% Parses Xml with callback/2 as the event function. A {nameFun, Fun} entry
%% in Options selects the name function and is removed from the options that
%% are handed to erlsom:parse_sax/4; without it nameFun/3 is used.
%%
%% The initial state comes from new_state/1, so that 'options' starts as []
%% just as it does for every other state this module creates.
scan(Xml, Options) ->
  {NameFun, SaxOptions} =
    case lists:keyfind('nameFun', 1, Options) of
      {_, Fun} ->
        {Fun, lists:keydelete('nameFun', 1, Options)};
      _ ->
        {fun nameFun/3, Options}
    end,
  erlsom:parse_sax(Xml, new_state(NameFun), fun callback/2, SaxOptions).


%% new_state(NameFun) -> an empty parser state that uses NameFun to build
%% element and attribute names.
new_state(Namefun) ->
  #sState{stack = [], nameFun = Namefun, options = []}.

%%
%% It is also possible to call erlsom_simple_form:callback from within
%% another callback function (to parse a part of an xml document).
%%
%% callback(Event, State) -> NewState | {result, Document} | Document
%%
%% On startDocument, State may be an #sState{} (kept as it is), the list
%% [{name_function, NameFun}] (a new state with that name function) or
%% anything else (a new state with nameFun/3).
%% endElement returns {result, Document} once the outermost element is
%% closed; endDocument unwraps that value and returns Document.
%% Anything raised while handling an event is passed to throwError/4
%% together with the event and the state.
callback(Event, State) ->

  %% debugState(State),
  %% debugEvent(Event),
  try
    case Event of
      startDocument ->
        case State of
          #sState{} ->
            State;
          %% could be more options in the future, but for now there
          %% is just 1
          [{name_function, NameFun}] ->
            new_state(NameFun);
          _ ->
            new_state(fun nameFun/3)
        end;
      {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
        %% debug(Event),
        startElement(Event, State);
      {endElement, _Uri, _LocalName, _Prefix} ->
        endElement(Event, State);
      {characters, _Characters} ->
        characters(Event, State);
      {ignorableWhitespace, _Characters} -> State;
      {processingInstruction, _Target, _Data} -> State;
      {startPrefixMapping, _Prefix, _URI} ->
        State;
      {endPrefixMapping, _Prefix} ->
        State;
      endDocument ->
        case State of
          {result, Document} ->
            Document;
          _Else ->
            %% debug(State),
            throw({error, "unexpected end"})
        end;
      {error, Message} ->
        throw(Message);
      {'EXIT', Message} ->
        exit(Message)
    end
  catch
    ?EXCEPTION(error, Reason, Stacktrace) -> throwError(error, {Reason, ?GET_STACK(Stacktrace)}, Event, State);
    Class:Exception -> throwError(Class, Exception, Event, State)
  end.

%% Stack contains the tree that is growing as the elements come in.
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)

%% When a startElement event comes in, add a new element to the stack:
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]},
%%  {element3.3.2, [attributes], []}]

%% When a characters event comes in, insert its text into the top element:
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1]},
%%  {element3.3.2, [attributes], ["the text"]}]

%% When an endElement comes in, insert the top element of the stack in the
%% layer below it (its parent):
%% [{root, [attributes], [element1, element2]},
%%  {element3, [attributes], [element3.1, element3.2]},
%%  {element3.3, [attributes], [element3.3.1, element3.3.2]}]

%% Pushes {Name, Attributes, []} for the new element; both the element name
%% and the attribute names are produced by the state's name function.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             State = #sState{stack = Stack, nameFun = NameFun}) ->
  Name = NameFun(LocalName, Uri, Prefix),
  State#sState{stack = [{Name, processAttributes(Attributes, State), []} | Stack]}.

%% Closes the top element. Its children were collected newest-first, so they
%% are reversed here.
endElement({endElement, _Uri, _LocalName, _Prefix},
           #sState{stack = [{Name, Attributes, Elements}]}) ->
  Document = {Name, Attributes, lists:reverse(Elements)},
  %% {result, Document} is a special value that signals to the calling function that
  %% the parsing is done. This can be useful when parsing a part of a larger
  %% document.
  {result, Document};

%% Any other element becomes the newest child of its parent.
endElement({endElement, _Uri, _LocalName, _Prefix},
           State) ->
  #sState{stack = [{Name, Attributes, Elements} | [{ParentName, ParentAttributes, ParentElements} | Tail]]} = State,
  State#sState{stack = [{ParentName,
                         ParentAttributes,
                         [{Name, Attributes, lists:reverse(Elements)} | ParentElements]} | Tail]}.

%% Adds character data to the top element. When the newest child of that
%% element is text that arrived earlier, the new characters are appended to
%% it: with ++ for a list, by building a new binary for a binary. Otherwise
%% the characters become a new child.
characters({characters, Characters},
           State = #sState{stack = [{Name,
                                     Attributes,
                                     [FirstBit | OtherElements]
                                    } | Tail]})
  when is_list(FirstBit) ->
  State#sState{stack = [{Name, Attributes, [FirstBit ++ Characters | OtherElements]} | Tail]};
characters({characters, Characters},
           State = #sState{stack = [{Name,
                                     Attributes,
                                     [FirstBit | OtherElements]
                                    } | Tail]})
  when is_binary(FirstBit) ->
  %% NOTE(review): this assumes Characters is a binary as well whenever the
  %% earlier text is one - confirm against the parser's output.
  State#sState{stack = [{Name, Attributes, [<<FirstBit/binary, Characters/binary>> | OtherElements]} | Tail]};
characters({characters, Characters},
           State = #sState{stack = [{Name, Attributes, Elements} | Tail]}) ->
  State#sState{stack = [{Name, Attributes, [Characters | Elements]} | Tail]}.

%% Turns a list of #attribute{} records into {Name, Value} pairs, in the
%% original order. The names are produced by the state's name function.
processAttributes(Attributes, State) ->
  processAttributes(Attributes, State, []).
processAttributes([], _State, Acc) ->
  lists:reverse(Acc);
processAttributes([#attribute{localName=LocalName, uri=Uri, prefix = Prefix, value=Value} | Tail],
                  State = #sState{nameFun = NameFun},
                  Acc) ->
  processAttributes(Tail, State, [{NameFun(LocalName, Uri, Prefix), Value} | Acc]).

%% Default name function: the plain name when the namespace is empty,
%% otherwise "{Namespace}Name".
nameFun(Name, [], _Prefix) ->
  Name;
nameFun(Name, Namespace, _Prefix) ->
  "{" ++ Namespace ++ "}" ++ Name.


%% throwError(Class, Exception, Event, State)
%%
%% Re-raises an exception that was caught while handling Event.
%% If State is an #sState{} record, the reason is extended to
%% [{exception, Exception}, {stack, ElementNames}, {received, Event}];
%% otherwise the bare Exception is used.
%% The reason is always wrapped as {error, Reason}. Class 'throw' is
%% re-raised with throw/1, classes 'error' and 'exit' with exit/1.
throwError(Class, Exception, Event,
           #sState{stack = Stack}) ->
  Details = [{exception, Exception},
             %% names of the elements that are currently open
             {stack, printStackTrace(Stack)},
             %% the event that was being processed
             {received, Event}],
  %% 'undefined' is not an #sState{}, so this selects the generic clause
  throwError(Class, Details, Event, undefined);

throwError(Class, Reason, _Event,
           _Something) ->
  Wrapped = {error, Reason},
  case Class of
    'throw' -> throw(Wrapped);
    'error' -> exit(Wrapped);
    'exit' -> exit(Wrapped)
  end.

%% Returns [{element, Name}] for every element on the stack. The stack has
%% the innermost element first; the result has it last.
printStackTrace(Stack) ->
  printStackTrace(Stack, []).
printStackTrace([{Name, _, _} | Rest], Trace) ->
  printStackTrace(Rest, [{element, Name} | Trace]);
printStackTrace([], Trace) ->
  Trace.

--------------------------------------------------------------------------------
/src/erlsom_sax.erl:
--------------------------------------------------------------------------------
%%% Copyright (C) 2006 - 2008 Willem de Jong
%%%
%%% This file is part of Erlsom.
%%%
%%% Erlsom is free software: you can redistribute it and/or modify
%%% it under the terms of the GNU Lesser General Public License as
%%% published by the Free Software Foundation, either version 3 of
%%% the License, or (at your option) any later version.
%%%
%%% Erlsom is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%%% GNU Lesser General Public License for more details.
14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% . 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% An XML parser, using the SAX model. 23 | %%% ==================================================================== 24 | 25 | -module(erlsom_sax). 26 | 27 | -include("erlsom_sax.hrl"). 28 | 29 | -type attribute() :: #attribute{}. 30 | -export_type([attribute/0]). 31 | 32 | -export([parseDocument/3]). 33 | -export([parseDocument/4]). 34 | 35 | %%%%%%%%%%%%%%%%%%%%%%%%% 36 | %% 37 | %% Interface 38 | %% 39 | %% parseDocument(Xml, State, EventFun) 40 | %% parseDocument(Xml, State, EventFun, Options) 41 | %% 42 | %% Xml = A list of integers that correspond with the characters in an XML 43 | %% document. Can be either 1 byte characters according to ISO ..., 44 | %% or integers that correspond to Unicode code points (see...). 45 | %% 46 | %% State - a term() that is passed to the EventFun. 47 | %% 48 | %% Eventfun - a fun() that is called by the parser whenever it has parsed 49 | %% a bit of the Xml input. The function is called by the parser according 50 | %% to the Sax specification (see [SAX]). 51 | %% 52 | %% EventFun should accept the following arguments: 53 | %% - Event, a tuple that describes the event, see below. 54 | %% - State - a term() 55 | %% 56 | %% EventFun should return State, a term() that wil be passed back to the next 57 | %% invocation of EventFun. 58 | %% 59 | %% Options - a list of options. Currently the only option is 60 | %% {continuation_function, CFunction}, where CFuntion is a fun() that 61 | %% returns the next block of data. 62 | %% CFunction should be a function that takes 2 arguments: Tail and State. 63 | %% - Tail is the (short) list of characters that could not yet be parsed 64 | %% because it might be a special token or not. 
Since this still has to 65 | %% be parsed, it should be put in front of the next block of data. 66 | %% - State is information that is passed by the parser to the callback 67 | %% functions transparently. This can be used to keep track of the 68 | %% location in the file etc. 69 | %% CFunction returns {NewData, NewState}, where NewData is a list of 70 | %% characters/unicode code points, and NewState the new value for the State. 71 | %% 72 | %% Returns: State 73 | %% (i.e.: the result of the last invocation of the callback function) 74 | %% 75 | %% parseDocumentBinary(Xml, State, EventFun, Encoding) 76 | %% parseDocument(Xml, State, EventFun, Encoding, Options) 77 | %% 78 | %% Just like parseDocument, but working on a binary in stead of a list. 79 | %% Encoding = the encoding of the binary (atom()). Supported values: 80 | %% - 'utf-8' 81 | %% - 'latin-1' 82 | %%%%%%%%%%%%%%%%%%%%%%%%% 83 | 84 | %%%%%%%%%%%%%%%%%%%%%%%%% 85 | %% 86 | %% Events sent out by the SAX parser. 87 | %% 88 | %% Based on org.xml.sax ContentHandler interface [SAX]. 89 | %% 90 | %% startDocument 91 | %% 92 | %% endDocument 93 | %% Will NOT be sent out in case of an error 94 | %% 95 | %% {startPrefixMapping, Prefix, URI} 96 | %% Begin the scope of a prefix - URI namespace mapping 97 | %% Will be sent immediately before the corresponding startElement event. 98 | %% 99 | %% {endPrefixMapping, Prefix} 100 | %% End the scope of a prefix - URI namespace mapping 101 | %% Will be sent immediately before the corresponding endElement event. 102 | %% 103 | %% {startElement, Uri, LocalName, Prefix, [Attributes]} 104 | %% Receive notification of the beginning of an element. 105 | %% There will be a corresponding endElement (even when the element is 106 | %% empty). 107 | %% All three name components will be provided. 108 | %% 109 | %% [Attributes] is a list of attribute records, see sax.hrl. 110 | %% Namespace attributes (xmlns:*) will not be reported. 
%%   There will be NO attribute values for defaulted attributes!
%%
%%   Providing 'Prefix' instead of 'Qualified name' is probably not quite
%%   in line with the SAX spec, but it appears to be more convenient.
%%
%% {endElement, Uri, LocalName, Prefix}
%%   Receive notification of the end of an element.
%%
%% {characters, Characters}
%%   Receive notification of character data.
%%   All character data will be in one chunk, except if there is a
%%   CDATA section included inside a character section. In that case
%%   there will be separate events for the characters before the CDATA, the
%%   CDATA section and the characters following it (if any, of course).
%%
%% {ignorableWhitespace, Characters}
%%   If a character data section (as it would be reported by the 'characters'
%%   event, see above) consists ONLY of whitespace, it will be
%%   reported as ignorableWhitespace.
%%
%% {processingInstruction, Target, Data}
%%
%% {error, Description}
%% {internalError, Description}
%%
%%%%%%%%%%%%%%%%%%%%%%%%

%% parseDocument(Xml, UserState, Callback): parse without options.
parseDocument(Xml, UserState, Callback) ->
  parseDocument(Xml, UserState, Callback, []).

%% parseDocument(Xml, UserState, Callback, Options): build the parser state
%% from Options, store the callback and the user state in it, and parse.
parseDocument(Xml, UserState, Callback, Options) ->
  Defaults = getOptions(Options),
  parseDocument(Xml, Defaults#erlsom_sax_state{callback = Callback,
                                               user_state = UserState}).

%% parseDocument(Xml, ParserState): select the parser module.
%% - a list goes to erlsom_sax_list;
%% - a binary goes to the parser for the configured encoding; if no
%%   encoding was configured it is detected first (detection may consume
%%   data through the continuation function, hence the new continuation
%%   state).
parseDocument(Xml, S) when is_list(Xml) ->
  erlsom_sax_list:parse(Xml, S);
parseDocument(Xml, S = #erlsom_sax_state{encoding = undefined})
  when is_binary(Xml) ->
  #erlsom_sax_state{continuation_fun = CFun,
                    continuation_state = CState} = S,
  {Encoding, Xml2, CState2} = erlsom_lib:detectEncoding(Xml, CFun, CState),
  parseDocumentBinary(Encoding, Xml2,
                      S#erlsom_sax_state{continuation_state = CState2});
parseDocument(Xml, S = #erlsom_sax_state{encoding = Encoding})
  when is_binary(Xml) ->
  parseDocumentBinary(Encoding, Xml, S).

%% parseDocumentBinary(Encoding, Xml, State)
%%
%% Dispatches a binary document to the parser module for Encoding (an
%% atom). Throws {error, "Encoding not supported: ..."} for any other atom.
%% 'utf-8' is accepted as an alias for 'utf8', because that is the spelling
%% used in the interface description at the top of this module.
parseDocumentBinary(Encoding, Xml, State) ->
  case Encoding of
    'utf8' ->
      erlsom_sax_utf8:parse(Xml, State);
    'utf-8' ->
      erlsom_sax_utf8:parse(Xml, State);
    'utf16be' ->
      erlsom_sax_utf16be:parse(Xml, State);
    'utf16le' ->
      erlsom_sax_utf16le:parse(Xml, State);
    'latin-1' ->
      erlsom_sax_latin1:parse(Xml, State);
    'iso_8859_1' ->
      erlsom_sax_latin1:parse(Xml, State);
    'iso_8859_15' ->
      erlsom_sax_latin9:parse(Xml, State);
    'list' ->
      erlsom_sax_list:parse(Xml, State);
    _ ->
      throw({error, "Encoding not supported: " ++ atom_to_list(Encoding)})
  end.

%% getOptions(Options) -> #erlsom_sax_state{}
%%
%% Translates the option list to a parser state. Recognized options:
%%   expand_entities | {expand_entities, boolean()}
%%   {output_encoding, V}
%%   {continuation_function, Fun, InitialContinuationState}
%%   {encoding, V}                 V = string() | atom()
%%   {max_entity_depth, integer() | infinity}
%%   {max_entity_size, integer() | infinity}
%%   {max_nr_of_entities, integer() | infinity}
%%   {max_expanded_entity_size, integer() | infinity}
%% Any other option (or a value of the wrong type) results in a
%% function_clause error.
getOptions(Options) ->
  getOptions(Options, #erlsom_sax_state{}).

getOptions([], S) ->
  case S#erlsom_sax_state.continuation_fun of
    undefined ->
      %% Default continuation: returns the tail unchanged, which the
      %% continueFun functions in erlsom_sax_lib treat as 'end of data'.
      S#erlsom_sax_state{continuation_fun = fun(T, St) -> {T, St} end};
    _ ->
      S
  end;
getOptions([expand_entities | T], S) ->
  getOptions(T, S#erlsom_sax_state{expand_entities = true});
getOptions([{expand_entities, V} | T], S) when is_boolean(V) ->
  getOptions(T, S#erlsom_sax_state{expand_entities = V});
getOptions([{output_encoding, V} | T], S) ->
  getOptions(T, S#erlsom_sax_state{output = V});
getOptions([{continuation_function, Cf, Cs} | T], S) when is_function(Cf) ->
  getOptions(T, S#erlsom_sax_state{continuation_fun = Cf,
                                   continuation_state = Cs});
getOptions([{encoding, V} | T], S) when is_atom(V) ->
  %% An encoding that is already an atom is used as-is (list_to_atom/1
  %% would fail with badarg on it).
  getOptions(T, S#erlsom_sax_state{encoding = V});
getOptions([{encoding, V} | T], S) ->
  getOptions(T, S#erlsom_sax_state{encoding = list_to_atom(V)});
getOptions([{max_entity_depth, V} | T], S) when is_integer(V); V == infinity ->
  getOptions(T, S#erlsom_sax_state{max_entity_depth = V});
getOptions([{max_entity_size, V} | T], S) when is_integer(V); V == infinity ->
  getOptions(T, S#erlsom_sax_state{max_entity_size = V});
getOptions([{max_nr_of_entities, V} | T], S)
  when is_integer(V); V == infinity ->
  getOptions(T, S#erlsom_sax_state{max_nr_of_entities = V});
getOptions([{max_expanded_entity_size, V} | T], S)
  when is_integer(V); V == infinity ->
  getOptions(T, S#erlsom_sax_state{max_expanded_entity_size = V}).

--------------------------------------------------------------------------------
/src/erlsom_sax_lib.erl:
--------------------------------------------------------------------------------
%%% Copyright (C) 2006 - 2011 Willem de Jong
%%%
%%% This file is part of Erlsom.
%%%
%%% Erlsom is free software: you can redistribute it and/or modify
%%% it under the terms of the GNU Lesser General Public License as
%%% published by the Free Software Foundation, either version 3 of
%%% the License, or (at your option) any later version.
%%%
%%% Erlsom is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%%% GNU Lesser General Public License for more details.
%%%
%%% You should have received a copy of the GNU Lesser General Public
%%% License along with Erlsom. If not, see
%%% <http://www.gnu.org/licenses/>.
%%%
%%% Author contact: w.a.de.jong@gmail.com

%%% ====================================================================
%%% A couple of functions used by erlsom_sax (for each encoding variant)
%%% ====================================================================

%%% Version: 20-01-2008

-module(erlsom_sax_lib).

-include("erlsom_sax.hrl").
-export([test/0]).
-export([findCycle/4]).
-export([continueFun/3]).
-export([continueFun/4]).
-export([continueFun2/4]).
-export([continueFun/5]).
-export([continueFun/6]).
-export([continueFun2/6]).
-export([mapStartPrefixMappingCallback/3]).
-export([mapEndPrefixMappingCallback/3]).
-export([createStartTagEvent/3]).

%% continueFun / continueFun2
%%
%% There are several variants of this function, which differ only in the
%% number of extra arguments and in the position of the data argument in the
%% call to ParseFun. The extra arguments (V1, V2, V3, Prefix, Head) are not
%% interpreted here; they are only passed on to ParseFun.
%%
%% Each variant calls the continuation function from the parser state with
%% the unparsed tail T and the continuation state. If the continuation
%% function returns T unchanged, there is no more data and
%% {error, "Malformed: Unexpected end of data"} is thrown. Otherwise
%% ParseFun is called with the new data and a parser state that holds the
%% new continuation state.

%% ParseFun(V1, V2, V3, Data, State)
continueFun(V1, V2, V3, T, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(V1, V2, V3, More,
               State#erlsom_sax_state{continuation_state = CState2})
  end.

%% ParseFun(Data, V1, V2, V3, State)
continueFun2(T, V1, V2, V3, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(More, V1, V2, V3,
               State#erlsom_sax_state{continuation_state = CState2})
  end.

%% ParseFun(Prefix, Head, Data, State)
continueFun(Prefix, Head, T, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(Prefix, Head, More,
               State#erlsom_sax_state{continuation_state = CState2})
  end.

%% ParseFun(Head, Data, State)
continueFun(Head, T, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(Head, More,
               State#erlsom_sax_state{continuation_state = CState2})
  end.

%% Variant of continueFun that calls ParseFun(Data, Head, State).
%% Throws {error, "Malformed: Unexpected end of data"} if the continuation
%% function returns the tail T unchanged.
continueFun2(T, Head, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(More, Head,
               State#erlsom_sax_state{continuation_state = CState2})
  end.

%% Variant of continueFun that calls ParseFun(Data, State).
%% Throws {error, "Malformed: Unexpected end of data"} if the continuation
%% function returns the tail T unchanged.
continueFun(T, State, ParseFun) ->
  #erlsom_sax_state{continuation_fun = Fetch,
                    continuation_state = CState} = State,
  case Fetch(T, CState) of
    {T, _} ->
      throw({error, "Malformed: Unexpected end of data"});
    {More, CState2} ->
      ParseFun(More,
               State#erlsom_sax_state{continuation_state = CState2})
  end.


%% Calls Callback({startPrefixMapping, Prefix, Uri}, State) for every
%% {Prefix, Uri} in the list of new namespaces, front to back, threading
%% the state through the calls.
%% returns State
mapStartPrefixMappingCallback(NewNamespaces, State, Callback) ->
  lists:foldl(fun({Prefix, Uri}, Acc) ->
                  Callback({startPrefixMapping, Prefix, Uri}, Acc)
              end, State, NewNamespaces).

%% Calls Callback({endPrefixMapping, Prefix}, State) for every
%% {Prefix, Uri} in the list of namespaces, front to back, threading the
%% state through the calls.
%% returns State
mapEndPrefixMappingCallback(Namespaces, State, Callback) ->
  lists:foldl(fun({Prefix, _Uri}, Acc) ->
                  Callback({endPrefixMapping, Prefix}, Acc)
              end, State, Namespaces).


%% StartTag = {Prefix, LocalName, QualifiedName}
%% Attributes = list of Attribute
%% Attribute = {{Prefix, LocalName, QualifiedName}, Value}
%%
%% returns: {Name, Attributes2, NewNamespaces}
%% Name = {URI, LocalName, Prefix}
%% Attributes2 = list of Attribute2
%% Attribute2 = #attribute
%% NewNamespaces = list of {Prefix, URI} (prefix can be []).
%%
%% Namespaces are in such an order that namespace of the 'closest ancestors'
%% are in front. That way the right element will be found, even if a prefix is
%% used more than once in the document.
%%
createStartTagEvent(StartTag, Namespaces, Attributes) ->
  %% split the attributes into namespace declarations and the rest
  {Declared, Plain} = lookForNamespaces([], [], Attributes),
  %% declarations on this tag take precedence over those of the ancestors
  InScope = Declared ++ Namespaces,
  %% resolve the tag name, then the attribute names, against that list
  {tagNameTuple(StartTag, InScope),
   attributeNameTuples([], Plain, InScope),
   Declared}.

%% lookForNamespaces(Namespaces, OtherAttributes, Attributes)
%%
%% Attributes = list of {{Prefix, LocalName, QualifiedName}, Value}
%%
%% returns {Namespaces, OtherAttributes}, where
%% Namespaces = a list of tuples {Prefix, URI}; 'xmlns:p="u"' gives
%%   {"p", "u"}, 'xmlns="u"' gives {[], "u"}
%% OtherAttributes = the attributes that are not namespace declarations
%%
%% Both results are accumulated in front of the first 2 arguments, so they
%% are in reverse document order.
lookForNamespaces(Namespaces, OtherAttributes, []) ->
  {Namespaces, OtherAttributes};
lookForNamespaces(Namespaces, OtherAttributes,
                  [{{Prefix, LocalName, _QName}, Value} | Tail])
  when Prefix == "xmlns" ->
  lookForNamespaces([{LocalName, Value} | Namespaces], OtherAttributes, Tail);
lookForNamespaces(Namespaces, OtherAttributes,
                  [{{Prefix, LocalName, _QName}, Value} | Tail])
  when Prefix == [], LocalName == "xmlns" ->
  lookForNamespaces([{[], Value} | Namespaces], OtherAttributes, Tail);
lookForNamespaces(Namespaces, OtherAttributes,
                  [{{_Prefix, _LocalName, _QName}, _Value} = Other | Tail]) ->
  lookForNamespaces(Namespaces, [Other | OtherAttributes], Tail).

%% StartTag = {Prefix, LocalName, QualifiedName}
%% Namespaces = list of {Prefix, URI} (prefix can be []).
%%
%% Returns {Uri, LocalName, Prefix}
%%
%% TODO: error if not found? special treatment of 'xml:lang'?
%% Resolves the prefix of a start tag against the namespaces in scope.
%% An empty prefix is looked up as well, so a default namespace declaration
%% ({[], Uri}) applies to unprefixed tags. If the prefix is not found, the
%% Uri in the result is [].
tagNameTuple(StartTag, Namespaces) ->
  {Prefix, LocalName, _QName} = StartTag,
  case lists:keyfind(Prefix, 1, Namespaces) of
    {Prefix, Uri} -> {Uri, LocalName, Prefix};
    false -> {[], LocalName, Prefix}
  end.


%% attributeNameTuples(ProcessedAttributes, Attributes, Namespaces)
%%
%% Attributes = list of Attribute
%% Attribute = {{Prefix, LocalName, QualifiedName}, Value}
%% Namespaces = list of {Prefix, URI} (prefix can be []).
%%
%% Returns a list of #attribute records. The records are accumulated in
%% front of ProcessedAttributes, so the result is in the reverse order of
%% Attributes.
attributeNameTuples(ProcessedAttributes,
                    [{AttributeName, Value} | Attributes], Namespaces) ->
  {Uri, LocalName, Prefix} = attributeNameTuple(AttributeName, Namespaces),
  attributeNameTuples([#attribute{localName= LocalName,
                                  prefix = Prefix,
                                  uri = Uri,
                                  value = Value} | ProcessedAttributes],
                      Attributes, Namespaces);

attributeNameTuples(ProcessedAttributes, [], _) ->
  ProcessedAttributes.

%% AttributeName = {Prefix, LocalName, QualifiedName}
%% Namespaces = list of {Prefix, URI} (prefix can be []).
%%
%% Returns {Uri, LocalName, Prefix}.
%% Difference with TagNameTuple: attributes without prefix do NOT belong
%% to the default namespace.
%% The prefix "xml" is resolved to the XML namespace if it has not been
%% declared explicitly; any other undeclared prefix gives Uri = [].
attributeNameTuple(AttributeName, Namespaces) ->
  {Prefix, LocalName, _} = AttributeName,
  if
    %% No prefix: no namespace, and the prefix in the result is empty
    %% (it used to be a copy of LocalName, which is not a prefix).
    Prefix == [] -> {[], LocalName, []};
    true ->
      case lists:keyfind(Prefix, 1, Namespaces) of
        {Prefix, Uri} ->
          {Uri, LocalName, Prefix};
        false ->
          case Prefix of
            "xml" -> {"http://www.w3.org/XML/1998/namespace", LocalName, Prefix};
            _ -> {[], LocalName, Prefix}
          end
      end
  end.

%% simplistic function to find a cycle in a list [{a, b}, {b, c}, ...]
%% or if there is a path longer than MaxDepth.
%% The edge A, B is added; the rest of the graph is known
%% to be acyclical. So we start from B (To) and look for a path
%% to A (Current).
%% findCycle(To, Current, Edges, MaxDepth) -> false | cycle | max_depth
%%
%% Edges is a list of {From, To} tuples. The search starts at node To and
%% follows edges depth-first:
%% - cycle: an edge that leads to Current was found
%% - max_depth: the search reached depth MaxDepth (the start node is at
%%   depth 1) while there were still edges left to look at
%% - false: neither of the above
findCycle(To, Current, Edges, MaxDepth) ->
  findCycle(To, Current, Edges, MaxDepth, 1).

findCycle(_To, _Current, [], _MaxD, _CurrentD) ->
  false;
findCycle(_To, _Current, _Edges, MaxD, CurrentD) when MaxD == CurrentD ->
  max_depth; %% reached Max Depth
findCycle(To, Current, Edges, MaxD, CurrentD) ->
  %% take the next edge that starts at To
  case lists:keyfind(To, 1, Edges) of
    false ->
      false;
    {_, Current} ->
      cycle; %% found a cycle
    {_, Next} ->
      %% all edges that start at To are dropped before going deeper
      Unvisited = lists:keydelete(To, 1, Edges),
      case findCycle(Next, Current, Unvisited, MaxD, CurrentD + 1) of
        false ->
          %% nothing found below Next: continue with the remaining edges
          findCycle(To, Current, Unvisited, MaxD, CurrentD);
        Found ->
          Found
      end
  end.

%% Self test for findCycle/4; fails with badmatch if a result is not as
%% expected.
test() ->
  TwoEdges = [{a, b}, {b, c}],
  false = findCycle(b, a, [{a, b}], 2),
  max_depth = findCycle(b, a, TwoEdges, 2),
  false = findCycle(b, a, TwoEdges, 3),
  false = findCycle(b, a, [{a, b}, {b, c}, {c, d}, {c, e},
                           {c, f}, {c, g}, {f, q}, {f, r}, {f, s},
                           {g, z}], 12),
  cycle = findCycle(b, a, [{a, b}, {c, d}, {b, c}, {c, e},
                           {c, f}, {f, q}, {f, r}, {f, s}, {q, s},
                           {g, a}, {c, g}], 12),
  cycle = findCycle(b, a, [{a, b}, {b, c}, {c, d}, {c, e},
                           {c, a}, {c, g}, {f, q}, {f, r}, {f, s},
                           {g, a}], 12).

--------------------------------------------------------------------------------
/examples/complex_form/erlsom_complex_form.erl:
--------------------------------------------------------------------------------
%%% translate XML to the output format used by XMERL.
%%% The output is not complete: some fields in the XMERL output records
%%% are not populated. But it is enough to use the XPATH functions (at
%%% least for the examples that I tried).
%%%
%%% Note: this hasn't been tested properly. See it as an example of how
%%% the sax parser can be used.
%%%
-module(erlsom_complex_form).

%% user interface
-export([scan/1]).
-export([scan_file/1]).
%% with options
-export([scan/2]).
-export([scan_file/2]).

%% Provides the #attribute{} record that is matched in processAttributes/4.
-include_lib("erlsom/src/erlsom_sax.hrl").

%% The record definitions below are copied from xmerl hrl files!
%% XML Element
%% content = [#xmlElement()|#xmlText()|#xmlPI()|#xmlComment()|#xmlDecl()]
-record(xmlElement,{
  name, % atom()
  expanded_name = [], % string() | {URI,Local} | {"xmlns",Local}
  nsinfo = [], % {Prefix, Local} | []
  namespace,
  parents = [], % [{atom(),integer()}]
  pos, % integer()
  attributes = [], % [#xmlAttribute()]
  content = [],
  language = "", % string()
  xmlbase="", % string() XML Base path, for relative URI:s
  elementdef=undeclared % atom(), one of [undeclared | prolog | external | element]
  }).

%% plain text
%% IOlist = [char() | binary () | IOlist]
-record(xmlText,{
  parents = [], % [{atom(),integer()}]
  pos, % integer()
  language = [], % inherits the element's language
  value, % IOlist()
  type = text % atom() one of (text|cdata)
  }).

%% Attribute
-record(xmlAttribute,{
  name, % atom()
  expanded_name=[],% atom() | {string(),atom()}
  nsinfo = [], % {Prefix, Local} | []
  namespace = [], % inherits the element's namespace
  parents = [], % [{atom(),integer()}]
  pos, % integer()
  language = [], % inherits the element's language
  value, % IOlist() | atom() | integer()
  normalized % atom() one of (true | false)
  }).

%% namespace record
-record(xmlNamespace,{
  default = [],
  nodes = []
  }).


%% State of the callback function:
%% stack    - the elements that are currently open, innermost first
%% posStack - one counter per open element: the number of child nodes
%%            that have been added to that element so far
%% options  - only set (to []) when callback/2 creates a fresh state
-record(sState, {stack = [], posStack = [], options}).

%% scan_file(File): like scan_file(File, []).
scan_file(File) ->
  scan_file(File, []).

%% scan_file(File, Options): read File and scan its contents with Options.
%% If the file cannot be read, the result of file:read_file/1 is returned
%% unchanged.
scan_file(File, Options) ->
  case file:read_file(File) of
    {ok, Bin} ->
      scan(Bin, Options);
    Error ->
      Error
  end.

%% scan(Xml): like scan(Xml, []).
scan(Xml) ->
  scan(Xml, []).

%% scan(Xml, Options): parse Xml with the SAX parser, using callback/2 as
%% the event handler and an empty #sState{} as the initial state. Options
%% are passed on to erlsom:parse_sax/4 unchanged.
scan(Xml, Options) ->
  erlsom:parse_sax(Xml,
                   #sState{stack = []},
                   fun callback/2, Options).


%% callback(Event, State) -> NewState
%%
%% SAX event handler. Element and text events update the stack in the
%% state; processing instructions and prefix mappings are ignored. At
%% endDocument the single element that is left on the stack (the root) is
%% returned instead of a state.
%% Anything that goes wrong (including an event that is not listed, which
%% results in a case_clause error) is caught and re-raised by throwError/4.
callback(Event, State) ->

  try
    case Event of
      startDocument ->
        case State of
          #sState{} ->
            State;
          _ ->
            %% the caller did not provide an #sState{}: start from scratch
            #sState{stack = [], options = []}
        end;
      {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
        startElement(Event, State);
      {endElement, _Uri, _LocalName, _Prefix} ->
        endElement(Event, State);
      {characters, _Characters} ->
        characters(Event, State);
      {ignorableWhitespace, Characters} ->
        %% whitespace is kept: it is handled like any other text
        characters({characters, Characters}, State);
      {processingInstruction, _Target, _Data} -> State;
      {startPrefixMapping, _Prefix, _URI} ->
        State;
      {endPrefixMapping, _Prefix} ->
        State;
      endDocument ->
        case State of
          #sState{stack = [Root]} ->
            Root;
          _Else ->
            throw({error, "unexpected end"})
        end;
      {error, Message} ->
        throw(Message);
      {'EXIT', Message} ->
        exit(Message)
    end
  catch
    %% NOTE(review): erlang:get_stacktrace/0 is deprecated in recent OTP
    %% releases in favour of the 'Class:Reason:Stacktrace' catch pattern -
    %% check which OTP versions this example has to support.
    error:Reason -> throwError(error, {Reason,erlang:get_stacktrace()}, Event, State);
    Class:Exception -> throwError(Class, Exception, Event, State)
  end.

%% Stack contains the tree that is growing as the elements come in.
%% [{root, [attributes], [element1, element2]},
%% {element3, [attributes], [element3.1, element3.2]},
%% {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)

%% Now with some added info that we need for the complex form:
%% Stack contains the tree that is growing as the elements come in.
%% [{root, SeqNo, [attributes], [element1, element2]},
%% {element3, SeqNo, [attributes], [element3.1, element3.2]},
%% {element3.3, SeqNo, [attributes], [element3.3.1]}] (but in reverse order...)


%% NOTE: the examples below use tuples to keep them short. In this module
%% the entries on the stack are #xmlElement{} records, and text is stored
%% as #xmlText{} records in the 'content' field.

%% When a startElement event comes in, add a new element to the stack:
%% [{root, [attributes], [element1, element2]},
%% {element3, [attributes], [element3.1, element3.2]},
%% {element3.3, [attributes], [element3.3.1]},
%% {element3.3.2, [attributes], []}]

%% When a characters event comes in, insert the text into the top element:
%% [{root, [attributes], [element1, element2]},
%% {element3, [attributes], [element3.1, element3.2]},
%% {element3.3, [attributes], [element3.3.1]},
%% {element3.3.2, [attributes], [{#text, "the text"}]}]

%% When an endElement comes in, insert the top element of the stack in the
%% layer below it (its parent):
%% [{root, [attributes], [element1, element2]},
%% {element3, [attributes], [element3.1, element3.2]},
%% {element3.3, [attributes], [element3.3.1, element3.3.2]}]

%% Root element (both stacks are empty): position 1, no parents, and a
%% namespace record that starts from an empty #xmlNamespace{}. The new
%% counter on posStack (0) counts the child nodes of this element.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             State = #sState{stack = [], posStack = []}) ->
  Name = makeName(LocalName, Prefix),
  State#sState{stack = [#xmlElement{name = Name,
                                    expanded_name = makeExpandedName(Uri, LocalName),
                                    pos = 1,
                                    nsinfo = makeNsInfo(Prefix, LocalName),
                                    namespace = makeNs(Prefix, Uri, #xmlNamespace{}),
                                    parents = [],
                                    attributes = processAttributes(Attributes, State),
                                    content = []}],
               posStack = [0]};

%% Nested element: its position is the number of nodes that the parent has
%% so far (Pos) plus 1, its namespace record extends that of the parent,
%% and its parents are taken from the elements that are currently open.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             State = #sState{stack = [Parent | _], posStack = [Pos | _]}) ->
  Name = makeName(LocalName, Prefix),
  State#sState{stack = [#xmlElement{name = Name,
                                    expanded_name = makeExpandedName(Uri, LocalName),
                                    pos = Pos + 1,
                                    nsinfo = makeNsInfo(Prefix, LocalName),
                                    namespace = makeNs(Prefix, Uri, Parent#xmlElement.namespace),
                                    parents = getParentsFromStack(State#sState.stack, []),
                                    attributes = processAttributes(Attributes, State),
                                    content = []} | State#sState.stack],
               posStack = [0 | State#sState.posStack]}.

%% End of the root element (it is the only element on the stack): put its
%% content in document order. The element stays on the stack, where
%% callback/2 picks it up at endDocument.
endElement({endElement, _Uri, _LocalName, _Prefix},
           State = #sState{stack = [#xmlElement{content = Content} = Top]}) ->
  State#sState{stack = [Top#xmlElement{content = lists:reverse(Content)}]};

%% End of a nested element: put its content in document order, add it to
%% the content of its parent, drop its counter from posStack and increment
%% the counter of the parent.
endElement({endElement, _Uri, _LocalName, _Prefix},
           #sState{stack = [#xmlElement{content = ChildContent} = Child |
                            [#xmlElement{content = ParentContent} = Parent | Tail]],
                   posStack = [_NrOfChildEls | [NrOfElements | PosTail]]} = State) ->
  State#sState{stack = [Parent#xmlElement{content = [Child#xmlElement{content = lists:reverse(ChildContent)} |
                                                     ParentContent]} |
                        Tail],
               posStack = [NrOfElements + 1 | PosTail]}.

%% The most recently added node of the top element is text: append the new
%% characters to it (no new node, so posStack is not changed).
%% NOTE(review): '++' only works if both values are lists - confirm that
%% this example is never used with binary output.
characters({characters, Characters},
           State = #sState{stack = [#xmlElement{content = [#xmlText{value = Text} = FirstPart | Rest]} = Element | Tail]}) ->
  State#sState{stack = [Element#xmlElement{content = [FirstPart#xmlText{value = Text ++ Characters} | Rest]} | Tail]};

%% Otherwise: add a new #xmlText{} node to the top element and increment
%% the node counter of that element.
characters({characters, Characters},
           State = #sState{stack = [#xmlElement{content = Content} = Element | Tail],
                           posStack = [NrOfElements | PosTail]}) ->
  State#sState{stack = [Element#xmlElement{content = [#xmlText{value = Characters,
                                                               parents = getParentsFromStack(State#sState.stack, []),
                                                               pos = NrOfElements + 1} | Content]} | Tail],
               posStack = [NrOfElements + 1 | PosTail]}.

%% Returns [{Name, Pos}] for the elements on the stack. The stack has the
%% innermost element first; the result has it last.
getParentsFromStack([], Acc) ->
  Acc;
getParentsFromStack([#xmlElement{name = Name, pos = Pos} | Tail], Acc) ->
  getParentsFromStack(Tail, [{Name, Pos} | Acc]).

%% Translates a list of #attribute{} records (from the SAX parser) to a
%% list of #xmlAttribute{} records in the same order; 'pos' is the
%% position in the list, starting at 1. State is passed along but not used.
processAttributes(Attributes, State) ->
  processAttributes(Attributes, State, 1, []).
processAttributes([], _State, _Count, Acc) ->
  lists:reverse(Acc);
processAttributes([#attribute{localName=LocalName, uri = Uri, prefix = Prefix, value=Value} | Tail],
                  State, Count, Acc) ->
  processAttributes(Tail, State, Count + 1, [
    #xmlAttribute{
      name = makeName(LocalName, Prefix),
      expanded_name = makeExpandedName(Uri, LocalName),
      nsinfo = makeNsInfo(Prefix, LocalName),
      pos = Count,
      value = Value
    } | Acc]).

%% Re-raises an exception that was caught in callback/2. If the state is
%% an #sState{}, the reason is extended with the names of the open elements
%% and the event that was being processed. The reason is wrapped as
%% {error, Reason}; class 'throw' is re-raised with throw/1, classes
%% 'error' and 'exit' with exit/1.
throwError(Class, Exception, Event,
           #sState{stack = Stack}) ->
  Message = [{exception, Exception},
             %% the names of the elements that are currently open
             {stack, printStackTrace(Stack)},
             %% the event that was being processed
             {received, Event}],
  case Class of
    'error' -> exit({error, Message});
    'throw' -> throw({error, Message});
    'exit' -> exit({error, Message})
  end;

throwError(Class, Exception, _Event,
           _Something) ->
  case Class of
    'error' -> exit({error, Exception});
    'throw' -> throw({error, Exception});
    'exit' -> exit({error, Exception})
  end.

%% Returns [{element, Name}] for the elements on the stack, outermost
%% element first.
printStackTrace(Stack) ->
  printStackTrace(Stack, []).
printStackTrace([], Acc) ->
  Acc;
printStackTrace([#xmlElement{name = Name} | Tail], Acc) ->
  printStackTrace(Tail, [{element, Name} | Acc]).

%% Node name: 'local' without prefix, 'prefix:local' otherwise.
makeName(Local, []) ->
  list_to_atom_or_not(Local);
makeName(Local, Prefix) ->
  list_to_atom_or_not(Prefix ++ ":" ++ Local).

%% nsinfo field: [] without prefix, {Prefix, Local} otherwise.
makeNsInfo([], _) -> [];
makeNsInfo(Prefix, Local) -> {Prefix, Local}.

%% makeNs(Prefix, Uri, ParentNs) -> #xmlNamespace{}
%% The namespace record for an element: the record of the parent if the
%% element has no namespace Uri, otherwise that record with {Prefix, Uri}
%% appended to its nodes.
makeNs(Prefix, Uri, Ns) ->
  case Uri of
    [] ->
      Ns;
    _ ->
      #xmlNamespace{nodes = Nodes} = Ns,
      Node = {Prefix, list_to_atom_or_not(Uri)},
      Ns#xmlNamespace{nodes = Nodes ++ [Node]}
  end.

%% makeExpandedName(Uri, Local)
%% The local name if there is no namespace Uri, {Uri, Local} otherwise
%% (with the parts converted to atoms where possible).
makeExpandedName(Uri, Local) ->
  case Uri of
    [] -> list_to_atom_or_not(Local);
    _ -> {list_to_atom_or_not(Uri), list_to_atom_or_not(Local)}
  end.

%% Converts String to an atom; if list_to_atom/1 fails, String is returned
%% unchanged.
list_to_atom_or_not(String) ->
  try list_to_atom(String) of
    Atom -> Atom
  catch
    _:_ -> String
  end.

--------------------------------------------------------------------------------
/src/erlsom_ucs.erl:
--------------------------------------------------------------------------------
%%% -*- Erlang -*-
%%%-------------------------------------------------------------------
%%% Author: Lon Willett
%%%
%%% Description: Some minimal support for encoding, decoding, and
%%% manipulating strings of ISO-10646 characters (i.e. Unicode).
%%%-------------------------------------------------------------------


%% NOTICE: This is just an excerpt of the original ucs application

%% This is a copy from xmerl_ucs, but it has
%% been modified to handle the case that a block of data ends in the middle
%% of a group of bytes that make up 1 character. In such a case the
%% bytes that belong to the incomplete character are passed back, so that
%% they can be put in front of the next block of data.

%% the function 'to_utf8' is an exact copy.

-module(erlsom_ucs).
-author('Lon.Willett@sse.ie').
-modified_by('johan.blom@mobilearts.se').
-modified_by('w.a.de.jong@gmail.com').
-compile([verbose,report_warnings,warn_unused_vars]).


-export([to_utf8/1, from_utf8/1]).
-export([decode_utf8/1]).
-export([char_to_utf8/1]).


%% TODO: replace this by something a bit more efficient
%%
%% decode_utf8(Utf8) -> [UnicodeChar] | error
%% Decodes a complete UTF-8 encoded string (list of bytes or binary).
%% Returns 'error' if the input ends in the middle of a character. Exits
%% (see from_utf8/1) if the input contains bad bytes.
decode_utf8(Utf8) ->
    {Chars, Leftover} = from_utf8(Utf8),
    case Leftover of
        [] -> Chars;
        _ -> error
    end.


%% from_utf8([Byte]) -> {[UnicodeChar], Tail}
%% Decode UTF-8 encoded character-strings. Accepts a binary as well.
%% Exits with {ucs,{bad_utf8_character_code}} if any bad byte is found.
%%
%% Modification (WdJ): Added an output parameter (Tail):
%% If the string ends in the middle of a character, the bytes
%% of that incomplete character are returned (if not, the new
%% parameter has value []).
%% The goal is to allow parsing of data in arbitrary blocks.

from_utf8(Bin) when is_binary(Bin) ->
    from_utf8(binary_to_list(Bin));

from_utf8(List) ->
    {Chars, Tail, NumBad} = expand_utf8(List),
    if
        NumBad =:= 0 -> {Chars, Tail};
        true -> exit({ucs,{bad_utf8_character_code}})
    end.

%% expand_utf8([Byte]) -> {[UnicodeChar], Tail, NumberOfBadBytes}
%% Expand UTF8 byte sequences to ISO 10646/Unicode
%% characters. Any illegal bytes are removed and the number of
%% bad bytes are returned.
%%
%% Modification (WdJ): Added an output parameter (Tail):
%% If the string ends in the middle of a character, the bytes
%% of that incomplete character are returned (if not, the new
%% parameter has value []).
%% The goal is to allow parsing of data in arbitrary blocks.
%%
%% Reference:
%% RFC 3629: "UTF-8, a transformation format of ISO 10646".
expand_utf8(Str) ->
    expand_utf8_1(Str, [], 0).

%% expand_utf8_1(Bytes, Acc, Bad): Acc holds the decoded characters in
%% reverse order, Bad counts the rejected bytes/sequences.
expand_utf8_1([C|Cs], Acc, Bad) when C < 16#80 ->
    %% 0xxxxxxx: plain ASCII character.
    expand_utf8_1(Cs, [C|Acc], Bad);
expand_utf8_1([B1,B2|Rest], Acc, Bad) when B1 band 16#E0 =:= 16#C0,
                                            B2 band 16#C0 =:= 16#80 ->
    %% 110xxxxx 10xxxxxx
    Code = ((B1 band 16#1F) bsl 6) bor (B2 band 16#3F),
    if
        Code >= 16#80 ->
            expand_utf8_1(Rest, [Code|Acc], Bad);
        true ->
            %% Bad range: the value would have fitted in 1 byte.
            expand_utf8_1(Rest, Acc, Bad+1)
    end;
expand_utf8_1([B1], Acc, Bad) when B1 band 16#E0 =:= 16#C0 ->
    %% 2-byte character, second byte still to come.
    {lists:reverse(Acc), [B1], Bad};
expand_utf8_1([B1,B2,B3|Rest], Acc, Bad) when B1 band 16#F0 =:= 16#E0,
                                               B2 band 16#C0 =:= 16#80,
                                               B3 band 16#C0 =:= 16#80 ->
    %% 1110xxxx 10xxxxxx 10xxxxxx
    Code = ((B1 band 16#0F) bsl 12) bor
           ((B2 band 16#3F) bsl 6) bor
           (B3 band 16#3F),
    if
        Code >= 16#800 ->
            expand_utf8_1(Rest, [Code|Acc], Bad);
        true ->
            %% Bad range: the value would have fitted in 2 bytes.
            expand_utf8_1(Rest, Acc, Bad+1)
    end;
expand_utf8_1([B1], Acc, Bad) when B1 band 16#F0 =:= 16#E0 ->
    %% 3-byte character, 2 bytes still to come.
    {lists:reverse(Acc), [B1], Bad};
expand_utf8_1([B1,B2], Acc, Bad) when B1 band 16#F0 =:= 16#E0,
                                      B2 band 16#C0 =:= 16#80 ->
    %% 3-byte character, 1 byte still to come.
    {lists:reverse(Acc), [B1, B2], Bad};
expand_utf8_1([B1,B2,B3,B4|Rest], Acc, Bad) when B1 band 16#F8 =:= 16#F0,
                                                  B2 band 16#C0 =:= 16#80,
                                                  B3 band 16#C0 =:= 16#80,
                                                  B4 band 16#C0 =:= 16#80 ->
    %% 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    Code = ((B1 band 16#0F) bsl 18) bor
           ((B2 band 16#3F) bsl 12) bor
           ((B3 band 16#3F) bsl 6) bor
           (B4 band 16#3F),
    if
        Code >= 16#10000 ->
            expand_utf8_1(Rest, [Code|Acc], Bad);
        true ->
            %% Bad range: the value would have fitted in 3 bytes.
            expand_utf8_1(Rest, Acc, Bad+1)
    end;
expand_utf8_1([B1], Acc, Bad) when B1 band 16#F8 =:= 16#F0 ->
    %% 4-byte character, 3 bytes still to come.
    {lists:reverse(Acc), [B1], Bad};
expand_utf8_1([B1,B2], Acc, Bad) when B1 band 16#F8 =:= 16#F0,
                                      B2 band 16#C0 =:= 16#80 ->
    %% 4-byte character, 2 bytes still to come.
    {lists:reverse(Acc), [B1, B2], Bad};
expand_utf8_1([B1,B2,B3], Acc, Bad) when B1 band 16#F8 =:= 16#F0,
                                         B2 band 16#C0 =:= 16#80,
                                         B3 band 16#C0 =:= 16#80 ->
    %% 4-byte character, 1 byte still to come.
    {lists:reverse(Acc), [B1, B2, B3], Bad};
expand_utf8_1([_BadByte|Rest], Acc, Bad) ->
    %% Ignore bad character.
    expand_utf8_1(Rest, Acc, Bad+1);
expand_utf8_1([], Acc, Bad) ->
    {lists:reverse(Acc), [], Bad}.

%% from_utf16be(Bytes) -> {[UnicodeChar], Tail} | {error, not_utf16be}
%% Expand big-endian UTF-16 byte sequences to ISO 10646/Unicode
%% characters.
%%
%% Modification (WdJ): Added an output parameter (Tail):
%% If the string ends in the middle of a character, the bytes
%% of that incomplete character are returned (if not, the new
%% parameter has value []).
%% The goal is to allow parsing of data in arbitrary blocks.
%% Also: changed to work on lists instead of binaries (a binary argument
%% is converted to a list first).
from_utf16be(Bin) when is_binary(Bin) ->
    from_utf16be(binary_to_list(Bin), []);
from_utf16be(List) ->
    from_utf16be(List, []).

%% from_utf16be(Bytes, Acc) -> {[UnicodeChar], Tail} | {error, not_utf16be}
%% Acc holds the characters decoded so far, in reverse order.
from_utf16be([_Single] = Tail, Acc) ->
    %% Only half of a 16 bit unit is available: hand it back.
    {lists:reverse(Acc), Tail};
from_utf16be([High, Low | More], Acc)
  when High < 16#D8; High > 16#DF ->
    %% A 16 bit unit outside the surrogate range is a character by itself.
    Char = High * 256 + Low,
    %% 16#FFFE and 16#FFFF are not accepted: no branch of the 'if' matches
    %% and it fails.
    if Char < 16#FFFE -> from_utf16be(More, [Char | Acc]) end;
from_utf16be([Hi1, Hi2, Lo1, Lo2 | More], Acc)
  when Hi1 >= 16#D8, Hi1 < 16#DC, Lo1 >= 16#DC, Lo1 < 16#E0 ->
    %% Surrogate pair
    HiUnit = Hi1 * 256 + Hi2,
    LoUnit = Lo1 * 256 + Lo2,
    Char = ((HiUnit band 16#3FF) bsl 10) + (LoUnit band 16#3FF) + 16#10000,
    from_utf16be(More, [Char | Acc]);
from_utf16be([Hi1, _Hi2] = Tail, Acc)
  when Hi1 >= 16#D8, Hi1 < 16#DC ->
    %% Surrogate pair, incomplete (low surrogate missing)
    {lists:reverse(Acc), Tail};
from_utf16be([Hi1, _Hi2, _Lo1] = Tail, Acc)
  when Hi1 >= 16#D8, Hi1 < 16#DC ->
    %% Surrogate pair, incomplete (half of the low surrogate missing)
    {lists:reverse(Acc), Tail};
from_utf16be([], Acc) ->
    {lists:reverse(Acc), []};
from_utf16be(_Other, _Acc) ->
    {error,not_utf16be}.

%% from_utf16le(Bytes) -> {[UnicodeChar], Tail} | {error, not_utf16le}
%% Little-endian counterpart of from_utf16be/1. Accepts a list of bytes
%% or a binary (which is converted to a list first).
from_utf16le(Bin) when is_binary(Bin) ->
    from_utf16le(binary_to_list(Bin), []);
from_utf16le(List) ->
    from_utf16le(List, []).

%% from_utf16le(Bytes, Acc) -> {[UnicodeChar], Tail} | {error, not_utf16le}
%% Acc holds the characters decoded so far, in reverse order. In every
%% 16 bit unit the least significant byte comes first.
from_utf16le([_Single] = Tail, Acc) ->
    %% Only half of a 16 bit unit is available: hand it back.
    {lists:reverse(Acc), Tail};
from_utf16le([Low, High | More], Acc)
  when High < 16#D8; High > 16#DF ->
    %% A 16 bit unit outside the surrogate range is a character by itself.
    Char = High * 256 + Low,
    %% 16#FFFE and 16#FFFF are not accepted: no branch of the 'if' matches
    %% and it fails.
    if Char < 16#FFFE -> from_utf16le(More, [Char | Acc]) end;
from_utf16le([Hi1, Hi2, Lo1, Lo2 | More], Acc)
  when Hi2 >= 16#D8, Hi2 < 16#DC, Lo2 >= 16#DC, Lo2 < 16#E0 ->
    %% Surrogate pair
    HiUnit = Hi2 * 256 + Hi1,
    LoUnit = Lo2 * 256 + Lo1,
    Char = ((HiUnit band 16#3FF) bsl 10) + (LoUnit band 16#3FF) + 16#10000,
    from_utf16le(More, [Char | Acc]);
from_utf16le([_Hi1, Hi2] = Tail, Acc)
  when Hi2 >= 16#D8, Hi2 < 16#DC ->
    %% Surrogate pair, incomplete (low surrogate missing)
    {lists:reverse(Acc), Tail};
from_utf16le([_Hi1, Hi2, _Lo1] = Tail, Acc)
  when Hi2 >= 16#D8, Hi2 < 16#DC ->
    %% Surrogate pair, incomplete (half of the low surrogate missing)
    {lists:reverse(Acc), Tail};
from_utf16le([], Acc) ->
    {lists:reverse(Acc), []};
from_utf16le(_Other, _Acc) ->
    {error,not_utf16le}.

%%% UTF-8 encoding and decoding
%% TODO: isn't this very inefficient? Building all these lists?
%%
%% to_utf8(CharOrList) -> [Byte]
%% Encodes a single character, or a (possibly nested) list of characters,
%% as a flat list of UTF-8 bytes.
to_utf8(List) when is_list(List) ->
    lists:append(lists:map(fun to_utf8/1, List));
to_utf8(Ch) ->
    char_to_utf8_list(Ch).

%% TODO: this is probably not the best way to do this.
%%
%% char_to_utf8(Char) -> binary()
%% The UTF-8 encoding of a single character, as a binary.
char_to_utf8(Char) ->
    Bytes = char_to_utf8_list(Char),
    list_to_binary(Bytes).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% UTF-8 support
%%% Possible errors encoding UTF-8:
%%%	- Non-character values (something other than 0 .. 2^31-1).
%%%	- Surrogate pair code in string.
%%%	- 16#FFFE or 16#FFFF character in string.
%%% Possible errors decoding UTF-8:
%%%	- 10xxxxxx or 1111111x as initial byte.
%%%	- Insufficient number of 10xxxxxx octets following an initial octet of
%%%	multi-octet sequence.
%%%	- Non-canonical encoding used.
%%%	- Surrogate-pair code encoded as UTF-8.
%%%	- 16#FFFE or 16#FFFF character in string.
%%
%% char_to_utf8_list(Char) -> [Byte]
%% Values that cannot be encoded (surrogates, 16#FFFE, 16#FFFF, anything
%% from 2^31 upwards) match no branch of the 'if' expressions, so the call
%% fails. The 'bor' operations only fill bits that are known to be 0.
char_to_utf8_list(Ch) when is_integer(Ch), Ch >= 0 ->
    if
        Ch < 128 ->
            %% 0yyyyyyy
            [Ch];
        Ch < 16#800 ->
            %% 110xxxxy 10yyyyyy
            [16#C0 bor (Ch bsr 6),
             16#80 bor (Ch band 16#3F)];
        Ch < 16#10000 ->
            %% 1110xxxx 10xyyyyy 10yyyyyy
            if
                Ch < 16#D800; Ch > 16#DFFF, Ch < 16#FFFE ->
                    [16#E0 bor (Ch bsr 12),
                     16#80 bor ((Ch bsr 6) band 16#3F),
                     16#80 bor (Ch band 16#3F)]
            end;
        Ch < 16#200000 ->
            %% 11110xxx 10xxyyyy 10yyyyyy 10yyyyyy
            [16#F0 bor (Ch bsr 18),
             16#80 bor ((Ch bsr 12) band 16#3F),
             16#80 bor ((Ch bsr 6) band 16#3F),
             16#80 bor (Ch band 16#3F)];
        Ch < 16#4000000 ->
            %% 111110xx 10xxxyyy 10yyyyyy 10yyyyyy 10yyyyyy
            [16#F8 bor (Ch bsr 24),
             16#80 bor ((Ch bsr 18) band 16#3F),
             16#80 bor ((Ch bsr 12) band 16#3F),
             16#80 bor ((Ch bsr 6) band 16#3F),
             16#80 bor (Ch band 16#3F)];
        Ch < 16#80000000 ->
            %% 1111110x 10xxxxyy 10yyyyyy 10yyyyyy 10yyyyyy 10yyyyyy
            [16#FC bor (Ch bsr 30),
             16#80 bor ((Ch bsr 24) band 16#3F),
             16#80 bor ((Ch bsr 18) band 16#3F),
             16#80 bor ((Ch bsr 12) band 16#3F),
             16#80 bor ((Ch bsr 6) band 16#3F),
             16#80 bor (Ch band 16#3F)]
    end.
288 | 289 | -------------------------------------------------------------------------------- /src/erlsom_writeHrl.erl: -------------------------------------------------------------------------------- 1 | %%% Copyright (C) 2006 - 2008 Willem de Jong 2 | %%% 3 | %%% This file is part of Erlsom. 4 | %%% 5 | %%% Erlsom is free software: you can redistribute it and/or modify 6 | %%% it under the terms of the GNU Lesser General Public License as 7 | %%% published by the Free Software Foundation, either version 3 of 8 | %%% the License, or (at your option) any later version. 9 | %%% 10 | %%% Erlsom is distributed in the hope that it will be useful, 11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | %%% GNU Lesser General Public License for more details. 14 | %%% 15 | %%% You should have received a copy of the GNU Lesser General Public 16 | %%% License along with Erlsom. If not, see 17 | %%% <http://www.gnu.org/licenses/>. 18 | %%% 19 | %%% Author contact: w.a.de.jong@gmail.com 20 | 21 | %%% ==================================================================== 22 | %%% Writes record definitions, to be used with Erlsom. 23 | %%% ==================================================================== 24 | 25 | %%% Writes record definitions, taking a 'model' (from erlsom_compile) as 26 | %%% input. 27 | 28 | -module(erlsom_writeHrl). 29 | -export([writeHrl/1, writeHrl/2]). 30 | -export([write_hrl/1, write_hrl/2]). 31 | -export([writeHrlFile/3]). 32 | -export([writeXsdHrlFile/2]). 33 | 34 | -include("erlsom_parse.hrl"). 35 | -include("erlsom.hrl"). 36 | 37 | -type hrl_header() :: iolist(). %% only explanatory text (comment) 38 | -type hrl_types() :: iolist(). %% the actual record and type declarations 39 | 40 | %% debug(Text) -> io:format("writeHrl: ~p~n", [Text]). 41 | 42 | %% debug(Text1, Text2) -> 43 | %% io:format("~p ~p~n", [Text1, Text2]).

%% Returns the header (explanatory comment) and the record/type
%% declarations for the model, using the default options.
-spec write_hrl(Model::erlsom:model()) -> {hrl_header(), hrl_types()}.
write_hrl(Model) ->
  write_hrl(Model, []).

%% As write_hrl/1. The only option that is read here is
%% {attribute_hrl_prefix, string()} (default ""); it is stored in the
%% process dictionary, where writeAttribute/1 picks it up.
-spec write_hrl(Model::erlsom:model(), Options :: list()) -> {hrl_header(), hrl_types()}.
write_hrl(#model{tps = Types, th = TypeHierarchy, any_attribs = AnyAtts}, Options) ->
  AttributePrefix = proplists:get_value(attribute_hrl_prefix, Options, ""),
  %% must be done before the types are written
  erlang:put(erlsom_attribute_hrl_prefix, AttributePrefix),
  {header(), writeTypes(Types, TypeHierarchy, AnyAtts)}.

%% Like write_hrl/1, but returns header and types as 1 iolist.
writeHrl(Model) ->
  writeHrl(Model, []).

%% Like write_hrl/2, but returns header and types as 1 iolist.
writeHrl(#model{} = Model, Options) ->
  {Header, Types} = write_hrl(Model, Options),
  [Header, Types].

%% Compiles the XSD and returns the contents of the hrl file. If the XSD
%% cannot be compiled the error is printed, and the result is the result
%% of io:format/2.
writeHrlFile(Xsd, Prefix, Namespaces) ->
  case erlsom:compile(Xsd, Prefix, Namespaces) of
    {ok, Model} ->
      writeHrl(Model);
    {error, Error} ->
      io:format("Error while compiling file: ~p~n", [Error])
  end.

%% Compiles the XSD and returns the contents of the hrl file. Throws
%% {error, Error} if the XSD cannot be compiled.
writeXsdHrlFile(Xsd, Options) ->
  case erlsom:compile_xsd(Xsd, Options) of
    {ok, Model} ->
      writeHrl(Model, Options);
    {error, Error} ->
      throw({error, Error})
  end.

%% The comment that is put at the top of a generated hrl file.
header() ->
  "%% HRL file generated by ERLSOM\n"
  "%%\n"
  "%% It is possible (and in some cases necessary) to change the name of\n"
  "%% the record fields.\n"
  "%%\n"
  "%% It is possible to add default values, but be aware that these will\n"
  "%% only be used when *writing* an xml document.\n\n"
  "\n".

%% The declarations that every generated hrl file starts with: the
%% anyAttrib types (only if AnyAtts is true) and the qname record. Both
%% are guarded by -ifndef, so that several generated files can be included
%% together.
standard_types(AnyAtts) ->
  AnyAttribTypes =
    case AnyAtts of
      true ->
        "-ifndef(ERLSOM_ANY_ATTRIB_TYPES).\n"
        "-define(ERLSOM_ANY_ATTRIB_TYPES, true).\n"
        "-type anyAttrib() :: {{string(), %% name of the attribute\n"
        " string()}, %% namespace\n"
        " string()}. %% value\n"
        "\n"
        "-type anyAttribs() :: [anyAttrib()] | undefined.\n"
        "-endif.\n"
        "\n";
      _ ->
        ""
    end,
  QnameTypes =
    "-ifndef(ERLSOM_QNAME_TYPES).\n"
    "-define(ERLSOM_QNAME_TYPES, true).\n"
    "%% xsd:QName values are translated to #qname{} records.\n"
    "-record(qname, {uri :: string(),\n"
    " localPart :: string(),\n"
    " prefix :: string(),\n"
    " mappedPrefix :: string()}).\n"
    "-endif.\n",
  AnyAttribTypes ++ QnameTypes.

%% The standard types, followed by the declarations for each of the types
%% in the model.
writeTypes(Types, TypeHierarchy, AnyAtts) ->
  Standard = standard_types(AnyAtts),
  Declarations = lists:map(fun(Type) ->
                               writeType(Type, TypeHierarchy, AnyAtts)
                           end, Types),
  [Standard, Declarations].

%% The record declaration and the corresponding type declaration for 1
%% type. Nothing is written for the '_document' type.
writeType(#type{nm = '_document'}, _, _) ->
  [];
writeType(#type{nm = Name, els = Elements, atts = Attributes, mxd = Mixed},
          Hierarchy, AnyAtts) ->
  AnyAttribsField = case AnyAtts of
                      true ->
                        "anyAttribs :: anyAttribs()";
                      _ ->
                        []
                    end,
  Fields = [AnyAttribsField,
            writeAttributes(Attributes),
            writeElements(Elements, Mixed, Hierarchy)],
  Format = "~3n-record(~p, {~s})." ++
           "~2n-type ~s :: ~s.",
  Args = [Name, add_commas(Fields),
          formatType(Name), formatRecord(Name)],
  lists:flatten(io_lib:format(Format, Args)).

%% Joins the non-empty parts, separated by commas.
add_commas(Parts) ->
  string:join([Part || Part <- Parts, Part /= ""], ",").

%% The fields for the elements of a type, separated by commas. The number
%% of choices that has been written so far is threaded through, because it
%% determines the label of the next choice.
writeElements(Elements, Mixed, Hierarchy) ->
  {Fields, _NrChoices} =
    lists:mapfoldl(fun(Element, CountChoices) ->
                       writeElement(Element, Mixed, Hierarchy, CountChoices)
                   end, 0, Elements),
  string:join(Fields, ",").


%% returns {Field, CountChoices}
%%
%% For a mixed type the field is always a list that can also contain
%% strings, and the min/max/nillable properties of the element are not
%% used.
writeElement(#el{alts = Alternatives, mn = Min, mx = Max, nillable = Nillable},
             Mixed, Hierarchy, CountChoices) ->
  case Mixed of
    true ->
      {Label, Types, Count2} =
        writeAlternatives(Alternatives, 1, 1, false, Hierarchy, CountChoices),
      {lists:flatten(["\n\t", Label, "[", Types, " | string()]"]), Count2};
    _ ->
      {Label, Types, Count2} =
        writeAlternatives(Alternatives, Min, Max, Nillable, Hierarchy, CountChoices),
      {lists:flatten(["\n\t", Label, Types]), Count2}
  end.


%% returns {Label (including " :: "), Type, CountChoices}
%%
%% more than 1 alternative: a choice
writeAlternatives(Alts, Min, Max, _N, Hierarchy, CountChoices) when length(Alts) > 1 ->
  %% the first choice is called 'choice', the next ones 'choice1' etc.
  Label = case CountChoices of
            0 ->
              "choice :: ";
            _ ->
              ["choice", integer_to_list(CountChoices), " :: "]
          end,
  Relevant = case lists:keyfind('#any', #alt.tag, Alts) of
               false ->
                 Alts;
               Any ->
                 %% it makes no sense to have a choice between many things if
                 %% one of them is "any()" - in that case the any() suffices.
                 [Any]
             end,
  Alternatives = [writeAlternative(A, 1, 1, false, Hierarchy) || A <- Relevant],
  Union = string:join(Alternatives, " | "),
  Type = lists:flatten([minMaxType(Union, Min, Max, 1, false, simple)]),
  {Label, Type, CountChoices + 1};
%% 1 alternative (not a choice)
writeAlternatives([#alt{tag = Tag, tp = Tp, rl = Rl} = Alt], Min, Max, Nillable,
                  Hierarchy, CountChoices) ->
  LabelAtom = case Rl of
                true ->
                  %% the field name is the tag, without prefix
                  baseName(Tag);
                _ when Rl == false; Rl == simple ->
                  case Tp of
                    {'#PCDATA', _} ->
                      Tag;
                    _ ->
                      Tp
                  end
              end,
  Label = io_lib:format("~p :: ", [LabelAtom]),
  Type = writeAlternative(Alt, Min, Max, Nillable, Hierarchy),
  {Label, Type, CountChoices}.

%% writeAlternative(Alternative, Min, Max, Nillable, Hierarchy) -> Type
%%
%% The type specification for 1 alternative. The order of the clauses
%% matters.
%%
%% any
writeAlternative(#alt{tag = '#any'}, _, _, _, _) ->
  "any()";
%% 'real' element (not a group) with a simple type
writeAlternative(#alt{rl = true, tp = {Kind, Simple}, mx = AltMax}, Min, Max, Nillable, _H) ->
  formatSimpleType(Kind, Simple, Min, Max, AltMax, Nillable);
%% element with a complex type
writeAlternative(#alt{rl = Rl, tp = Type, mx = AltMax}, Min, Max, Nillable, Hierarchy)
  when Rl == true; Rl == simple ->
  %% The type could be abstract, in that case put the 'leaves' of the type hierarchy
  case erlsom_lib:getDescendants(Type, Hierarchy) of
    [] ->
      formatListType(Type, Min, Max, AltMax, Nillable);
    Leaves ->
      Candidates = [formatType(Candidate) || Candidate <- [Type | Leaves]],
      minMaxType(string:join(Candidates, " | "),
                 Min, Max, AltMax, Nillable, false)
  end;
%% simpleContent type
writeAlternative(#alt{rl = false, tp = {Kind, Simple}, mx = AltMax}, Min, Max, Nillable, _H) ->
  formatSimpleType(Kind, Simple, Min, Max, AltMax, Nillable);
%% group type
writeAlternative(#alt{rl = false, tp = Group, mx = AltMax}, Min, Max, Nillable, _H) ->
  formatListType(Group, Min, Max, AltMax, Nillable).


%% The record expression for a type, for example #'p:Transaction'{}.
formatRecord(Type) ->
  io_lib:format("#~p{}", [Type]).

%% The name of the type that is declared for a record, for example
%% 'p:Transaction'(). '#ANY' corresponds to any().
formatType('#ANY') ->
  "any()";
formatType(Type) ->
  io_lib:format("~p()", [Type]).

%% The fields for the attributes of a type, separated by commas.
writeAttributes(Attributes) ->
  Fields = [writeAttribute(Attribute) || Attribute <- Attributes],
  string:join(Fields, ",").


%% The field for 1 attribute. The name of the field is the name of the
%% attribute without its namespace prefix, preceded by the value of the
%% attribute_hrl_prefix option (which write_hrl/2 put in the process
%% dictionary).
-spec writeAttribute(#att{}) -> Acc when Acc :: list().

writeAttribute(#att{nm = Name, opt = Optional, tp = Type}) ->
  Prefix = erlang:get(erlsom_attribute_hrl_prefix),
  FieldName = list_to_atom(Prefix ++ atom_to_list(baseName(Name))),
  Undefined = case Optional of
                true -> " | undefined";
                _ -> ""
              end,
  lists:flatten(io_lib:format("~n\t~p :: ~s~s",
                              [FieldName, makeType(Type), Undefined])).

%% the names of the fields should not have the prefix
%% (only a name of the form "prefix:name" is changed).
baseName(Atom) when is_atom(Atom) ->
  Full = atom_to_list(Atom),
  case string:tokens(Full, ":") of
    [_Prefix, Local] ->
      list_to_atom(Local);
    _ ->
      list_to_atom(Full)
  end.

%% The type specification for an element with a simple type.
formatSimpleType(Tp1, Tp2, Min, Max, Max2, Nullable) ->
  minMaxType(simpleType(Tp1, Tp2), Min, Max, Max2, Nullable, true).

%% minMaxType(Type, Min, Max, Max2, Nullable, Simple) -> string()
%%
%% Decorates Type:
%% - with " | undefined" if Min is 0;
%% - with list brackets if Max is not 1, and once more if Max2 is not 1;
%% - if Nullable is true: with " | nil" (Simple is true) or
%%   " | {nil, Type}" (Simple is false), inside the brackets.
minMaxType(Type, Min, Max, Max2, Nullable, Simple) ->
  Brackets = fun(Limit) when Limit == 1 -> {"", ""};
                (_) -> {"[", "]"}
             end,
  {OuterOpen, OuterClose} = Brackets(Max),
  {InnerOpen, InnerClose} = Brackets(Max2),
  Nil = case {Nullable, Simple} of
          {true, true} ->
            " | nil";
          {true, false} ->
            [" | {nil, ", Type, "}"];
          _ ->
            ""
        end,
  Optional = case Min == 0 of
               true -> " | undefined";
               false -> ""
             end,
  lists:flatten([OuterOpen, InnerOpen, Type, Nil, InnerClose, OuterClose,
                 Optional]).

%% Only the second part of a simple type is relevant.
simpleType(_, Type) ->
  makeType(Type).

%% makeType(Type) -> string()
%%
%% The Erlang type that corresponds to a simple type of the model.
makeType(char) -> "string()";
makeType(integer) -> "integer()";
makeType({integer, negativeInteger}) -> "neg_integer()";
makeType({integer, positiveInteger}) -> "pos_integer()";
makeType({integer, nonPositiveInteger}) -> "neg_integer() | 0";
makeType({integer, nonNegativeInteger}) -> "non_neg_integer()";
makeType({integer, unsignedLong}) -> "non_neg_integer()";
makeType({integer, unsignedInt}) -> "non_neg_integer()";
makeType({integer, unsignedShort}) -> "non_neg_integer()";
makeType({integer, unsignedByte}) -> "non_neg_integer()";
makeType({integer, _}) -> "integer()";
makeType(bool) -> "boolean()";
makeType(float) -> "float() | 'NaN' | 'INF' | '-INF'";
makeType(qname) -> "#qname{}".

%% The type specification for an element with a complex type or a group.
formatListType(Type, Min, Max, Max2, Nullable) ->
  minMaxType(formatType(Type), Min, Max, Max2, Nullable, false).

--------------------------------------------------------------------------------
/src/erlsom_example_value.erl:
--------------------------------------------------------------------------------
-module(erlsom_example_value).

%% output code that makes an example value for a type, using an erlsom model as input.
%%
%% example:
%% #'p:Transaction'{
%%   'TransactionID' = "?",
%%   'SessionID' = "?",
%%   'CurrencyID' = "?",
%%   'Value' = "?"}
%%
%% In order to be able to embed the result it must be possible to specify
%% indentation.

-include("erlsom_parse.hrl").

-export([from_model/2]).
-export([from_model/3]).
-export([test/1]).

-type options() :: [option()].
-type option() :: {indent, integer()} | {indent_level, integer()}.

-record(e_state, {
  indent :: integer(),
  level :: integer(),
  choice_depth = 0 :: integer()
}).

%% test(File) -> Result of compile:file/1
%%
%% Generates "test_example.hrl" from the XSD in File, writes a module
%% "test_example.erl" with an example value for the type of the first
%% alternative of the first element of the '_document' type, and compiles
%% that module to see whether the generated code is valid.
test(File) ->
  HrlFile = "test_example.hrl",
  ErlFile = "test_example.erl",
  Options = [{include_any_attribs, false}],
  % generate the hrl file
  erlsom:write_xsd_hrl_file(File, HrlFile, Options),
  {ok, Model} = erlsom:compile_xsd_file(File, Options),
  % Pick a type from the model
  #model{tps = [#type{nm = '_document', els = Elements} | _]} = Model,
  [#el{alts = [#alt{tp = Type} | _]} | _] = Elements,
  % generate an example value,
  Example_value = from_model(Type, Model),
  file:write_file(ErlFile, [test_header(), Example_value, ".\n"]),
  %% See if it compiles
  compile:file(ErlFile).

%% The first lines of the module that test/1 generates; the example value
%% becomes the body of go/0.
test_header() ->
  "-module(test_example).\n"
  "-export([go/0]).\n"
  "-include(\"test_example.hrl\").\n"
  "go() -> \n".

%% Returns the code for an example value of Type, with default options.
-spec from_model(Type::atom(), erlsom:model()) -> string().
from_model(Type, Model) ->
  from_model(Type, Model, []).

%% Returns the code for an example value of Type.
%% Options: {indent, N} - the number of spaces per level (default 4);
%%          {indent_level, N} - the level to start with (default 0).
-spec from_model(Type::atom(), erlsom:model(), options()) -> string().
from_model(Type, Model, Options) ->
  Indent = proplists:get_value(indent, Options, 4),
  Level = proplists:get_value(indent_level, Options, 0),
  from_type(Type, Model, #e_state{indent = Indent, level = Level}).

%% Looks up Type in the model and generates the example value for it.
%% Throws {error, "Type not found", Type} if the model has no such type.
from_type(Type, #model{tps = Types} = Model, State) ->
  case lists:keyfind(Type, #type.nm, Types) of
    #type{} = Found ->
      from_type2(Found, Model, State);
    false ->
      throw({error, "Type not found", Type})
  end.

%% from_type2(Type, Model, State) -> iolist()
%%
%% The record expression for a type: "#Name{" followed by a value for each
%% attribute and each element, separated by commas. If the any_attribs
%% property of the model is true the record starts with an (empty)
%% anyAttribs field. Any other value of any_attribs (false, but also
%% undefined) means that the records have no such field - this is the same
%% interpretation that erlsom_writeHrl uses when it generates the record
%% declarations, so the example always matches the generated hrl file.
from_type2(#type{nm = Name, els = Elements, atts = Attributes},
           #model{any_attribs = AnyAtts} = Model, State) ->
  Attribute_result = [from_attribute(A, Model, State) || A <- Attributes],
  Element_result = from_elements(Elements, Model, State),
  Field_values =
    case AnyAtts of
      true ->
        Any_attribs = [[comment(State), indent(State), " anyAttribs = []"]],
        Any_attribs ++ Attribute_result ++ Element_result;
      _ ->
        Attribute_result ++ Element_result
    end,
  Fields = ["{\n", add_commas(Field_values), $}],
  [comment(State), indent(State), $#, atom_list(Name), Fields].

%% The values for the elements of a type, in the order of the elements.
from_elements(Elements, Model, State) ->
  from_elements(Elements, Model, State, 0, []).

%% ChoiceCount is the number of choices encountered so far; it determines
%% the name of the field for the next choice. Acc holds the results in
%% reverse order.
from_elements([], _Model, _State, _ChoiceCount, Acc) ->
  lists:reverse(Acc);
from_elements([E | T], Model, State, ChoiceCount, Acc) ->
  {Result, NewCount} =
    from_element(E, Model, State, ChoiceCount),
  from_elements(T, Model, State, NewCount, [Result | Acc]).


%% The spaces that correspond to the current indentation level.
indent(#e_state{indent = Indent, level = Level}) ->
  lists:duplicate(Indent * Level, 32). % 32 = space.

%% Increases the indentation level, by default with 2.
bump_level(State) ->
  bump_level(State, 2).

bump_level(#e_state{level = Level} = State, Nr) ->
  State#e_state{level = Level + Nr}.

%% This is used between the alternatives - no commas
%% because only one of them should be used, the others are commented
%% out.
add_breaks(List) ->
  separate(List, "\n").

add_commas(List) ->
  separate(List, ",\n").

%% separate(List, Separator) -> List with Separator between its elements.
separate([], _) ->
  [];
separate([H | T], Separator) ->
  separate(T, [H], Separator).

separate([], Acc, _) ->
  lists:reverse(Acc);
separate([H | T], Acc, Separator) ->
  separate(T, [H, Separator | Acc], Separator).

%% The field for an attribute, preceded by a comment line if the attribute
%% is optional.
from_attribute(#att{nm = Name, opt = Optional, tp = Type}, Model,
               State) ->
  Value = default_value(Type, Model, State),
  Field = io_lib:format(" ~p = ~s", [Name, Value]),
  Comment = case Optional of
              true ->
                [comment(State), indent(State), " % Optional:\n"];
              false ->
                ""
            end,
  [Comment, comment(State), indent(State), Field].

%% from_element(Element, Model, State, Nr_choices) -> {iolist(), Nr_choices}
%%
%% The field for an element. An element with exactly 1 alternative is a
%% normal field. Anything else is a choice: all alternatives are listed
%% (see from_alternatives/3) and the number of choices is incremented.
from_element(#el{alts = [Alternative], mn = Min, mx = Max}, Model, State, Nr_choices) ->
  Min_Max_comment = min_max_comment(Min, Max, State),
  Values = [from_alternative(Alternative, Max, Model, State)],
  {[Min_Max_comment, Values], Nr_choices};
from_element(#el{alts = Alternatives, mn = Min, mx = Max}, Model, State, Nr_choices) ->
  %% If there are several tags that lead to 1 alternative, there may be more than 1
  %% #alt{} record for the same type.
  Unique_alternatives = lists:ukeysort(#alt.tp, Alternatives),
  Choice_comment = choice_comment(length(Unique_alternatives), State),
  Min_Max_comment = min_max_comment(Min, Max, State),
  Alts = from_alternatives(Unique_alternatives, Model, State),
  Label = choice_label(Nr_choices),
  Result =
    if
      Max > 1 -> %% unbound > 1
        [Min_Max_comment, comment(State), indent(State), " ", Label, " = [\n",
         Choice_comment, add_breaks(Alts), $]];
      true ->
        [Min_Max_comment, comment(State), indent(State), " ", Label, " = \n",
         Choice_comment, add_breaks(Alts)]
    end,
  {Result, Nr_choices + 1}.


%% The name of the field for a choice: 'choice' for the first one in a
%% record, 'choice1', 'choice2' etc. for the next ones.
choice_label(0) ->
  "choice";
choice_label(N) ->
  ["choice", integer_to_list(N)].

%% only used for alternatives of a choice
from_alternatives(Alternatives, Model, State) ->
  from_alternatives(Alternatives, Model, State, 1, []).

%% from_alternatives(Alternatives, Model, State, Count, Acc) -> [iolist()]
%%
%% Generates a value for each alternative of a choice. Count is
%% incremented for every alternative, but it does not influence the
%% result. Acc holds the results in reverse order.
from_alternatives([], _, _, _, Acc) ->
  lists:reverse(Acc);
from_alternatives([Alternative | Others], Model,
                  #e_state{choice_depth = Depth} = State, Count, Acc) ->
  %% All alternatives are commented out, except for the last one
  %% (the last one, because otherwise there are problems with commas, closing braces
  %% etc.).
  Alternative_depth =
    case Others of
      [] -> % no more alternatives, so this is the last one
        Depth;
      _ ->
        Depth + 1
    end,
  Value = from_alternative2(Alternative, Model,
                            State#e_state{choice_depth = Alternative_depth}),
  from_alternatives(Others, Model, State, Count + 1, [Value | Acc]).

%% The field for an element that is not a choice. Max is the maximum
%% number of occurrences of the element: if it is more than 1 the value is
%% a list.
from_alternative(#alt{tag = Tag, tp = Type, rl = Real, mn = _Min2, mx = _Max2},
                 Max, Model, State) ->
  Field_name = name(Tag, Type, Real),
  %% add a newline if the type is a record
  Newline = newline(Type),
  %% add a relevant comment if the type is 'any':
  Any_comment = any_comment(Type, State),
  Value = default_value(Type, Model, State),
  Field =
    if
      Max > 1 -> %% unbound > 1
        case Type of
          _ when Type == any; Type == '#ANY' ->
            %% Note: this is not correct if MinOccurs > 0,
            %% but that is rare, and it would be difficult
            %% to figure out what to put in such a case.
            io_lib:format(" ~p = ~s[]",
                          [Field_name, Newline]);
          _ ->
            io_lib:format(" ~p = [~s~s]",
                          [Field_name, Newline, Value])
        end;
      true ->
        io_lib:format(" ~p = ~s~s", [Field_name, Newline, Value])
    end,
  [Any_comment, comment(State), indent(State), Field].

%% A comment line that precedes a field of type 'any'.
any_comment(any, State) ->
  [comment(State), indent(State), " % Any value:\n"];
any_comment(_, _State) ->
  "".

%% inside a choice
%%
%% The value for 1 alternative of a choice, preceded by a comment about
%% the number of occurrences. If the alternative can occur more than once
%% the value is put between list brackets.
from_alternative2(#alt{tp = Type, mn = Min, mx = Max}, Model, State) ->
  Value = default_value(Type, Model, State),
  Min_Max_comment = min_max_comment(Min, Max, bump_level(State, 1)),
  Field =
    if
      Max > 1 -> %% unbound > 1
        io_lib:format("~s]", [put_brace(Value)]);
      true ->
        io_lib:format("~s", [Value])
    end,
  [Min_Max_comment, Field].

%% The name of the field for an element: the type if the element is not
%% 'real' and the type is not a tuple, the tag in all other cases - without
%% prefix.
name(Tag, Type, Real) ->
  case {Real, Type} of
    {false, {_, _}} ->
      base_name(Tag);
    {false, _} ->
      base_name(Type);
    _ ->
      base_name(Tag)
  end.


%% the names of the fields should not have the prefix
%% (only a name of the form "prefix:name" is changed).
base_name(Atom) when is_atom(Atom) ->
  Full = atom_to_list(Atom),
  case string:tokens(Full, ":") of
    [_Prefix, Local] ->
      list_to_atom(Local);
    _ ->
      list_to_atom(Full)
  end.

%% The comment characters that must precede a line: 1 for each level of
%% 'commented out' choice alternatives.
comment(#e_state{choice_depth = Depth}) ->
  lists:duplicate(Depth, $%).


%% String = n * space + Something, must become:
%% n * space + [ + Something
%% (comment characters are skipped in the same way as spaces).
put_brace(String) ->
  put_brace(lists:flatten(String), []).

put_brace([Char | Rest], Skipped) when Char =:= 32; Char =:= $% ->
  put_brace(Rest, [Char | Skipped]);
put_brace([_ | _] = Something, Skipped) ->
  [lists:reverse(Skipped), $[, Something].

%% The comment line that describes how often an element can occur (empty
%% if it must occur exactly once).
min_max_comment(1, 1, _) ->
  "";
min_max_comment(0, 1, State) ->
  [comment(State), indent(State), " % Optional:\n"];
min_max_comment(0, Max, State) ->
  Text = io_lib:format(" % List with zero ~s elements:~n", [max_as_string(Max)]),
  [comment(State), indent(State), Text];
min_max_comment(Min, Max, State) ->
  Text = io_lib:format(" % List with ~p ~s elements:~n", [Min, max_as_string(Max)]),
  [comment(State), indent(State), Text].

%% The way the maximum number of occurrences is described in a comment.
max_as_string(unbound) ->
  "or more";
max_as_string(Max) ->
  "to " ++ integer_to_list(Max).


%% The comment line that precedes the alternatives of a choice (empty if
%% there is only 1 alternative).
choice_comment(1, _State) ->
  "";
choice_comment(Nr_alternatives, State) ->
  Text = io_lib:format(" % Select one from the following ~p elements:~n",
                       [Nr_alternatives]),
  [comment(State), indent(State), Text].

%% The value of a record type starts on a new line; the value of a simple
%% type or of 'any' follows the field name directly.
newline({_, _}) ->
  % simple type
  [];
newline(any) ->
  [];
newline(_) ->
  "\n".

%% default_value(Type, Model, State) -> iolist()
%%
%% The example value for a type. For the simple types this is a fixed
%% value; any other type is looked up in the model and expanded to a
%% record, 2 levels deeper.
default_value({'#PCDATA', Type}, Model, State) ->
  default_value(Type, Model, State);
default_value(bool, _, _) -> "true";
default_value(any, _, _) -> "undefined";
default_value('#ANY', _, _) -> "\"could be anything\"";
default_value(qname, _, _) -> "qname";
default_value(integer, _, _) -> "42";
default_value({integer, long}, _, _) -> "42000";
default_value({integer, int}, _, _) -> "4200";
default_value({integer, short}, _, _) -> "420";
default_value({integer, byte}, _, _) -> "42";
default_value({integer, unsignedLong}, _, _) -> "43000";
default_value({integer, unsignedInt}, _, _) -> "4300";
default_value({integer, unsignedShort}, _, _) -> "430";
default_value({integer, unsignedByte}, _, _) -> "43";
default_value({integer, nonPositiveInteger}, _, _) -> "0";
default_value({integer, positiveInteger}, _, _) -> "42";
default_value({integer, negativeInteger}, _, _) -> "-42";
default_value({integer, nonNegativeInteger}, _, _) -> "0";
default_value(float, _, _) -> "3.1415927";
default_value(char, _, _) -> "\"?\"";
default_value(Type, Model, State) ->
  Deeper = bump_level(State),
  from_type(Type, Model, Deeper).

%% The atom as it must be written in code (quoted if necessary).
atom_list(Atom) ->
  io_lib:format("~p", [Atom]).

--------------------------------------------------------------------------------