├── ebin
└── .gitignore
├── vsn.mk
├── rebar
├── doc
├── erlsom.doc
└── image001.gif
├── examples
├── continuation
│ ├── itunes_example_be.xml
│ ├── BookStore.xml
│ ├── itunes_example.xml
│ └── continuation_example.erl
├── example1
│ ├── valid.xml
│ ├── abb4.xsd
│ ├── example1.hrl
│ ├── abb1.xsd
│ ├── abb3.xsd
│ ├── abb7.xsd
│ ├── abb2.xsd
│ ├── abb11.xsd
│ ├── abb5.xsd
│ ├── abb6.xsd
│ ├── abb8.xsd
│ ├── abb9.xsd
│ ├── example1.xsd
│ ├── abb10.xsd
│ ├── example1.xml
│ └── example1.erl
├── complex_form
│ ├── foo.xml
│ ├── complex_form_example.erl
│ └── erlsom_complex_form.erl
├── erlsom_example
│ ├── example_in.xml
│ ├── example_in.xsd
│ ├── example_out.xsd
│ ├── erlsom.hrl
│ └── erlsom_example.erl
├── book_store
│ ├── BookStore.hrl
│ ├── BookStore.xsd
│ ├── book_store.erl
│ └── BookStore.xml
├── soap_example
│ ├── example_in.xml
│ ├── example_in.xsd
│ ├── example_out.xsd
│ ├── erlsom.hrl
│ ├── soap_example.erl
│ └── soap-envelope.xsd
└── erlsom_sax_example
│ ├── erlsom_sax_example.erl
│ ├── tpp_auth.xml
│ ├── sax_example.xml
│ ├── search_request.xsd
│ └── erlsom_simple_form.erl
├── priv
├── extension
│ ├── simpleContentExtension.xml
│ ├── Makefile
│ ├── extension.xml
│ ├── extension.xsd
│ └── simpleContentExtension.xsd
├── choice_complex
│ ├── choice_complex.xml
│ └── choice_complex.xsd
├── all
│ ├── all.xml
│ └── all.xsd
├── xsi_type
│ ├── ext.xml
│ ├── base.xsd
│ └── ext.xsd
├── xsi_type_no_prefix
│ ├── test.xml
│ └── test.xsd
└── gexf
│ ├── schema
│ ├── hierarchy.xsd
│ ├── phylogenics.xsd
│ ├── dynamics.xsd
│ ├── gexf.xsd
│ ├── viz.xsd
│ └── data.xsd
│ └── data
│ ├── basic.gexf
│ ├── data.gexf
│ ├── test.gexf
│ └── dynamics.gexf
├── .gitignore
├── .project
├── include
├── exception.hrl
└── erlsom.hrl
├── rebar.config
├── CHANGELOG.md
├── src
├── erlsom.app.src
├── erlsom.hrl
├── erlsom_add.erl
├── erlsom_sax.hrl
├── erlsom_parse.hrl
├── erlsom_compile.hrl
├── erlsom_simple_form.erl
├── erlsom_sax.erl
├── erlsom_sax_lib.erl
├── erlsom_ucs.erl
├── erlsom_writeHrl.erl
└── erlsom_example_value.erl
├── make-upload
├── make-release
├── Makefile
├── test
├── erlsom_gexf_tests.erl
└── erlsom_tests.erl
└── COPYING.LESSER
/ebin/.gitignore:
--------------------------------------------------------------------------------
1 | erlsom.app
2 | *.beam
3 |
--------------------------------------------------------------------------------
/vsn.mk:
--------------------------------------------------------------------------------
1 | ERLSOM_VSN=1.4.1
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willemdj/erlsom/HEAD/rebar
--------------------------------------------------------------------------------
/doc/erlsom.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willemdj/erlsom/HEAD/doc/erlsom.doc
--------------------------------------------------------------------------------
/doc/image001.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willemdj/erlsom/HEAD/doc/image001.gif
--------------------------------------------------------------------------------
/examples/continuation/itunes_example_be.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willemdj/erlsom/HEAD/examples/continuation/itunes_example_be.xml
--------------------------------------------------------------------------------
/examples/example1/valid.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 123
4 | value 1
5 | value 2
6 |
7 |
--------------------------------------------------------------------------------
/priv/extension/simpleContentExtension.xml:
--------------------------------------------------------------------------------
1 |
2 | myuri
3 |
--------------------------------------------------------------------------------
/priv/choice_complex/choice_complex.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | text
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.beam
3 | config.log
4 | config.status
5 | include.mk
6 | .rebar
7 | .eunit
8 | .test
9 | _build/
10 | erlsom.plt
11 | rebar.lock
12 |
--------------------------------------------------------------------------------
/priv/extension/Makefile:
--------------------------------------------------------------------------------
1 | # Validate the example documents against their schemas (requires xmllint).
2 | # Both targets are commands rather than files, so declare them phony.
3 | .PHONY: all validate
4 |
5 | all: validate
6 |
7 | validate:
8 | 	@xmllint --noout --schema extension.xsd extension.xml
9 | 	@xmllint --noout --schema simpleContentExtension.xsd simpleContentExtension.xml
10 |
--------------------------------------------------------------------------------
/examples/complex_form/foo.xml:
--------------------------------------------------------------------------------
1 |
2 | x
3 | x
4 | y
5 |
6 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | erlsom
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/priv/extension/extension.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Bart
4 | Simpson
5 | Springfield
6 | U.S.A.
7 | Evergreen Terrace
8 |
9 |
--------------------------------------------------------------------------------
/include/exception.hrl:
--------------------------------------------------------------------------------
1 | -ifdef(OTP_RELEASE). %% this implies 21 or higher
2 | -define(EXCEPTION(Class, Reason, Stacktrace), Class:Reason:Stacktrace). %% catch pattern that binds the stacktrace itself
3 | -define(GET_STACK(Stacktrace), Stacktrace). %% the variable bound by ?EXCEPTION already holds the stacktrace
4 | -else.
5 | -define(EXCEPTION(Class, Reason, _), Class:Reason). %% older releases: the third argument is dropped from the pattern
6 | -define(GET_STACK(_), erlang:get_stacktrace()). %% ... and the stacktrace is fetched with the BIF instead
7 | -endif.
8 |
--------------------------------------------------------------------------------
/priv/all/all.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | some name
5 | some name
6 |
7 |
8 | some name1
9 | some name2
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/erlsom_example/example_in.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 3
4 | 7
5 | 1
6 | 123
7 | 17
8 | 2
9 |
10 |
--------------------------------------------------------------------------------
/priv/xsi_type/ext.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 | a
8 | b
9 |
10 |
--------------------------------------------------------------------------------
/examples/example1/abb4.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/examples/example1/example1.hrl:
--------------------------------------------------------------------------------
1 | %% HRL file generated by ERLSOM
2 | %%
3 | %% It is possible to change the name of the record fields.
4 | %%
5 | %% It is possible to add default values, but be aware that these will
6 | %% only be used when *writing* an xml document.
7 |
8 | -record('case', {anyAttribs, 'name', 'result', 'xsd', 'xml'}).
9 | -record('testConfig', {anyAttribs, 'path', 'case'}).
10 |
--------------------------------------------------------------------------------
/examples/book_store/BookStore.hrl:
--------------------------------------------------------------------------------
1 | %% HRL file generated by ERLSOM
2 | %%
3 | %% It is possible to change the name of the record fields.
4 | %%
5 | %% It is possible to add default values, but be aware that these will
6 | %% only be used when *writing* an xml document.
7 |
8 | -record('book_type', {anyAttribs, 'ISBN', 'title', 'author', 'date', 'publisher'}).
9 | -record('book_store', {anyAttribs, 'book'}).
10 |
--------------------------------------------------------------------------------
/examples/example1/abb1.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/examples/example1/abb3.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/priv/xsi_type_no_prefix/test.xml:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 | base
8 | ext
9 |
10 |
--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {erl_opts, [debug_info]}.
2 |
3 | %% Options for running dialyzer
4 | %% {plt, PltFile}
5 | %% 'src': run Dialyzer on the source files as in 'dialyzer --src'
6 | %% {warnings, [WarnOpts]}: turn on/off Dialyzer warnings
7 | {dialyzer_opts, [{plt, "erlsom.plt"},
8 | {warnings, [race_conditions]},
9 | src]}.
10 | {eunit_opts, [verbose]}.
11 | {cover_enabled, true}.
12 |
--------------------------------------------------------------------------------
/priv/gexf/schema/hierarchy.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/priv/gexf/data/basic.gexf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/examples/example1/abb7.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/examples/example1/abb2.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/examples/soap_example/example_in.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 3
6 | 7
7 | 1
8 | 123
9 | 17
10 | 3
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/priv/xsi_type/base.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | CHANGELOG
2 | =========
3 |
4 | 1.4.0
5 | -----
6 |
7 | * New option `strict` to enforce additional type conversion and checking.
8 |
9 | * Several modifications to allow better embedding of the parser (not
10 | documented).
11 |
12 | * :warning: `models` that were compiled with earlier versions are no
13 | longer supported. If you have stored a `model` and you want to upgrade
14 | to the new version, the model must be recompiled.
15 |
16 | 1.3.1
17 | -----
18 |
19 | * First tagged version.
20 |
--------------------------------------------------------------------------------
/examples/example1/abb11.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/examples/example1/abb5.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/src/erlsom.app.src:
--------------------------------------------------------------------------------
1 | {application,erlsom,
2 | [{description,"XML parser. Supports SAX style parsing as well as XML Schema based data mapping: create records from XML (and vice versa)"},
3 | {vsn,"1.5.2"},
4 | {modules,[]},
5 | {maintainers,["Willem de Jong"]},
6 | {licenses,["GNU Lesser GPL, Version 3"]},
7 | {links,[{"Github","https://github.com/willemdj/erlsom"}]},
8 | {registered,[]},
9 | {env,[]},
10 | {applications,[kernel,stdlib,inets]}]}.
11 |
--------------------------------------------------------------------------------
/make-upload:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # make-upload: makes it a wee bit more convenient to publish the tar balls.
4 | #
5 | # Usage: make-upload <file>
6 | #
7 | # Uploads <file> to the "incoming" directory of upload.sourceforge.net with
8 | # an anonymous ncftp session, then prints the URL of the SourceForge admin
9 | # page on which the new package has to be created by hand.
10 | # set -x
11 |
12 | if [ $# -ne 1 ]; then
13 |     echo 'usage: make-upload <file>' >&2
14 |     exit 1
15 | fi
16 |
17 | # Feed the FTP commands to ncftp through a here-document rather than a
18 | # scratch file with a predictable name in /tmp: there is nothing to clean
19 | # up afterwards and nobody else can pre-create or replace the file.
20 | ncftp -u anonymous -p klacke@hyber.org upload.sourceforge.net <<EOF
21 | cd incoming
22 | put $1
23 | EOF
24 |
25 | echo "Now log in at sourceforge and go to the page"
26 | echo "http://sourceforge.net/project/admin/editpackages.php?group_id=45637"
27 | echo "and create the new package ... "
28 |
--------------------------------------------------------------------------------
/examples/example1/abb6.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/examples/erlsom_sax_example/erlsom_sax_example.erl:
--------------------------------------------------------------------------------
1 | %%% A simple example of the use of erlsom's SAX interface.
2 | %%%
3 | %%% Reads sax_example.xml from the directory that holds this module's
4 | %%% compiled code and prints every SAX event produced while parsing it.
5 | -module(erlsom_sax_example).
6 |
7 | %% user interface
8 | -export([run/0]).
9 |
10 | %% Parses the example document and prints each SAX event.
11 | %% Returns ok on success. When the document cannot be read, the
12 | %% {error, Reason} tuple from file:read_file/1 is returned instead of
13 | %% being discarded, so the caller can see why nothing was printed.
14 | run() ->
15 |     case file:read_file(xml()) of
16 |         {ok, Bin} ->
17 |             %% assertive match: a parse failure crashes here
18 |             {ok, _, _} = erlsom:parse_sax(Bin, ok, fun callback/2),
19 |             ok;
20 |         {error, _Reason} = Error ->
21 |             Error
22 |     end.
23 |
24 | %% SAX event handler: prints the event and hands the state back unchanged.
25 | callback(Event, State) ->
26 |     io:format("~p\n", [Event]),
27 |     State.
28 |
29 | %% this is just to make it easier to test this little example
30 | xml() -> filename:join([codeDir(), "sax_example.xml"]).
31 | codeDir() -> filename:dirname(code:which(?MODULE)).
32 |
--------------------------------------------------------------------------------
/examples/erlsom_example/example_in.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/examples/example1/abb8.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/examples/soap_example/example_in.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/examples/complex_form/complex_form_example.erl:
--------------------------------------------------------------------------------
1 | %% Shows how you could (in theory, at least) apply xpath to the
2 | %% result of erlsom_complex_form.
3 | -module(complex_form_example).
4 | -include_lib("xmerl/include/xmerl.hrl").
5 |
6 | -export([run/0]).
7 |
8 | %% Parses foo.xml with erlsom_complex_form, runs one XPath query on the
9 | %% parsed document with xmerl_xpath and prints the query and its result.
10 | run() ->
11 |     %% The query is defined once, so the text that is printed can never
12 |     %% drift away from the query that is actually evaluated.
13 |     Query = "//myelement[. = 'x']/text()",
14 |     {ok, ParsedDoc, _} = erlsom_complex_form:scan_file(xml()),
15 |     Result = xmerl_xpath:string(Query, ParsedDoc),
16 |     io:format("result of Xpath query \"~s\"~n", [Query]),
17 |     io:format("~p~n", [Result]).
18 |
19 | %% this is just to make it easier to test this little example
20 | xml() -> filename:join([codeDir(), "foo.xml"]).
21 | codeDir() -> filename:dirname(code:which(?MODULE)).
22 |
--------------------------------------------------------------------------------
/examples/example1/abb9.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/priv/xsi_type/ext.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/make-release:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # make-release: tag the tree in CVS and pack an export of that tag into
4 | # erlsom-<version>.tar.gz. The version is taken from ERLSOM_VSN in vsn.mk.
5 | # Must be started from the top directory (the one holding this script).
6 |
7 | set -x
8 |
9 | # Check the working directory before anything else; ./vsn.mk is read
10 | # relative to it, so this test has to come before the file is sourced.
11 | if [ ! -f ./make-release ]; then
12 |     echo "need to be in top dir"; exit 1;
13 | fi
14 |
15 | . ./vsn.mk
16 |
17 | echo tagging and packing release ${ERLSOM_VSN}
18 |
19 | # A plain assignment is not passed on to child processes; export it so
20 | # that the cvs commands below actually see it.
21 | CVS_RSH=ssh
22 | export CVS_RSH
23 | # The tag name uses dashes instead of dots, e.g. 1.4.1 becomes 1-4-1.
24 | Y=`echo ${ERLSOM_VSN} | sed 's/\./-/g'`
25 |
26 | cd .. || exit 1
27 | rm -rf downloads 2> /dev/null
28 | rm -rf erlsom-0.0.1 2> /dev/null
29 |
30 | cvs tag -F erlsom-${Y}
31 |
32 | rm -rf tmp
33 | mkdir tmp || exit 1
34 |
35 | # Stop if the export or the cd fails: the rm commands below must only
36 | # ever run inside the freshly exported tree.
37 | cvs export -d tmp -r erlsom-$Y . || exit 1
38 | cd tmp || exit 1
39 | rm -rf erlsom/CVSROOT
40 | rm erlsom/make-release
41 | rm erlsom/make-upload
42 | mv erlsom erlsom-${ERLSOM_VSN}
43 | tar cfz erlsom-${ERLSOM_VSN}.tar.gz erlsom-${ERLSOM_VSN}
44 |
45 |
46 | echo new release resides in `pwd`/erlsom-${ERLSOM_VSN}.tar.gz
47 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | APPLICATION := erlsom
2 |
3 | ERL := erl
4 | EPATH := -pa ebin
5 | TEST_EPATH := -pa .eunit
6 |
7 | DIALYZER=dialyzer
8 | DIALYZER_OPTS=-Wno_return -Wrace_conditions -Wunderspecs -Wbehaviours
9 | PLT_FILE=.erlsom_plt
10 | APPS=kernel stdlib
11 |
12 | # Every target below is a command, none of them produces a file of the
13 | # same name, so all are declared phony. This matters most for `doc` and
14 | # `test`: directories with those names exist in the tree, and without
15 | # .PHONY make would consider the targets up to date and skip the recipe.
16 | .PHONY: all compile doc clean build-plt check-plt dialyze eunit test shell touch
17 |
18 | all: compile
19 |
20 | compile:
21 | 	@./rebar compile
22 |
23 | doc:
24 | 	@./rebar doc
25 |
26 | clean:
27 | 	@./rebar clean
28 |
29 | build-plt: compile
30 | 	@./rebar build-plt
31 |
32 | check-plt: compile
33 | 	@./rebar check-plt
34 |
35 | dialyze:
36 | 	@./rebar dialyze
37 |
38 | eunit:
39 | 	@./rebar eunit
40 |
41 | # `test` used to be listed in .PHONY without a rule; make it run the tests.
42 | test: eunit
43 |
44 | # Start an Erlang shell with the compiled application on the code path.
45 | shell: compile
46 | 	$(ERL) -sname $(APPLICATION) $(EPATH)
47 |
48 | # Refresh the modification time of every file in the tree.
49 | touch:
50 | 	find . -name '*' -print | xargs touch -m
51 | 	find . -name '*.erl' -print | xargs touch -m
52 |
--------------------------------------------------------------------------------
/examples/example1/example1.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/priv/all/all.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/priv/choice_complex/choice_complex.xsd:
--------------------------------------------------------------------------------
1 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/examples/example1/abb10.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/priv/extension/extension.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/priv/xsi_type_no_prefix/test.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/examples/soap_example/example_out.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/priv/gexf/schema/phylogenics.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/examples/erlsom_example/example_out.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/examples/book_store/BookStore.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/examples/erlsom_sax_example/tpp_auth.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 | Field 1
8 | Text 1
9 |
10 |
11 |
12 | testnumber
13 |
14 |
15 | 491703434123
16 |
17 |
18 | 1
19 | 2
20 |
21 |
22 | 2004-04-28T10:46:34.797Z
23 | Pizza
24 |
25 |
26 | 2004-04-28T10:46:34.796Z
27 |
28 |
29 | test001
30 |
31 |
32 | 116
33 | 100
34 | 16
35 | FULL
36 | 2
37 | EUR
38 |
39 |
40 |
--------------------------------------------------------------------------------
/examples/book_store/book_store.erl:
--------------------------------------------------------------------------------
1 | %%% A simple example of the use of erlsom: compiles BookStore.xsd, parses
2 | %%% BookStore.xml with the resulting model and prints every book found.
3 | -module(book_store).
4 | -include("BookStore.hrl").
5 |
6 | %% user interface
7 | -export([run/0]).
8 |
9 | %% Compiles the schema, parses the document and prints each book.
10 | %% Both steps are matched assertively, so a schema or parse problem
11 | %% shows up as a badmatch crash rather than being hidden.
12 | run() ->
13 |     %% compile xsd
14 |     {ok, Model} = erlsom:compile_xsd_file(xsd()),
15 |     %% parse xml
16 |     {ok, #book_store{book=Books}, _} = erlsom:scan_file(xml(), Model),
17 |     %% do something with the content
18 |     lists:foreach(fun process_book/1, Books).
19 |
20 | %% Prints the fields of one #book_type{} record.
21 | %% ~ts is used instead of ~s: io:format/2 fails with badarg on ~s as soon
22 | %% as a string holds a code point above 255, which text taken from an XML
23 | %% document may well do.
24 | process_book(#book_type{'ISBN' = ISBN, title = Title, author = Author,
25 |                         date = Date, publisher = Publisher}) ->
26 |     io:format("Title    : ~ts~n"
27 |               "Author   : ~ts~n"
28 |               "Publisher: ~ts~n"
29 |               "ISBN     : ~ts~n"
30 |               "Date     : ~ts~n~n",
31 |               [Title, Author, Publisher, ISBN, formatDate(Date)]).
32 |
33 | %% A book without a date has 'undefined' in that field; print nothing then.
34 | formatDate(undefined) -> "";
35 | formatDate(Date) -> Date.
36 |
37 | %% this is just to make it easier to test this little example
38 | xsd() -> filename:join([codeDir(), "BookStore.xsd"]).
39 | xml() -> filename:join([codeDir(), "BookStore.xml"]).
40 | codeDir() -> filename:dirname(code:which(?MODULE)).
41 |
--------------------------------------------------------------------------------
/examples/erlsom_sax_example/sax_example.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 |
8 |
9 |
11 |
12 |
13 | '>
14 | %xx;
15 | ]>
16 |
17 |
18 | 3
19 | 7
20 | ]]> and some more text
21 | and now an entity: < and some more text
22 | and now defined entity: &zz; and some more text
23 | and now the weird case: &aa; and some more text
24 | 123
25 | 17
26 |
27 |
28 | 2
29 |
30 |
--------------------------------------------------------------------------------
/priv/extension/simpleContentExtension.xsd:
--------------------------------------------------------------------------------
1 |
2 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/examples/erlsom_example/erlsom.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Header file for erlsom
23 | %%% ====================================================================
24 |
25 | %% prefix=the prefix that will be used in the result
26 |
27 | -ifndef(_ERLSOM_HRL_).
28 | -define(_ERLSOM_HRL_, true).
29 |
30 | -record(ns, {uri, prefix}).
31 | -record(qname, {uri, localPart, prefix, mappedPrefix}).
32 |
33 | -endif.
34 |
--------------------------------------------------------------------------------
/src/erlsom.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Header file for erlsom
23 | %%% ====================================================================
24 |
25 | %% prefix=the prefix that will be used in the result
26 | -record(ns, {uri,
27 | prefix,
28 | efd = unqualified :: qualified | unqualified % elementFormDefault
29 | }).
30 | -record(qname, {uri, localPart, prefix, mappedPrefix}).
31 |
--------------------------------------------------------------------------------
/examples/soap_example/erlsom.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Header file for erlsom
23 | %%% ====================================================================
24 |
25 | %% prefix=the prefix that will be used in the result
26 |
27 | -ifndef(_ERLSOM_HRL_).
28 | -define(_ERLSOM_HRL_, true).
29 |
30 |
31 | -record(ns, {uri, prefix}).
32 | -record(qname, {uri, localPart, prefix, mappedPrefix}).
33 |
34 | -endif.
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/examples/book_store/BookStore.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Het zijn net mensen
5 | Joris Luyendijk
6 | 2006
7 | Podium
8 |
9 |
10 | Portnoy's Complaint
11 | Philip Roth
12 | 1993
13 | Vintage
14 |
15 |
16 | My Life and Times
17 | Paul McCartney
18 | 1998
19 | McMillin publishing
20 |
21 |
22 | Illusions The Adventures of a Reluctant Messiah
23 | Richard Bach
24 | 1977
25 | Dell publishing Co.
26 |
27 |
28 | The First and Last Freedom
29 | J. Krishnamurti
30 | Harper & Row
31 |
32 |
33 |
--------------------------------------------------------------------------------
/include/erlsom.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Header file for erlsom
23 | %%% ====================================================================
24 |
25 | %% prefix=the prefix that will be used in the result
26 |
27 | -ifndef(_ERLSOM_HRL_).
28 | -define(_ERLSOM_HRL_, true).
29 |
30 |
31 | -record(ns, {uri,
32 | prefix,
33 | efd = unqualified :: qualified | unqualified % elementFormDefault
34 | }).
35 | -record(qname, {uri, localPart, prefix, mappedPrefix}).
36 |
37 | -endif.
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/examples/example1/example1.xml:
--------------------------------------------------------------------------------
1 |
2 | .
3 |
4 |
5 | abb1
6 | OK
7 | abb1.xsd
8 | valid.xml
9 |
10 |
11 |
12 | abb2
13 | OK
14 | abb2.xsd
15 | valid.xml
16 |
17 |
18 |
19 | abb3
20 | OK
21 | abb3.xsd
22 | valid.xml
23 |
24 |
25 |
26 | abb4
27 | OK
28 | abb4.xsd
29 | valid.xml
30 |
31 |
32 |
33 | abb5
34 | OK
35 | abb5.xsd
36 | valid.xml
37 |
38 |
39 |
40 | abb6
41 | OK
42 | abb6.xsd
43 | valid.xml
44 |
45 |
46 |
47 | abb7
48 | OK
49 | abb7.xsd
50 | valid.xml
51 |
52 |
53 |
54 | abb8
55 | OK
56 | abb8.xsd
57 | valid.xml
58 |
59 |
60 |
61 | abb9
62 | OK
63 | abb9.xsd
64 | valid.xml
65 |
66 |
67 |
68 | abb10
69 | OK
70 | abb10.xsd
71 | valid.xml
72 |
73 |
74 |
75 | abb11
76 | OK
77 | abb11.xsd
78 | valid.xml
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/test/erlsom_gexf_tests.erl:
--------------------------------------------------------------------------------
1 | -module(erlsom_gexf_tests).
2 |
3 | %% ------------------------------------------------------------------
4 | %% Tests
5 | %% ------------------------------------------------------------------
6 |
7 | -include_lib("eunit/include/eunit.hrl").
8 | -include_lib("erlsom/src/erlsom.hrl").
9 | -include_lib("erlsom/src/erlsom_parse.hrl").
10 |
%% Path components of the GEXF schema and of the directories searched for
%% included schemas; both are passed to erlsom_tests:compile_xsd/2.
-define(XSD_FILE,      ["gexf", "schema", "gexf.xsd"]).
-define(INCLUDE_PATHS, [["gexf", "schema"]]).
13 |
%% The GEXF schema (with its include path) must compile successfully.
compile_schema_test() ->
    Outcome = erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS),
    {ok, _} = Outcome.
16 |
%% The namespace list of the compiled model must already equal its own
%% lists:usort/1 result, i.e. it holds no duplicates (and is sorted).
unique_namespaces_test() ->
    {ok, #model{nss = Nss}} =
        erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS),
    ?assertEqual(lists:usort(Nss), Nss),
    ok.
22 |
%% A sample GEXF document must parse against the compiled GEXF model.
parse_file_test() ->
    {ok, GexfModel} = erlsom_tests:compile_xsd(?XSD_FILE, ?INCLUDE_PATHS),
    SamplePath = ["gexf", "data", "test.gexf"],
    {ok, _} = erlsom_tests:parse_file(SamplePath, GexfModel),
    ok.
27 |
28 |
%% @doc Regression check for erlsom_lib:makeAttrRef/2: for a namespace with an
%% empty prefix the result must be "parent-content"; returning
%% ":parent-content" (with a leading delimiter) is an error.
leading_ns_delimeter_test_() ->
    Uri = "http://www.gexf.net/1.2draft",
    Namespaces = [#ns{uri = Uri, prefix = ""}],
    AttrName = #qname{
        uri = Uri,
        localPart = "parent-content",
        prefix = "ns1",
        mappedPrefix = []
    },
    [?_assertEqual("parent-content",
                   erlsom_lib:makeAttrRef(AttrName, Namespaces))].
37 |
38 |
%% Builds one titled stability test per sample GEXF document; the title is
%% the document's file name.
stability_test_() ->
    Samples = ["test.gexf", "basic.gexf", "data.gexf", "dynamics.gexf"],
    lists:map(
        fun(Sample) ->
            {Sample,
             erlsom_tests:verify_stability_(
                 ?XSD_FILE, ["gexf", "data", Sample], ?INCLUDE_PATHS)}
        end,
        Samples).
44 |
--------------------------------------------------------------------------------
/priv/gexf/schema/dynamics.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/priv/gexf/data/data.gexf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Gephi.org
5 | A Web network
6 |
7 |
8 |
9 |
10 |
11 |
12 | true
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/priv/gexf/data/test.gexf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Gephi.org
5 | A Web network
6 |
7 |
8 |
9 |
10 |
11 |
12 | true
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/examples/erlsom_example/erlsom_example.erl:
--------------------------------------------------------------------------------
1 | %%% a simple example of the use of erlsom.
2 | %%%
3 | -module(erlsom_example).
4 |
5 | %% user interface
6 | -export([run/0]).
7 |
%% Records for the input ('in:') and output ('out:') messages; the name
%% prefixes are the ones passed to erlsom:compile_xsd_file/2 in run/0.
-record('in:arguments', {
    anyAttribs,
    values,
    precision
}).
-record('out:resultType', {
    anyAttribs,
    result
}).
-record('out:resultType-error', {
    anyAttribs,
    error
}).
-record('out:resultType-okResult', {
    anyAttribs,
    value
}).
-record('out:errorType', {
    anyAttribs,
    errorCode,
    errorDescription
}).
14 |
%% Runs the example end to end: compiles the input and output schemas,
%% parses the input document, computes the average of the supplied values
%% (or builds an error record when no values are present), serialises the
%% response and prints it. Returns ok.
run() ->
    %% compile both schemas, each under its own prefix
    {ok, ModelIn} =
        erlsom:compile_xsd_file(example_in_xsd(),
                                [{prefix, "in"}, {strict, false}]),
    {ok, ModelOut} =
        erlsom:compile_xsd_file(example_out_xsd(),
                                [{prefix, "out"}, {strict, false}]),

    %% parse the input document
    {ok, Input, _Rest} = erlsom:scan_file(example_in_xml(), ModelIn),

    %% turn the parsed input into either an error or an ok result
    Result =
        case Input of
            #'in:arguments'{values = undefined} ->
                #'out:resultType-error'{
                    error = #'out:errorType'{
                        errorCode = "01",
                        errorDescription = "No arguments provided"}};
            #'in:arguments'{values = Values, precision = Precision} ->
                #'out:resultType-okResult'{
                    value = calcAverage(Values, Precision)}
        end,

    %% serialise and print the response
    XmlResult = erlsom:write(#'out:resultType'{result = Result}, ModelOut),
    io:format("Result: ~p~n", [XmlResult]),
    ok.
40 |
%% Returns the arithmetic mean of the numbers in List as a flat string with
%% Precision digits after the decimal point. An empty list fails with
%% badarith (division by zero).
calcAverage(List, Precision) ->
    Mean = lists:sum(List) / length(List),
    lists:flatten(io_lib:format("~.*f", [Precision, Mean])).
47 |
%% Locations of the example files, resolved relative to the directory that
%% holds this module's compiled code (this just makes the example easier to
%% test).
example_in_xsd() ->
    filename:join(codeDir(), "example_in.xsd").

example_out_xsd() ->
    filename:join(codeDir(), "example_out.xsd").

example_in_xml() ->
    filename:join(codeDir(), "example_in.xml").

codeDir() ->
    BeamPath = code:which(?MODULE),
    filename:dirname(BeamPath).
53 |
54 |
--------------------------------------------------------------------------------
/examples/example1/example1.erl:
--------------------------------------------------------------------------------
1 | -module(example1).
2 | -export([test_erlsom/1]).
3 | -export([run/0]).
4 | %% this example has 2 purposes:
5 | %%
6 | %% - It shows how easy Erlsom makes it for you to use an XML configuration file.
7 | %% The configuration file describes a set of 11 test cases, which are run by
8 | %% this example. The configuration file is described by "example1.xsd".
9 | %% Compiling this XSD and then parsing the configuration file ("example1.xml")
10 | %% gives you access to an Erlang structure of records that corresponds with the
11 | %% XML schema.
12 | %%
13 | %%- It shows how 11 different schemas (named "abb1.xsd" through "abb11.xsd") can
14 | %% describe the same XML document (named "valid.xml"), and it shows the output
15 | %% that results from running Erlsom on this file using these schemas. To run
16 | %% the example for XSD abb1.xsd, use the command example1:test_erlsom("abb1").
17 |
18 |
19 | %% example1.hrl contains the record definitions.
20 | %% It was generated using erlsom:writeHrl
21 | -include("example1.hrl").
%% Convenience entry point: runs the test case named "abb11".
run() ->
    DefaultCase = "abb11",
    test_erlsom(DefaultCase).
24 |
%% Loads the test configuration (example1.xml, described by example1.xsd),
%% looks up the case called Test and executes it. Prints "Case not found"
%% when the configuration holds no case with that name.
test_erlsom(Test) ->
    Dir = codeDir(),
    {ok, Model} =
        erlsom:compile_xsd_file(filename:join([Dir, "example1.xsd"])),
    {ok, TestSuite, _} =
        erlsom:scan_file(filename:join([Dir, "example1.xml"]), Model),
    case findCase(TestSuite#testConfig.'case', Test) of
        {ok, Case} ->
            execute_case(Case, Dir);
        _NotFound ->
            io:format("Case not found\n", [])
    end.
36 |
%% Executes one configured case: compiles the case's XSD (found under Path)
%% and, when that succeeds, parses the case's XML file with the resulting
%% model. A compilation error is printed instead.
execute_case(#'case'{name = Name, xsd = XSD, xml = XML}, Path) ->
    io:format("example: ~p\n", [Name]),
    XsdFile = filename:join([Path, XSD]),
    io:format("compiling xsd ~p...\n", [XsdFile]),
    case erlsom:compile_file(XsdFile, []) of
        {ok, Model} ->
            erlsom:parse_file(filename:join([Path, XML]), Model);
        {error, Message} ->
            io:format("XSD error: ~p\n", [Message])
    end.
49 |
%% Returns {ok, Case} for the first 'case' record whose name equals Name,
%% or false when the list holds no such record.
findCase([#'case'{name = CaseName} = Case | _], Name) when CaseName =:= Name ->
    {ok, Case};
findCase([_ | Rest], Name) ->
    findCase(Rest, Name);
findCase([], _Name) ->
    false.
56 |
%% Directory that holds this module's compiled code; the example files are
%% looked up there (this just makes the example easier to test).
codeDir() ->
    BeamPath = code:which(?MODULE),
    filename:dirname(BeamPath).
59 |
--------------------------------------------------------------------------------
/examples/continuation/BookStore.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | My Life and Times
5 | Paul McCartney
6 | 1998
7 | McMillin publishing
8 |
9 |
10 | Het zijn net mensen
11 | Joris Luyendijk
12 | 2006
13 | Podium
14 |
15 |
16 | Tannöd
17 | Andrea Maria Schenkel
18 | 2006
19 | btb
20 |
21 |
22 | 33 Augenblicke des Glücks
23 | Ingo Schulze
24 | 1995
25 | Süddeutsche Zeitung
26 |
27 |
28 | Portnoy's Complaint
29 | Philip Roth
30 | 1993
31 | Vintage
32 |
33 |
34 | Der Schwimmer
35 | Zsusza Bánk
36 | 2002
37 | Fischer Verlag
38 |
39 |
40 | My Life and Times
41 | Paul McCartney
42 | 1998
43 | McMillin publishing
44 |
45 |
46 | Illusions The Adventures of a Reluctant Messiah
47 | Richard Bach
48 | 1977
49 | Dell publishing Co.
50 |
51 |
52 | The First and Last Freedom
53 | J. Krishnamurti
54 | Harper & Row
55 |
56 |
57 |
--------------------------------------------------------------------------------
/priv/gexf/data/dynamics.gexf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Gexf.net
5 | A Web network changing over time
6 |
7 |
8 |
9 |
10 |
11 | true
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/examples/continuation/itunes_example.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 | Major Version1
8 | Minor Version1
9 | Application Version4.6
10 | Music Folder
11 | file://localhost/Users/niel/Music/iTunes/iTunes%20Music/
12 | Library Persistent IDŒŒŒŒŒŒŒŒŒŒ
13 | Tracks
14 |
15 |
16 |
17 | 35
18 |
19 | Track ID35
20 | NameGula Gula
21 | ArtistJan Garbarek
22 | ComposerMari Boine Persen, arr Jan Garbarek
23 | AlbumI Took Up The Runes & look what I got & look where I am now
24 | GenreJazz
25 | KindAAC audio file
26 | Size5892093
27 | Total Time363578
28 | Disc Number1
29 | Disc Count1
30 | Track Number1
31 | Track Count10
32 | Year1990
33 | Date Modified2005-06-06T04:11:43Z
34 | Date Added2005-06-06T04:11:18Z
35 | Bit Rate128
36 | Sample Rate44100
37 | Track TypeFile
38 | Locationfile://localhost/C:/Documents%20and%20Settings/User1/My%20Documents/My%20Music/iTunes/iTunes%20Music/Jan%20Garbarek/I%20Took%20Up%20The%20Runes/01%20Gula%20Gula.m4a/
39 | File Folder Count4
40 | Library Folder Count1
41 |
42 |
43 | 35
44 |
45 | Track ID35
46 | NameGula Gula
47 | ArtistJan Garbarek
48 | ComposerMari Boine Persen, arr Jan Garbarek
49 | AlbumI Took Up The Runes & look what I got & look where I am now
50 | File Folder Count4
51 | Library Folder Count1
52 |
53 |
54 | 36
55 |
56 | Track ID36
57 | NameGula Gula
58 | ArtistJan Garbarek
59 | ComposerMari Boine Persen, arr Jan Garbarek
60 | AlbumI Took Up The Runes & look what I got & look where I am now
61 | File Folder Count4
62 | Library Folder Count1
63 |
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/src/erlsom_add.erl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% .
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% adds an XSD to an existing Erlsom Model
23 | %%% ====================================================================
24 |
25 | %%% Adds an XSD/namespace to an existing model. This is useful only if the
26 | %%% existing model contains 'any' elements that have to be parsed. A typical
27 | %%% example is the soap envelope. In order to parse the body, the parser needs
28 | %%% to know its 'model'.
29 |
30 | %%% Compiles the model for the imported xsd, adds the types to the existing
31 | %%% model, adds all the alternatives from the _document element to the
32 | %%% _document element of the existing model, adds the namespaces, and finally
33 | %%% updates the alternatives for all 'any' types in the model.
34 |
35 | -module(erlsom_add).
36 | -export([add/3]).
37 | -export([add_xsd_model/1]).
38 | -export([add_model/2]).
39 |
40 | -include("erlsom_parse.hrl").
41 | -include("erlsom_compile.hrl").
42 |
43 | %% debug(Text) ->
44 | %% io:format("~p\n", [Text]).
45 |
46 |
47 | %% -record(model, {tps, nss, tns}).
48 | %% -record(type, {nm, tp = sequence, els, atts = [], anyAttr, nillable, nr, mn = 1, mx = 1}).
49 | %% -record(el, {alts, mn = 1, mx = 1, nr}).
50 |
%% Compiles Xsd with Options and merges the resulting model into Model1.
%% Returns the new #model. Fails with badmatch when the XSD does not compile.
add(Xsd, Options, Model1) ->
    {ok, Compiled} = erlsom:compile_xsd(Xsd, Options),
    add_model(Model1, Compiled).
55 |
%% Merges the model returned by erlsom_parseXsd:xsdModel/0 into Model1.
add_xsd_model(Model1) ->
    XsdModel = erlsom_parseXsd:xsdModel(),
    add_model(Model1, XsdModel).
58 |
%% Merges the second model into Model1 and returns the updated Model1.
%%
%% The first type of each model must be the '_document' type with exactly one
%% element. The alternatives of both document elements, the remaining types,
%% the namespaces and the type hierarchies are each combined into a sorted,
%% duplicate-free list. The combined types are then passed through
%% erlsom_pass2:pass5/2 together with the combined namespaces and the target
%% namespace of Model1.
add_model(Model1 = #model{tps = Tps, nss = Nss, tns = Tns, th = Th},
          _Model2 = #model{tps = NewTps, nss = NewNss, th = NewTh}) ->
    %% sorted union of two lists, duplicates removed
    Union = fun(Left, Right) ->
                lists:umerge(lists:usort(Left), lists:usort(Right))
            end,

    [Document = #type{nm = '_document',
                      els = [Element = #el{alts = Alts}]}
     | OtherTypes] = Tps,
    [#type{nm = '_document', els = [#el{alts = NewAlts}]}
     | OtherNewTypes] = NewTps,

    CombinedDocument =
        Document#type{els = [Element#el{alts = Union(Alts, NewAlts)}]},
    CombinedTypes = [CombinedDocument | Union(OtherTypes, OtherNewTypes)],
    CombinedNss = Union(Nss, NewNss),
    CombinedTh = Union(Th, NewTh),

    Info = #schemaInfo{namespaces = CombinedNss, targetNamespace = Tns},
    UpdatedTypes = erlsom_pass2:pass5(CombinedTypes, Info),

    Model1#model{tps = UpdatedTypes, nss = CombinedNss, th = CombinedTh}.
81 |
82 |
--------------------------------------------------------------------------------
/examples/soap_example/soap_example.erl:
--------------------------------------------------------------------------------
1 | %%% a simple example of the use of erlsom.
2 | %%%
3 | -module(soap_example).
4 | -include("erlsom.hrl").
5 |
6 | %% user interface
7 | -export([run/0]).
8 |
%% Records for the request ('in:') and response ('out:') payloads
%% (generated by writeHrl).
-record('in:arguments', {
    anyAttribs,
    values,
    precision
}).
-record('out:resultType', {
    anyAttribs,
    result
}).
-record('out:resultType-error', {
    anyAttribs,
    error
}).
-record('out:resultType-okResult', {
    anyAttribs,
    value
}).
-record('out:errorType', {
    anyAttribs,
    errorCode,
    errorDescription
}).
15 |
%% Records for the SOAP envelope types; 'sp' is the prefix under which the
%% envelope schema is compiled in compileXSDs/0.
-record('sp:UpgradeType',       {anyAttribs, 'SupportedEnvelope'}).
-record('sp:SupportedEnvType',  {anyAttribs, qname}).
-record('sp:NotUnderstoodType', {anyAttribs, qname}).
-record('sp:detail',            {anyAttribs, choice}).
-record('sp:subcode',           {anyAttribs, 'Value', 'Subcode'}).
-record('sp:faultcode',         {anyAttribs, 'Value', 'Subcode'}).
-record('sp:reasontext',        {anyAttribs, 'xml:lang', '#text'}).
-record('sp:faultreason',       {anyAttribs, 'Text'}).
-record('sp:Fault',             {anyAttribs,
                                 'Code', 'Reason', 'Node', 'Role', 'Detail'}).
-record('sp:Body',              {anyAttribs, choice}).
-record('sp:Header',            {anyAttribs, choice}).
-record('sp:Envelope',          {anyAttribs, 'Header', 'Body'}).
28 |
%% Runs the SOAP example: compiles the models, parses the request envelope,
%% processes the body content and returns the result of writing the response
%% envelope with erlsom:write/2.
%%
%% When the request cannot be parsed a SOAP fault is produced. Its fault code
%% uses the same prefixed form ("sp:Sender") as the faults built in
%% processContent/1; the previous unprefixed "Sender" was inconsistent with
%% them.
run() ->
    {ModelIn, ModelOut} = compileXSDs(),

    %% parse xml
    Xml = filename:join([codeDir(), "example_in.xml"]),
    Result =
        case erlsom:scan_file(Xml, ModelIn) of
            {ok, #'sp:Envelope'{'Body' = #'sp:Body'{choice = Content}}, _} ->
                processContent(Content);
            {error, _} ->
                soapError("sp:Sender", "Incorrect message")
        end,

    %% add envelope
    Response = #'sp:Envelope'{'Body' = #'sp:Body'{choice = Result}},
    %% generate xml.
    erlsom:write(Response, ModelOut).
45 |
46 |
%% Turns the content of the request body into the content of the response
%% body: a single 'in:arguments' element with values yields the average,
%% one without values yields a "No arguments provided" fault, and anything
%% else yields an "Unexpected error" fault.
processContent([#'in:arguments'{values = undefined}]) ->
    soapError("sp:Sender", "No arguments provided");
processContent([#'in:arguments'{values = Values, precision = Precision}]) ->
    Average = calcAverage(Values, Precision),
    [#'out:resultType'{
         result = #'out:resultType-okResult'{value = Average}}];
processContent(_Other) ->
    soapError("sp:Sender", "Unexpected error").
58 |
59 |
%% Builds body content holding one SOAP fault with the given code and an
%% English ("EN") reason text.
soapError(Code, Reason) ->
    ReasonText = #'sp:reasontext'{'xml:lang' = "EN", '#text' = Reason},
    [#'sp:Fault'{
         'Code' = #'sp:faultcode'{'Value' = Code},
         'Reason' = #'sp:faultreason'{'Text' = [ReasonText]}}].
64 |
65 |
%% Compiles the SOAP envelope schema and adds, separately, the request
%% schema and the response schema to it. Returns {ModelIn, ModelOut}.
compileXSDs() ->
    Dir = codeDir(),
    Options = fun(Prefix) -> [{prefix, Prefix}, {strict, false}] end,
    {ok, SoapModel} =
        erlsom:compile_xsd_file(filename:join([Dir, "soap-envelope.xsd"]),
                                Options("sp")),
    {ok, ModelIn} =
        erlsom:add_xsd_file(filename:join([Dir, "example_in.xsd"]),
                            Options("in"), SoapModel),
    {ok, ModelOut} =
        erlsom:add_xsd_file(filename:join([Dir, "example_out.xsd"]),
                            Options("out"), SoapModel),
    {ModelIn, ModelOut}.
74 |
%% Returns the arithmetic mean of the numbers in List as a flat string with
%% Precision digits after the decimal point. An empty list fails with
%% badarith (division by zero).
calcAverage(List, Precision) ->
    Mean = lists:sum(List) / length(List),
    lists:flatten(io_lib:format("~.*f", [Precision, Mean])).
81 |
%% Directory that holds this module's compiled code; the example files are
%% looked up there (this just makes the example easier to test).
codeDir() ->
    BeamPath = code:which(?MODULE),
    filename:dirname(BeamPath).
84 |
--------------------------------------------------------------------------------
/priv/gexf/schema/gexf.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | Tree
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | Datatypes
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/src/erlsom_sax.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% .
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% data structures produced by erlsom_sax
23 | %%% ====================================================================
24 |
25 | %% data structures produced by sax.erl.
26 |
%% An attribute as produced by the SAX parser; prefix and uri default to [].
-record(attribute, {
    localName,
    prefix = [],
    uri = [],
    value
}).
28 |
%% State record of the SAX parser.
-record(erlsom_sax_state, {
    user_state,
    callback,
    %% encoding of the input document
    encoding,
    continuation_state,
    entities = [],
    par_entities = [],
    current_entity = '__top',
    namespaces = [],
    endtags = [],
    %% determines the encoding of text and attribute values
    output,
    %% If false, user defined entities will be ignored in the DTD, and use
    %% of entities will fail.
    expand_entities = true,
    %% Maximum level of nesting of entities. 2 means: an entity can refer to
    %% 1 or more other entities, but none of those can contain entity
    %% references.
    max_entity_depth = 2,
    %% Maximum size of a single entity.
    max_entity_size = 2000,
    %% Maximum number of entities that can be defined. Note that a large
    %% number can lead to long processing to find cycles, unless the max
    %% depth has been set to a small number.
    max_nr_of_entities = 100,
    %% Maximum total number of bytes of all expanded entities together.
    max_expanded_entity_size = 10000000,
    %% accumulated size of entities
    entity_size_acc = 0,
    continuation_fun,
    %% used to check on circular definitions
    entity_relations = []
}).
57 |
%% Character constants (macro approach copied from xmerl).
-define(space, $\s).
-define(cr,    $\r).
-define(lf,    $\n).
-define(tab,   $\t).

%% Guard test: C is a tab, line feed, carriage return or space.
%% Expands to a `;`-separated sequence of alternatives.
-define(is_whitespace(C),
        C =:= ?tab; C =:= ?lf; C =:= ?cr; C =:= ?space).
67 |
%% Guard test: C may start a name, 7-bit ASCII only (A-Z, a-z or underscore).
-define(is_namestart_char(C),
        C =:= $_;
        C > 64, C < 91;       %% 'A'..'Z'
        C > 96, C < 123).     %% 'a'..'z'
70 |
%% Guard test: C may start a name, also for characters beyond 7-bit ASCII:
%% A-Z, a-z, underscore, or any code above 191 except 215 and 247.
%% This check is far from complete!
-define(is_namestart_char2(C),
        C =:= $_;
        C > 64, C < 91;                   %% 'A'..'Z'
        C > 96, C < 123;                  %% 'a'..'z'
        C > 191, C =/= 215, C =/= 247).
74 |
%% Guard test: C may occur inside a name, 7-bit ASCII only
%% (0-9, A-Z, a-z, underscore, hyphen or full stop).
-define(is_name_char(C),
        C =:= $_; C =:= $-; C =:= $.;
        C > 47, C < 58;       %% '0'..'9'
        C > 64, C < 91;       %% 'A'..'Z'
        C > 96, C < 123).     %% 'a'..'z'
82 |
%% Guard test: C may occur inside a name, also for characters beyond 7-bit
%% ASCII: 0-9, A-Z, a-z, underscore, hyphen, full stop, or any code above 191
%% except 215 and 247. This check is far from complete!
%%
%% The exclusions use the exact operator =/= (instead of /=), consistent with
%% is_namestart_char2; for integer character codes the result is the same.
-define(is_name_char2(C),
        C > 96, C < 123;
        C > 64, C < 91;
        C > 47, C < 58;
        C =:= $_;
        C =:= $-;
        C =:= $.;
        C > 191, C =/= 215, C =/= 247).
91 |
--------------------------------------------------------------------------------
/examples/erlsom_sax_example/search_request.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/src/erlsom_parse.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% .
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% header file for erlsom_parse
23 | %%% ====================================================================
24 |
25 | %% header file for erlsom_parse. Contains the record definitions.
26 |
27 | %% the records that form the model - see erlsom_parse.erl for a
28 | %% description.
29 |
%% The compiled model.
-record(model, {
    tps,
    nss,
    %% target namespace (the URI, a string)
    tns,
    %% type hierarchy, see the 'tree'-functions in erlsom_lib
    th,
    %% Include "any attributes" (i.e. attributes that have not been
    %% explicitly declared in the XSD) in the result. If set to true these
    %% will be in the second element of the record.
    any_attribs,
    %% Function that is called after a complex type has been parsed (which
    %% results in the creation of a record). Can be used to modify the value
    %% (or for side effects).
    value_fun
}).
%% Description of a type.
-record(type, {
    %% 'nm' is actually a key, which may include an additional prefix to
    %% differentiate between elements, types and groups.
    nm,
    tp = sequence,
    els,
    atts = [],
    anyAttr,
    nillable,
    nr,
    %% mn & mx are only used by erlsom_compile
    mn = 1,
    mx = 1,
    mxd = false,
    %% the 'real' name, to be used in xsi:type attributes for derived types
    typeName
}).
%% An element of a type.
-record(el, {
    alts,
    mn = 1,
    mx = 1,
    nillable,
    %% actually the position of the value in the result record
    nr
}).
%% An alternative of an element.
-record(alt, {
    tag,
    tp,
    nxt = [],
    mn = 1,
    mx = 1,
    rl = true,
    anyInfo
}).

%% An attribute declaration.
-record(att, {nm, nr, opt, tp}).

%% -record(ns, {uri, pf}).

-record(elInfo, {anyAttr}).

%% for anyAttributes
-record(anyAttr, {prCont, ns, tns}).

%% for any elements
-record(anyInfo, {prCont, ns, tns}).
58 |
%% State record of the parser; value_acc defaults to [].
-record(state, {
    currentState,
    resultSoFar,
    model,
    namespaces,
    allNamespaces,
    continuationState,
    value_acc = [],
    value_fun
}).
62 |
-record(cs, {
    %% remaining elements
    re,
    %% nr of elements of the current type received so far
    sf,
    %% element record: the result (so far) for this type
    er,
    %% 'real element': do we expect an end-tag?
    rl,
    %% is this a mixed type?
    mxd
}).
68 |
-record(all, {
    %% remaining elements
    re,
    %% the sequence number of the current element
    nr,
    %% element record: the result (so far) for this type
    er
}).
72 |
%% altState is used for parsing alternatives within a choice that can occur
%% more than once.
-record(altState, {
    %% the tag we are processing
    name,
    %% the type of this element
    type,
    %% is this a 'real' element or a group ref
    real,
    %% number of elements received
    receivedSoFar,
    %% values of elements already processed
    acc,
    %% minOccurs
    min,
    %% maxOccurs
    max
}).
82 |
%% Holds a single field, anyInfo.
-record(anyState, {
    anyInfo
}).
84 |
--------------------------------------------------------------------------------
/examples/continuation/continuation_example.erl:
--------------------------------------------------------------------------------
1 | -module(continuation_example).
2 | %% Example to show how the Erlsom Sax parser can be used in combination
3 | %% with a 'continuation function'. This enables parsing of very big documents
4 | %% in a sort of streaming mode.
5 | %%
6 | %% When the sax parser reaches the end of a block of data, it calls the
7 | %% continuation function. This should return the next block of data.
8 | %%
9 | %% the continuation function is a function that takes 2 arguments: Tail and
10 | %% State.
11 | %% - Tail is the (short) list of characters that could not yet be parsed
12 | %% because it might be a special token or not. Since this still has to
13 | %% be parsed, it should be put in front of the next block of data.
14 | %% - State is information that is passed by the parser to the callback
15 | %% functions transparently. This can be used to keep track of the
16 | %% location in the file etc.
17 | %% The function returns {NewData, NewState}, where NewData is a list of
18 | %% characters/unicode code points, and NewState the new value for the State.
19 |
20 | -export([run/0]).
21 |
%% Number of bytes requested from the file per read. Tune it for the best
%% result: 109 is obviously not a good value, it should be bigger than that
%% (try it out).
-define(chunk, 109).
26 |
%% Parses BookStore.xml in chunks with the SAX parser and prints the number
%% of books per date. Returns ok.
%%
%% The file is closed in an `after` section, so the handle is released even
%% when parsing fails.
run() ->
    F = fun count_books/2,   %% the callback function that handles the sax events
    G = fun continue_file/2, %% the callback function that returns the next
                             %% chunk of data
    %% open file
    {ok, Handle} = file:open(xml(), [read, raw, binary]),
    Position = 0,
    CState = {Handle, Position, ?chunk},
    SaxCallbackState = undefined,
    Result =
        try
            %% erlsom:parse_sax() returns {ok, FinalState, TrailingBytes},
            %% where FinalState is the value of the callback state after
            %% processing the last SAX event, and TrailingBytes is the rest
            %% of the input document that follows the last closing tag of
            %% the XML.
            {ok, FinalState, _TrailingBytes} =
                erlsom:parse_sax(<<>>, SaxCallbackState, F,
                                 [{continuation_function, G, CState}]),
            FinalState
        after
            %% close file
            ok = file:close(Handle)
        end,

    %% Result is a list [{Date, Count}, ...]
    lists:foreach(fun({Date, Count}) ->
                      io:format("Date: ~p - count: ~p~n", [Date, Count])
                  end, Result),
    ok.
51 |
%% Continuation function that reads chunks of data from a file.
%%
%% Tail is the not yet parsed remainder of the previous block (a binary); it
%% is put in front of the newly read data. The state is
%% {Handle, Offset, Chunk}: the open file, the position to read from and the
%% number of bytes to request.
%%
%% Returns {NewData, NewState}. At end of file NewData is just Tail and the
%% state is unchanged. The offset advances by the number of bytes actually
%% read, so a short read does not skip data.
continue_file(Tail, {Handle, Offset, Chunk}) ->
    %% read the next chunk
    case file:pread(Handle, Offset, Chunk) of
        {ok, Data} ->
            {<<Tail/binary, Data/binary>>,
             {Handle, Offset + byte_size(Data), Chunk}};
        eof ->
            {Tail, {Handle, Offset, Chunk}}
    end.
62 |
%% SAX callback specific to this example: counts the number of books per
%% date.
%%
%% Takes a SAX event and the current state and returns the new state.
%%
%% The state holds:
%%   stack       - the tags of the elements that are currently open,
%%                 innermost first
%%   acc         - the result so far: [{Date, Count}]
%%   element_acc - the character data collected so far for the current
%%                 element (in theory there can be more than one character
%%                 event per element)
%%
%% On endDocument the accumulated [{Date, Count}] list is returned instead
%% of a state record.
-record(state, {stack = [], acc = [], element_acc = ""}).

count_books(startDocument, _) ->
    #state{};
count_books({startElement, _, Tag, _, _}, #state{stack = Open} = S) ->
    S#state{stack = [Tag | Open]};
count_books({characters, Chars},
            #state{stack = ["date", "book", "book_store"],
                   element_acc = Collected} = S) ->
    S#state{element_acc = Collected ++ Chars};
count_books({endElement, _, _, _},
            #state{stack = ["date" | Outer],
                   acc = Counts,
                   element_acc = Date} = S) ->
    %% a date element is complete: count it and reset the text accumulator
    S#state{stack = Outer,
            acc = processBook(Counts, Date),
            element_acc = ""};
count_books({endElement, _, _, _}, #state{stack = [_ | Outer]} = S) ->
    S#state{stack = Outer};
count_books(endDocument, #state{acc = Counts}) ->
    Counts;
count_books(_, S) ->
    S.
93 |
%% Increments the counter stored for Date in the {Date, Count} list, or puts
%% a new {Date, 1} entry in front of the list when Date is not present yet.
processBook(List, Date) ->
    case lists:keyfind(Date, 1, List) of
        {_, Count} ->
            lists:keyreplace(Date, 1, List, {Date, Count + 1});
        false ->
            [{Date, 1} | List]
    end.
101 |
%% Location of the example document, resolved relative to the directory that
%% holds this module's compiled code (this just makes the example easier to
%% test).
xml() ->
    filename:join(codeDir(), "BookStore.xml").

codeDir() ->
    BeamPath = code:which(?MODULE),
    filename:dirname(BeamPath).
105 |
--------------------------------------------------------------------------------
/priv/gexf/schema/viz.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
--------------------------------------------------------------------------------
/src/erlsom_compile.hrl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Header file for erlsom_compile
23 | %%% ====================================================================
24 |
25 |
%% Records for the structures as found in the XSD.
%% Records are tuples, so the order of the fields is significant and must
%% not be changed.
-record(schemaType, {
    elInfo, targetNamespace, elementFormDefault, attributeFormDefault,
    blockDefault, finalDefault, version, id, imports, elements
}).
-record(importType, {
    elInfo, id, namespace, schemaLocation, annotation
}).
-record(includeType, {
    elInfo, id, schemaLocation, annotation
}).
-record(redefineType, {
    elInfo, id, schemaLocation, elements
}).
-record(globalElementType, {
    elInfo, name, type, default, fixed, id, abstract, substitutionGroup,
    final, nillable, annotation, simpleOrComplex, unique
}).
-record(localElementType, {
    elInfo, name, type, default, fixed, form, ref, minOccurs, maxOccurs,
    nillable, annotation, simpleOrComplex, unique
}).
-record(globalComplexTypeType, {
    elInfo, name, final, abstract, block, mixed, id, annotation, model,
    attributes, anyAttribute
}).
-record(localComplexTypeType, {
    elInfo, mixed, annotation, model, attributes, anyAttribute
}).
-record(globalSimpleTypeType, {
    elInfo, name, id, final, annotation, model
}).
-record(localSimpleTypeType, {
    elInfo, annotation, model
}).
-record(simpleContentType, {
    elInfo, annotation, model, id
}).
-record(groupDefType, {
    elInfo, name, annotation, model
}).
-record(groupRefType, {
    elInfo, ref, minOccurs, maxOccurs
}).
-record(annotationType, {
    elInfo, annotation
}).
-record(globalAttributeType, {
    elInfo, name, type, use, fixed, default, id, model
}).
-record(localAttributeType, {
    elInfo, name, type, use, ref, fixed, form, default, model
}).
-record(choiceType, {
    elInfo, id, minOccurs, maxOccurs, annotation, alternatives
}).
-record(sequenceType, {
    elInfo, annotation, elements, minOccurs, maxOccurs
}).
-record(allType, {
    elInfo, annotation, elements, minOccurs, maxOccurs
}).
-record(attributeGroupDefType, {
    elInfo, id, name, annotation, attributes, anyAttribute
}).
-record(attributeGroupRefType, {
    elInfo, ref, id
}).
-record(anyType, {
    elInfo, any, minOccurs, maxOccurs, namespace, processContents
}).
-record(anyAttributeType, {
    elInfo, id, namespace, processContents, annotation
}).
-record(extensionType, {
    elInfo, base, annotation, attributes, anyAttribute
}).
-record(extensionTypeC, {
    elInfo, base, annotation, model, attributes, anyAttribute
}).
-record(restrictionType, {
    elInfo, annotation, any, attributes, anyAttribute, base
}).
-record(restrictionTypeC, {
    elInfo, base, annotation, model, attributes, anyAttribute
}).
-record(complexContentType, {
    elInfo, annotation, model, mixed
}).

%% Not an XSD structure: this is added to the XSD to allow generation of an
%% XML document.
-record(namespaceType, {
    prefix, 'URI'
}).
62 |
%% The rest is for internal use in the translation of the XSD to the
%% format used by the parser.
-record(schemaInfo, {
    targetNamespace,
    elementFormDefault,
    namespacePrefix,
    namespaces,
    %% Used to give local elements a unique name (the 'path' to the element).
    path = [],
    attGrps,
    atts,
    th,
    %% Enforce additional type checks/conversions.
    strict = false :: boolean(),
    %% If true, the second element in the result types will be used for
    %% attributes that were not explicitly declared.
    include_any_attrs = false,
    %% Function that is called after a complex type has been parsed (that
    %% is, after the record for it has been created). Can be used to modify
    %% the value (or for side effects).
    value_fun
}).
77 |
%% typeInfo - the intermediate format.
%% 'global' (true or false) records whether this type should, in the end, be
%% available as a 'top level' element in the xml document.
-record(typeInfo, {
    typeName, global, typeType, typeRef,
    elements,
    attributes = [],
    anyAttr, seqOrAll,
    extends, restricts,
    mixed, base, substitutionGroup,
    min = 1,
    max = 1
}).
97 |
%% Remaining intermediate-format records. Occurrence bounds (min/max)
%% default to exactly once.
-record(elementInfo, {
    alternatives,
    min = 1,
    max = 1,
    nillable
}).
-record(alternative, {
    tag, type, real,
    min = 1,
    max = 1,
    anyInfo
}).
-record(attrib, {
    name, optional, type, ref
}).
-record(attGrp, {
    name, atts, anyAttr
}).
102 |
--------------------------------------------------------------------------------
/examples/soap_example/soap-envelope.xsd:
--------------------------------------------------------------------------------
1 |
17 |
18 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | Elements replacing the wildcard MUST be namespace qualified, but can be in the targetNamespace
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
58 |
59 |
60 |
61 |
62 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | Fault reporting structure
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
105 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
125 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/test/erlsom_tests.erl:
--------------------------------------------------------------------------------
1 | -module(erlsom_tests).
2 |
3 | %% ------------------------------------------------------------------
4 | %% Tests
5 | %% ------------------------------------------------------------------
6 |
7 | -include_lib("eunit/include/eunit.hrl").
8 | -include("src/erlsom_parse.hrl").
9 | -compile([nowarn_export_all, export_all]).
10 |
%% Delegates to all tests of the erlsom_gexf_tests module, under one title.
gexf_test_() ->
    Title = "Test XML/XSD in GEXF format.",
    {Title, {module, erlsom_gexf_tests}}.
13 |
%% Round-trip check for a schema that uses xs:all.
all_test_() ->
    Xsd = ["all", "all.xsd"],
    Xml = ["all", "all.xml"],
    {"Test XSD with the xs:all tag.", verify_stability_(Xsd, Xml, [])}.
19 |
%% Tests for XSD type extension: two round-trip checks plus a check that a
%% compiled simpleContent extension has at most one '#text' element per type.
extension_test_() ->
    %% True only for an element whose single alternative has the '#text' tag.
    IsTextEl = fun
        (#el{alts = [#alt{tag = '#text'}]}) -> true;
        (_) -> false
    end,
    %% No duplicate entries for the #text.
    %% Type below is only used to get a more informative failure.
    CheckType = fun (Type = #type{els = Els}) ->
        case {Type, lists:filter(IsTextEl, Els)} of
            {_, []} -> ok;
            {_, [_]} -> ok
        end
    end,
    SimpleContentXsd = ["extension", "simpleContentExtension.xsd"],
    [
        {"Test XSD type extensions.",
         verify_stability_(["extension", "extension.xsd"],
                           ["extension", "extension.xml"],
                           [])},
        {"Test simpleContent extension stable",
         verify_stability_(SimpleContentXsd,
                           ["extension", "simpleContentExtension.xml"],
                           [])},
        {"Test simpleContent #text not duplicated",
         fun () ->
             {ok, #model{tps = Types}} = compile_xsd(SimpleContentXsd, []),
             ok = lists:foreach(CheckType, Types)
         end}
    ].
44 |
%% @doc
%% Compile an XSD schema file given as path components relative to the
%% erlsom priv directory; IncludePaths are resolved the same way. Example:
%%     compile_xsd(["all", "all.xsd"], [])
compile_xsd(Path, IncludePaths) ->
    Options = [{include_dirs, [priv_path(Dir) || Dir <- IncludePaths]}],
    XsdFile = priv_path(Path),
    erlsom:compile_xsd_file(XsdFile, Options).
51 |
%% @doc
%% Parse an XML document (path components relative to the erlsom priv
%% directory) with a compiled XSD model. Example:
%%     parse_file(["all", "all.xml"], Model)
parse_file(Path, Model) ->
    XmlFile = priv_path(Path),
    erlsom:parse_file(XmlFile, Model).
57 |
%% @doc
%% Verify the parser/generator stability: parse the document, write it back
%% out, parse the written XML again and compare both parse results.
%% Returns a titled EUnit test object; the title is the last path component
%% of XmlPath. Example:
%%     verify_stability(["all", "all.xsd"], ["all", "all.xml"], [])
verify_stability(XsdPath, XmlPath, IncludeDirs) ->
    {ok, Model} = compile_xsd(XsdPath, IncludeDirs),
    {ok, Tree1} = parse_file(XmlPath, Model),
    {ok, Written} = erlsom:write(Tree1, Model),
    {ok, Tree2} = erlsom:parse(Written, Model),
    Title = lists:last(XmlPath),
    {Title, ?_assertEqual(Tree1, Tree2)}.
67 |
%% @doc
%% Test generator for verify_stability/3 tests.
%%
%% verify_stability/3 returns a titled EUnit test object
%% ({Title, ?_assertEqual(...)}). Wrapping the call in a plain fun/0 turns it
%% into a simple test whose return value EUnit discards, so the equality
%% assertion would never be evaluated. Returning a lazy generator makes EUnit
%% call verify_stability/3 and then run the test object it produces.
verify_stability_(XsdPath, XmlPath, IncludeDirs) ->
    {generator, fun() -> verify_stability(XsdPath, XmlPath, IncludeDirs) end}.
72 |
%% Join the given path components onto the priv directory of the erlsom
%% application.
priv_path(Path) ->
    PrivDir = code:priv_dir(erlsom),
    filename:join([PrivDir | Path]).
75 |
76 |
77 | %%
78 | %% Check if xsi:type is parsed and written correctly.
79 | %%
80 | %% This test was introduced to check/fix the following bugs:
81 | %% * The ext namespace was not added to the corresponding element when writing model to the XSD.
82 | %% * The xsi namespace was duplicated with different prefix if the model had xsi namespace defined with other prefix.
83 | %%
84 | %% Before the fix, the `Written' XML was looking like this:
85 | %% ```
86 | %%
92 | %% a
93 | %% b
94 | %%
95 | %% '''
xsi_type_write_test() ->
    %% Compile the base and the extension schema and merge the two models.
    BaseOptions = [
        {include_any_attribs, true},
        {prefix, "b"}
    ],
    {ok, Base} = erlsom:compile_xsd_file(priv_path(["xsi_type", "base.xsd"]), BaseOptions),
    ExtOptions = [
        {include_any_attribs, true},
        {prefix, "e"},
        {include_dirs, [priv_path(["xsi_type"])]},
        {include_files, [{"urn:erlsom/xsi_type/base", "b", priv_path(["xsi_type", "base.xsd"])}]}
    ],
    {ok, Ext} = erlsom:compile_xsd_file(priv_path(["xsi_type", "ext.xsd"]), ExtOptions),
    Model = erlsom:add_model(Base, Ext),
    io:format("Model=~p~n", [Model]),
    %% Parse the XML, write it out again and parse the written document.
    {ok, Xml} = file:read_file(priv_path(["xsi_type", "ext.xml"])),
    {ok, Parsed1} = erlsom:parse(Xml, Model),
    io:format("Parsed1=~p~n", [Parsed1]),
    {ok, Written} = erlsom:write(Parsed1, Model),
    io:format("Written=~p~n", [Written]),
    {ok, Parsed2} = erlsom:parse(Written, Model),
    io:format("Parsed2=~p~n", [Parsed2]),
    %% Compare ignoring the extra attributes (element 2), because they have
    %% type names with prefixes, as defined in the XML.
    ?assertEqual(
        erlang:setelement(2, Parsed1, []),
        erlang:setelement(2, Parsed2, [])
    ).
127 |
128 | % TODO: XSI:type and xsi:nil in one element.
129 |
130 |
131 | %%
132 | %% Check, if document can be parsed in the case, when an element is put to the
133 | %% global namespace explicitly (`xmlns=""') and has derived type specified.
134 | %%
xsi_type_no_prefix_read_test() ->
    Dir = "xsi_type_no_prefix",
    %% Compile the XSD model.
    Options = [{include_any_attribs, true}, {prefix, "t"}],
    {ok, Model} = erlsom:compile_xsd_file(priv_path([Dir, "test.xsd"]), Options),
    io:format("Model=~p~n", [Model]),
    %% Parse the XML and check that the derived type was picked up.
    {ok, Xml} = file:read_file(priv_path([Dir, "test.xml"])),
    {ok, Parsed} = erlsom:parse(Xml, Model),
    io:format("Parsed=~p~n", [Parsed]),
    ?assertMatch({'ExtType', _, "base", "ext"}, Parsed).
152 |
153 |
%% The choice_complex document must scan successfully against the model
%% compiled from its schema.
choice_complex_test() ->
    Dir = "choice_complex",
    {ok, Xsd} = file:read_file(priv_path([Dir, "choice_complex.xsd"])),
    {ok, Xml} = file:read_file(priv_path([Dir, "choice_complex.xml"])),
    {ok, Model} = erlsom:compile(Xsd),
    {ok, _Data, _} = erlsom:scan(Xml, Model).
159 |
160 |
--------------------------------------------------------------------------------
/examples/erlsom_sax_example/erlsom_simple_form.erl:
--------------------------------------------------------------------------------
1 | %%% translate XML to the 'simple form' as used by XMERL.
2 | %%%
3 | -module(erlsom_simple_form).
4 |
5 | %% user interface
6 | -export([simple_form/1]).
7 | %% with options
8 | -export([simple_form/2]).
9 |
10 | -include_lib("erlsom/src/erlsom_sax.hrl").
11 |
12 | -export([callback/2]).
13 | -export([nameFun/3]).
14 |
%% Callback state: the stack of elements under construction, the function
%% used to build element/attribute names, and the options passed by the user.
-record(sState, {
    stack,
    nameFun,
    options
}).
16 |
%% Parse File into the simple form, without options.
simple_form(File) ->
    simple_form(File, []).

%% Read File and run the SAX parser over its contents with callback/2.
%% A failure to read the file is returned unchanged.
simple_form(File, Options) ->
    case file:read_file(File) of
        {ok, Bin} ->
            Xml = binary_to_list(Bin),
            InitialState = #sState{stack = [],
                                   nameFun = fun erlsom_simple_form:nameFun/3,
                                   options = Options},
            erlsom:sax(Xml, InitialState, fun erlsom_simple_form:callback/2);
        Error ->
            Error
    end.
29 |
%% SAX callback: callback(Event, State) -> NewState.
%%
%% Element and character events are dispatched to startElement/2,
%% endElement/2 and characters/2; events that carry nothing for the simple
%% form leave the state unchanged. On endDocument the stack must hold exactly
%% the root element, which is returned as {ok, Root}.
%%
%% Any exception is passed to throwError/4 together with the event and the
%% state. For errors the stacktrace is bound in the catch clause itself:
%% erlang:get_stacktrace/0 is deprecated since OTP 21 and absent from newer
%% releases, where calling it inside the handler would fail and hide the
%% original error.
callback(Event, State) ->
    try
        case Event of
            startDocument ->
                State;
            {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
                startElement(Event, State);
            {endElement, _Uri, _LocalName, _Prefix} ->
                endElement(Event, State);
            {characters, _Characters} ->
                characters(Event, State);
            {ignorableWhitespace, _Characters} ->
                State;
            {processingInstruction, _Target, _Data} ->
                State;
            {startPrefixMapping, _Prefix, _URI} ->
                State;
            {endPrefixMapping, _Prefix} ->
                State;
            endDocument ->
                case State of
                    #sState{stack = [Root]} ->
                        {ok, Root};
                    _Else ->
                        throw({error, "unexpected end"})
                end;
            {error, Message} ->
                throw(Message);
            {'EXIT', Message} ->
                exit(Message)
        end
    catch
        error:Reason:Stacktrace ->
            throwError(error, {Reason, Stacktrace}, Event, State);
        Class:Exception ->
            throwError(Class, Exception, Event, State)
    end.
69 |
70 | %% Stack contains the tree that is growing as the elements come in.
71 | %% [{root, [attributes], [element1, element2]},
72 | %% {element3, [attributes], [element3.1, element3.2]},
73 | %% {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)
74 |
75 | %% When a startElement event comes in, add a new element to the stack:
76 | %% [{root, [attributes], [element1, element2]},
77 | %% {element3, [attributes], [element3.1, element3.2]},
78 | %% {element3.3, [attributes], [element3.3.1]},
79 | %% {element3.3.2, [attributes], []}]
80 |
81 | %% When a textElement event comes in, insert it into the top element:
82 | %% [{root, [attributes], [element1, element2]},
83 | %% {element3, [attributes], [element3.1, element3.2]},
84 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
85 | %% {element3.3, [attributes], [element3.3.1]},
86 | %% {element3.3.2, [attributes], [{#text, "the text"}]}]
87 |
88 | %% When an endElement comes in, insert the top element of the stack in the
89 | %% layer below it (its parent):
90 | %% [{root, [attributes], [element1, element2]},
91 | %% {element3, [attributes], [element3.1, element3.2]},
92 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
93 |
%% Push a new element, with its processed attributes and no content yet,
%% onto the stack.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             #sState{stack = Stack, nameFun = NameFun} = State) ->
    Element = {NameFun(LocalName, Uri, Prefix),
               processAttributes(Attributes, State),
               []},
    State#sState{stack = [Element | Stack]}.
98 |
%% Close the element on top of the stack. Content was collected in reverse
%% order, so it is reversed here. The last remaining element (the root)
%% stays on the stack; any other element is added to its parent's content.
endElement({endElement, _Uri, _LocalName, _Prefix},
           #sState{stack = [{Name, Attributes, Elements}]} = State) ->
    Root = {Name, Attributes, lists:reverse(Elements)},
    State#sState{stack = [Root]};
endElement({endElement, _Uri, _LocalName, _Prefix}, State) ->
    #sState{stack = [{Name, Attributes, Elements},
                     {ParentName, ParentAttributes, ParentElements} | Tail]} = State,
    Closed = {Name, Attributes, lists:reverse(Elements)},
    Parent = {ParentName, ParentAttributes, [Closed | ParentElements]},
    State#sState{stack = [Parent | Tail]}.
109 |
%% Add character data to the element on top of the stack. When the most
%% recently added content item is a list (earlier text), the new characters
%% are appended to it; otherwise they become a new content item.
characters({characters, Characters},
           #sState{stack = [{Name, Attributes, [Previous | Earlier]} | Below]} = State)
  when is_list(Previous) ->
    Text = Previous ++ Characters,
    State#sState{stack = [{Name, Attributes, [Text | Earlier]} | Below]};
characters({characters, Characters},
           #sState{stack = [{Name, Attributes, Content} | Below]} = State) ->
    State#sState{stack = [{Name, Attributes, [Characters | Content]} | Below]}.
120 |
%% Turn the list of #attribute{} records into [{Name, Value}], in the same
%% order, building each name with the state's name function.
processAttributes(Attributes, State) ->
    ToPair = fun(#attribute{localName = LocalName, uri = Uri, prefix = Prefix, value = Value}) ->
        NameFun = State#sState.nameFun,
        {NameFun(LocalName, Uri, Prefix), Value}
    end,
    lists:map(ToPair, Attributes).
129 |
%% Default name function: the bare name when there is no namespace,
%% "{Namespace}Name" otherwise. The prefix is ignored.
nameFun(Name, Namespace, _Prefix) ->
    case Namespace of
        [] ->
            Name;
        _ ->
            "{" ++ Namespace ++ "}" ++ Name
    end.
134 |
135 |
%% Re-raise an exception caught in callback/2 as {error, Details}.
%% With a proper callback state, Details lists the exception, the names of
%% the elements on the stack and the event that was being handled; with any
%% other state, Details is just the exception. Throws stay throws; errors and
%% exits are both raised as exits.
throwError(Class, Exception, Event, #sState{stack = Stack}) ->
    Details = [{exception, Exception},
               {stack, printStackTrace(Stack)},
               {received, Event}],
    Term = {error, Details},
    case Class of
        'throw' -> throw(Term);
        'error' -> exit(Term);
        'exit' -> exit(Term)
    end;
throwError(Class, Exception, _Event, _Something) ->
    Term = {error, Exception},
    case Class of
        'throw' -> throw(Term);
        'error' -> exit(Term);
        'exit' -> exit(Term)
    end.
159 |
%% List the names of the elements on the stack as [{element, Name}].
%% The result is in the reverse of the stack order.
printStackTrace(Stack) ->
    lists:foldl(fun({Name, _, _}, Acc) -> [{element, Name} | Acc] end, [], Stack).
166 |
167 |
--------------------------------------------------------------------------------
/priv/gexf/schema/data.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
--------------------------------------------------------------------------------
/COPYING.LESSER:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/src/erlsom_simple_form.erl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% translate XML to the 'simple form' as used by XMERL.
22 | -module(erlsom_simple_form).
23 |
24 | %% user interface
25 | -export([scan/2]).
26 | -export([callback/2]).
27 | -export([new_state/1]).
28 |
29 | -include("erlsom_sax.hrl").
30 | -include("exception.hrl").
31 |
%% Callback state: the stack of elements under construction, the function
%% used to build element/attribute names, and an options field.
-record(sState, {
    stack,
    nameFun,
    options
}).
33 |
%% Parse Xml into the simple form.
%% A {nameFun, Fun} entry in Options selects the function used to build
%% names and is removed from the options before they are passed on to the
%% SAX parser; without it the default nameFun/3 is used.
scan(Xml, Options) ->
    {Fun, Options2} =
        case lists:keysearch('nameFun', 1, Options) of
            {value, {_, Custom}} ->
                {Custom, lists:keydelete('nameFun', 1, Options)};
            _ ->
                {fun nameFun/3, Options}
        end,
    InitialState = #sState{stack = [], nameFun = Fun},
    erlsom:parse_sax(Xml, InitialState, fun callback/2, Options2).
45 |
46 |
%% Fresh callback state: empty stack, no options, and the given name function.
new_state(Namefun) ->
    #sState{nameFun = Namefun,
            stack = [],
            options = []}.
49 |
%% SAX callback: callback(Event, State) -> NewState.
%%
%% It is also possible to call erlsom_simple_form:callback from within
%% another callback function (to parse a part of an xml document). For that
%% reason startDocument accepts, besides an existing #sState{}, an option
%% list [{name_function, NameFun}] or any other term, and creates a new
%% state from it.
%%
%% On endDocument the state must be {result, Document} (as produced when the
%% outermost element was closed); Document is returned. Any exception is
%% handed to throwError/4 together with the event and the state.
callback(Event, State) ->
    try
        case Event of
            %% Events that carry nothing for the simple form.
            {ignorableWhitespace, _Characters} ->
                State;
            {processingInstruction, _Target, _Data} ->
                State;
            {startPrefixMapping, _Prefix, _URI} ->
                State;
            {endPrefixMapping, _Prefix} ->
                State;
            %% Events that build the tree.
            {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
                startElement(Event, State);
            {endElement, _Uri, _LocalName, _Prefix} ->
                endElement(Event, State);
            {characters, _Characters} ->
                characters(Event, State);
            %% Start and end of the document.
            startDocument ->
                case State of
                    #sState{} ->
                        State;
                    %% could be more options in the future, but for now
                    %% there is just 1
                    [{name_function, NameFun}] ->
                        new_state(NameFun);
                    _ ->
                        new_state(fun nameFun/3)
                end;
            endDocument ->
                case State of
                    {result, Document} ->
                        Document;
                    _Else ->
                        throw({error, "unexpected end"})
                end;
            %% Failures reported as events.
            {error, Message} ->
                throw(Message);
            {'EXIT', Message} ->
                exit(Message)
        end
    catch
        ?EXCEPTION(error, Reason, Stacktrace) ->
            throwError(error, {Reason, ?GET_STACK(Stacktrace)}, Event, State);
        Class:Exception ->
            throwError(Class, Exception, Event, State)
    end.
100 |
101 | %% Stack contains the tree that is growing as the elements come in.
102 | %% [{root, [attributes], [element1, element2]},
103 | %% {element3, [attributes], [element3.1, element3.2]},
104 | %% {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)
105 |
106 | %% When a startElement event comes in, add a new element to the stack:
107 | %% [{root, [attributes], [element1, element2]},
108 | %% {element3, [attributes], [element3.1, element3.2]},
109 | %% {element3.3, [attributes], [element3.3.1]},
110 | %% {element3.3.2, [attributes], []}]
111 |
112 | %% When a textElement event comes in, insert it into the top element:
113 | %% [{root, [attributes], [element1, element2]},
114 | %% {element3, [attributes], [element3.1, element3.2]},
115 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
116 | %% {element3.3, [attributes], [element3.3.1]},
117 | %% {element3.3.2, [attributes], [{#text, "the text"}]}]
118 |
119 | %% When an endElement comes in, insert the top element of the stack in the
120 | %% layer below it (its parent):
121 | %% [{root, [attributes], [element1, element2]},
122 | %% {element3, [attributes], [element3.1, element3.2]},
123 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
124 |
%% Handles a startElement event: puts a new element, that has no content
%% yet, on top of the stack. The name of the element is produced by the
%% name function from the state.
startElement({startElement, Namespace, Local, Pfx, Attrs},
             #sState{stack = Parents, nameFun = ToName} = State) ->
  ElementName = ToName(Local, Namespace, Pfx),
  NewElement = {ElementName, processAttributes(Attrs, State), []},
  State#sState{stack = [NewElement | Parents]}.
129 |
%% Handles an endElement event.
%%
%% The content of the element on top of the stack was collected in reverse
%% order, so it is reversed when the element is closed.
%%
%% - If the element is the only one on the stack the tree is complete, and
%%   {result, Document} is returned. This special value signals to the
%%   calling function that the parsing is done, which can be useful when
%%   parsing a part of a larger document.
%% - Otherwise the element is added to the content of its parent (the next
%%   element on the stack).
endElement({endElement, _Uri, _LocalName, _Prefix},
           #sState{stack = [{RootName, RootAttributes, RootContent}]}) ->
  {result, {RootName, RootAttributes, lists:reverse(RootContent)}};

endElement({endElement, _Uri, _LocalName, _Prefix}, State) ->
  #sState{stack = [{Name, Attributes, Content},
                   {ParentName, ParentAttributes, ParentContent} | Ancestors]} = State,
  Closed = {Name, Attributes, lists:reverse(Content)},
  Parent = {ParentName, ParentAttributes, [Closed | ParentContent]},
  State#sState{stack = [Parent | Ancestors]}.
144 |
%% Handles a characters event: adds the text to the element on top of the
%% stack.
%%
%% If the child that was added last to that element is text as well (a
%% list or a binary - child elements are tuples), the new characters are
%% appended to it, so that consecutive characters events result in one
%% piece of text. Lists are concatenated with ++, binaries with the bit
%% syntax. In all other cases the characters are added as a new child.
characters({characters, Characters},
           State = #sState{stack = [{Name,
                                     Attributes,
                                     [FirstBit | OtherElements]
                                    } | Tail]})
    when is_list(FirstBit) ->
  State#sState{stack = [{Name, Attributes, [FirstBit ++ Characters | OtherElements]} | Tail]};
characters({characters, Characters},
           State = #sState{stack = [{Name,
                                     Attributes,
                                     [FirstBit | OtherElements]
                                    } | Tail]})
    when is_binary(FirstBit) ->
  %% binary output: extend the existing text with the new characters
  State#sState{stack = [{Name, Attributes,
                         [<<FirstBit/binary, Characters/binary>> | OtherElements]} | Tail]};
characters({characters, Characters},
           State = #sState{stack = [{Name, Attributes, Elements} | Tail]}) ->
  State#sState{stack = [{Name, Attributes, [Characters | Elements]} | Tail]}.
162 |
%% Translates the #attribute{} records of a startElement event to a list
%% of {Name, Value} tuples (in the same order). Name is produced by the
%% name function from the state.
processAttributes(Attributes, State) ->
  processAttributes(Attributes, State, []).

%% Does the work for processAttributes/2. Converted is the result so far,
%% in reverse order.
processAttributes([#attribute{localName = Local, uri = Namespace,
                              prefix = Pfx, value = Val} | Remaining],
                  #sState{nameFun = ToName} = State,
                  Converted) ->
  Pair = {ToName(Local, Namespace, Pfx), Val},
  processAttributes(Remaining, State, [Pair | Converted]);
processAttributes([], _State, Converted) ->
  lists:reverse(Converted).
171 |
%% Default name function: builds the name of an element or attribute.
%% Without a namespace the name is used as it is, otherwise the result is
%% "{Namespace}Name". The prefix is not used.
nameFun(Name, Namespace, _Prefix) ->
  case Namespace of
    [] -> Name;
    _ -> [${ | Namespace ++ [$} | Name]]
  end.
176 |
177 |
%% Raises a problem that was caught in callback/2 again, as {error, ...}.
%%
%% Class - the class of the problem that was caught. A 'throw' is thrown
%%         again, 'error' and 'exit' both result in an exit.
%%
%% If the state is an #sState{} the reason is a list that holds the
%% original exception, the elements that are on the stack, and the event
%% that was being processed. For any other state only the original
%% exception is reported.
throwError(Class, Exception, Event,
           #sState{stack = Stack}) ->
  Report = [{exception, Exception},
            %% the names of the elements that were open
            {stack, printStackTrace(Stack)},
            %% the event that could not be handled
            {received, Event}],
  case Class of
    'throw' -> throw({error, Report});
    'error' -> exit({error, Report});
    'exit' -> exit({error, Report})
  end;

throwError(Class, Exception, _Event,
           _NoParserState) ->
  Wrapped = {error, Exception},
  case Class of
    'throw' -> throw(Wrapped);
    'error' -> exit(Wrapped);
    'exit' -> exit(Wrapped)
  end.
201 |
%% Translates the stack of {Name, Attributes, Content} tuples to a list of
%% {element, Name} tuples. The order is reversed, so the entry that is at
%% the bottom of the stack comes first.
printStackTrace(Stack) ->
  printStackTrace(Stack, []).

%% As printStackTrace/1, but the result is put in front of Acc.
printStackTrace(Stack, Acc) ->
  lists:foldl(fun({Name, _, _}, Names) -> [{element, Name} | Names] end,
              Acc, Stack).
208 |
209 |
--------------------------------------------------------------------------------
/src/erlsom_sax.erl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% An XML parser, using the SAX model.
23 | %%% ====================================================================
24 |
25 | -module(erlsom_sax).
26 |
27 | -include("erlsom_sax.hrl").
28 |
29 | -type attribute() :: #attribute{}.
30 | -export_type([attribute/0]).
31 |
32 | -export([parseDocument/3]).
33 | -export([parseDocument/4]).
34 |
35 | %%%%%%%%%%%%%%%%%%%%%%%%%
36 | %%
37 | %% Interface
38 | %%
39 | %% parseDocument(Xml, State, EventFun)
40 | %% parseDocument(Xml, State, EventFun, Options)
41 | %%
42 | %% Xml = A list of integers that correspond with the characters in an XML
43 | %% document. Can be either 1 byte characters according to ISO ...,
44 | %% or integers that correspond to Unicode code points (see...).
45 | %%
46 | %% State - a term() that is passed to the EventFun.
47 | %%
48 | %% Eventfun - a fun() that is called by the parser whenever it has parsed
49 | %% a bit of the Xml input. The function is called by the parser according
50 | %% to the Sax specification (see [SAX]).
51 | %%
52 | %% EventFun should accept the following arguments:
53 | %% - Event, a tuple that describes the event, see below.
54 | %% - State - a term()
55 | %%
56 | %% EventFun should return State, a term() that will be passed back to the next
57 | %% invocation of EventFun.
58 | %%
59 | %% Options - a list of options. The option that is described here is
60 | %% {continuation_function, CFunction, CState}, where CFunction is a fun() that
61 | %% returns the next block of data, and CState is the initial value of its State.
62 | %% CFunction should be a function that takes 2 arguments: Tail and State.
63 | %% - Tail is the (short) list of characters that could not yet be parsed
64 | %% because it might be a special token or not. Since this still has to
65 | %% be parsed, it should be put in front of the next block of data.
66 | %% - State is information that is passed by the parser to the callback
67 | %% functions transparently. This can be used to keep track of the
68 | %% location in the file etc.
69 | %% CFunction returns {NewData, NewState}, where NewData is a list of
70 | %% characters/unicode code points, and NewState the new value for the State.
71 | %%
72 | %% Returns: State
73 | %% (i.e.: the result of the last invocation of the callback function)
74 | %%
75 | %% parseDocumentBinary(Xml, State, EventFun, Encoding)
76 | %% parseDocument(Xml, State, EventFun, Encoding, Options)
77 | %%
78 | %% Just like parseDocument, but working on a binary instead of a list.
79 | %% Encoding = the encoding of the binary (atom()). Supported values:
80 | %% - 'utf-8'
81 | %% - 'latin-1'
82 | %%%%%%%%%%%%%%%%%%%%%%%%%
83 |
84 | %%%%%%%%%%%%%%%%%%%%%%%%%
85 | %%
86 | %% Events sent out by the SAX parser.
87 | %%
88 | %% Based on org.xml.sax ContentHandler interface [SAX].
89 | %%
90 | %% startDocument
91 | %%
92 | %% endDocument
93 | %% Will NOT be sent out in case of an error
94 | %%
95 | %% {startPrefixMapping, Prefix, URI}
96 | %% Begin the scope of a prefix - URI namespace mapping
97 | %% Will be sent immediately before the corresponding startElement event.
98 | %%
99 | %% {endPrefixMapping, Prefix}
100 | %% End the scope of a prefix - URI namespace mapping
101 | %% Will be sent immediately before the corresponding endElement event.
102 | %%
103 | %% {startElement, Uri, LocalName, Prefix, [Attributes]}
104 | %% Receive notification of the beginning of an element.
105 | %% There will be a corresponding endElement (even when the element is
106 | %% empty).
107 | %% All three name components will be provided.
108 | %%
109 | %% [Attributes] is a list of attribute records, see sax.hrl.
110 | %% Namespace attributes (xmlns:*) will not be reported.
111 | %% There will be NO attribute values for defaulted attributes!
112 | %%
113 | %% Providing 'Prefix' instead of 'Qualified name' is probably not quite
114 | %% in line with the SAX spec, but it appears to be more convenient.
115 | %%
116 | %% {endElement, Uri, LocalName, Prefix}
117 | %% Receive notification of the end of an element.
118 | %%
119 | %% {characters, Characters}
120 | %% Receive notification of character data.
121 | %% All character data will be in one chunk, except if there is a
122 | %% CDATA section included inside a character section. In that case
123 | %% there will be separate events for the characters before the CDATA, the
124 | %% CDATA section and the characters following it (if any, of course).
125 | %%
126 | %% {ignorableWhitespace, Characters}
127 | %% If a character data section (as it would be reported by the 'characters'
128 | %% event, see above) consists ONLY of whitespace, it will be
129 | %% reported as ignorableWhitespace.
130 | %%
131 | %% {processingInstruction, Target, Data}
132 | %%
133 | %% {error, Description}
134 | %% {internalError, Description}
135 | %%
136 | %%%%%%%%%%%%%%%%%%%%%%%%
137 |
%% parseDocument(Xml, UserState, Callback): as parseDocument/4, without
%% options.
parseDocument(Xml, UserState, Callback) ->
  parseDocument(Xml, UserState, Callback, []).

%% Parses Xml (a list or a binary), calling Callback(Event, UserState) for
%% the SAX events. The options are translated to an #erlsom_sax_state{} by
%% getOptions/1; the callback and the user state are stored in that same
%% record.
parseDocument(Xml, UserState, Callback, Options) ->
  FromOptions = getOptions(Options),
  ParserState = FromOptions#erlsom_sax_state{callback = Callback,
                                             user_state = UserState},
  parseDocument(Xml, ParserState).
145 |
%% Selects the parser for the input.
%%
%% - A list is parsed by erlsom_sax_list.
%% - For a binary the encoding from the state is used. If no encoding was
%%   specified (undefined) it is detected by erlsom_lib:detectEncoding/3,
%%   which also returns the data to continue with and a new continuation
%%   state.
parseDocument(Xml, S) when is_list(Xml) ->
  erlsom_sax_list:parse(Xml, S);

parseDocument(Xml, S) when is_binary(Xml) ->
  case S#erlsom_sax_state.encoding of
    undefined ->
      CFun = S#erlsom_sax_state.continuation_fun,
      CState = S#erlsom_sax_state.continuation_state,
      {Detected, Data, NewCState} = erlsom_lib:detectEncoding(Xml, CFun, CState),
      parseDocumentBinary(Detected, Data,
                          S#erlsom_sax_state{continuation_state = NewCState});
    Requested ->
      parseDocumentBinary(Requested, Xml, S)
  end.
160 |
%% Hands the document to the parser module for Encoding (an atom).
%% 'latin-1' and 'iso_8859_1' are both handled by erlsom_sax_latin1.
%% Throws {error, "Encoding not supported: ..."} for any other atom than
%% the ones listed below.
parseDocumentBinary('utf8', Xml, State) ->
  erlsom_sax_utf8:parse(Xml, State);
parseDocumentBinary('utf16be', Xml, State) ->
  erlsom_sax_utf16be:parse(Xml, State);
parseDocumentBinary('utf16le', Xml, State) ->
  erlsom_sax_utf16le:parse(Xml, State);
parseDocumentBinary('latin-1', Xml, State) ->
  erlsom_sax_latin1:parse(Xml, State);
parseDocumentBinary('iso_8859_1', Xml, State) ->
  erlsom_sax_latin1:parse(Xml, State);
parseDocumentBinary('iso_8859_15', Xml, State) ->
  erlsom_sax_latin9:parse(Xml, State);
parseDocumentBinary('list', Xml, State) ->
  erlsom_sax_list:parse(Xml, State);
parseDocumentBinary(Unsupported, _Xml, _State) ->
  throw({error, "Encoding not supported: " ++ atom_to_list(Unsupported)}).
180 |
%% Translates the list of options to an #erlsom_sax_state{}.
%% An option that is not recognised (or that has a value that does not
%% pass the guard) results in a function_clause error.
getOptions(Options) ->
  getOptions(Options, #erlsom_sax_state{}).

%% Applies the options one by one. If no continuation function was
%% specified, a function is used that returns its input unchanged.
getOptions([], S) ->
  case S#erlsom_sax_state.continuation_fun of
    undefined ->
      Unchanged = fun(T, St) -> {T, St} end,
      S#erlsom_sax_state{continuation_fun = Unchanged};
    _ ->
      S
  end;
getOptions([Option | Rest], S) ->
  getOptions(Rest, setOption(Option, S)).

%% Stores the value of 1 option in the state.
setOption(expand_entities, S) ->
  S#erlsom_sax_state{expand_entities = true};
setOption({expand_entities, V}, S) when is_boolean(V) ->
  S#erlsom_sax_state{expand_entities = V};
setOption({output_encoding, V}, S) ->
  S#erlsom_sax_state{output = V};
setOption({continuation_function, Cf, Cs}, S) when is_function(Cf) ->
  S#erlsom_sax_state{continuation_fun = Cf,
                     continuation_state = Cs};
setOption({encoding, V}, S) ->
  %% NOTE(review): list_to_atom/1 creates a new atom for every distinct
  %% value - confirm that the encoding never comes from untrusted input.
  S#erlsom_sax_state{encoding = list_to_atom(V)};
setOption({max_entity_depth, V}, S) when is_integer(V); V == infinity ->
  S#erlsom_sax_state{max_entity_depth = V};
setOption({max_entity_size, V}, S) when is_integer(V); V == infinity ->
  S#erlsom_sax_state{max_entity_size = V};
setOption({max_nr_of_entities, V}, S) when is_integer(V); V == infinity ->
  S#erlsom_sax_state{max_nr_of_entities = V};
setOption({max_expanded_entity_size, V}, S) when is_integer(V); V == infinity ->
  S#erlsom_sax_state{max_expanded_entity_size = V}.
212 |
--------------------------------------------------------------------------------
/src/erlsom_sax_lib.erl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2011 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% A couple of functions used by erlsom_sax (for each encoding variant)
23 | %%% ====================================================================
24 |
25 | %%% Version: 20-01-2008
26 |
27 | -module(erlsom_sax_lib).
28 |
29 | -include("erlsom_sax.hrl").
30 | -export([test/0]).
31 | -export([findCycle/4]).
32 | -export([continueFun/3]).
33 | -export([continueFun/4]).
34 | -export([continueFun2/4]).
35 | -export([continueFun/5]).
36 | -export([continueFun/6]).
37 | -export([continueFun2/6]).
38 | -export([mapStartPrefixMappingCallback/3]).
39 | -export([mapEndPrefixMappingCallback/3]).
40 | -export([createStartTagEvent/3]).
41 |
42 | %% there are 4 variants of this function, with different numbers of arguments
43 | %% The names of the first arguments aren't really meaningful, they can
44 | %% be anything - they are only there to be passed to 'ParseFun'.
%% Gets more data from the continuation function and continues parsing.
%%
%% V1, V2, V3 - passed to ParseFun as they are.
%% T          - the data that could not be parsed yet; it is passed to the
%%              continuation function.
%% ParseFun   - called as ParseFun(V1, V2, V3, Data, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun(V1, V2, V3, T, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(V1, V2, V3, Data,
               State#erlsom_sax_state{continuation_state = CState})
  end.
54 |
%% Gets more data from the continuation function and continues parsing.
%% Like continueFun/6, but the data is the first argument, both of this
%% function and of ParseFun: ParseFun(Data, V1, V2, V3, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun2(T, V1, V2, V3, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(Data, V1, V2, V3,
               State#erlsom_sax_state{continuation_state = CState})
  end.
64 |
%% Gets more data from the continuation function and continues parsing.
%%
%% Prefix, Head - passed to ParseFun as they are.
%% T            - the data that could not be parsed yet; it is passed to
%%                the continuation function.
%% ParseFun     - called as ParseFun(Prefix, Head, Data, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun(Prefix, Head, T, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(Prefix, Head, Data,
               State#erlsom_sax_state{continuation_state = CState})
  end.
74 |
%% Gets more data from the continuation function and continues parsing.
%%
%% Head     - passed to ParseFun as it is.
%% T        - the data that could not be parsed yet; it is passed to the
%%            continuation function.
%% ParseFun - called as ParseFun(Head, Data, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun(Head, T, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(Head, Data,
               State#erlsom_sax_state{continuation_state = CState})
  end.
84 |
%% Gets more data from the continuation function and continues parsing.
%% Like continueFun/4, but the data is the first argument, both of this
%% function and of ParseFun: ParseFun(Data, Head, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun2(T, Head, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(Data, Head,
               State#erlsom_sax_state{continuation_state = CState})
  end.
94 |
%% Gets more data from the continuation function and continues parsing.
%%
%% T        - the data that could not be parsed yet; it is passed to the
%%            continuation function.
%% ParseFun - called as ParseFun(Data, NewState).
%%
%% If the continuation function returns T unchanged there is no more
%% data, and {error, "Malformed: Unexpected end of data"} is thrown.
continueFun(T, State, ParseFun) ->
  Continue = State#erlsom_sax_state.continuation_fun,
  {Data, CState} = Continue(T, State#erlsom_sax_state.continuation_state),
  case Data =:= T of
    true ->
      throw({error, "Malformed: Unexpected end of data"});
    false ->
      ParseFun(Data,
               State#erlsom_sax_state{continuation_state = CState})
  end.
104 |
105 |
106 | %% function to call the Callback function for all elements in a list of 'new namespaces'.
107 | %% returns State
%% Calls Callback({startPrefixMapping, Prefix, Uri}, State) for each
%% {Prefix, Uri} in the list, in the order of the list. The state that is
%% returned by one call is passed to the next one; the last state is the
%% result.
mapStartPrefixMappingCallback(Namespaces, State, Callback) ->
  Notify = fun({Prefix, Uri}, CurrentState) ->
               Callback({startPrefixMapping, Prefix, Uri}, CurrentState)
           end,
  lists:foldl(Notify, State, Namespaces).
112 |
113 | %% function to call the Callback function for all elements in a list of 'new namespaces'.
114 | %% returns State
%% Calls Callback({endPrefixMapping, Prefix}, State) for each
%% {Prefix, Uri} in the list, in the order of the list. The state that is
%% returned by one call is passed to the next one; the last state is the
%% result.
mapEndPrefixMappingCallback(Namespaces, State, Callback) ->
  Notify = fun({Prefix, _Uri}, CurrentState) ->
               Callback({endPrefixMapping, Prefix}, CurrentState)
           end,
  lists:foldl(Notify, State, Namespaces).
119 |
120 |
121 | %% StartTag = {Prefix, LocalName, QualifiedName}
122 | %% Attributes = list of Attribute
123 | %% Attribute = {{Prefix, LocalName} Value}
124 | %%
125 | %% returns: {Name, Attributes2, NewNamespaces}
126 | %% Name = {URI, LocalName, QualifiedName}
127 | %% Attributes2 = list of Attribute2
128 | %% Attribute2 = #attribute
129 | %% NewNamespaces = list of {Prefix, URI} (prefix can be []).
130 | %%
131 | %% Namespaces are in such an order that namespace of the 'closest ancestors'
132 | %% are in front. That way the right element will be found, even if a prefix is
133 | %% used more than once in the document.
134 | %%
%% Resolves the namespaces for a start tag.
%%
%% The namespace declarations are taken out of the attributes and put in
%% front of the namespaces that were known already. The resulting list is
%% used to find the URI for the tag and for the remaining attributes.
%%
%% Returns {Name, Attributes2, NewNamespaces}; NewNamespaces holds only
%% the namespaces that are declared on this tag.
createStartTagEvent(StartTag, Namespaces, Attributes) ->
  {Declared, PlainAttributes} = lookForNamespaces([], [], Attributes),
  InScope = Declared ++ Namespaces,
  {tagNameTuple(StartTag, InScope),
   attributeNameTuples([], PlainAttributes, InScope),
   Declared}.
148 |
149 | %% returns {Namespaces, OtherAttributes}, where
150 | %% Namespaces = a list of tuples {Prefix, URI}
151 | %% OtherAttributes = a list of tuples {Name, Value}
152 | %%
%% Splits a list of attributes into namespace declarations and other
%% attributes.
%%
%% - xmlns:p="uri" is added to Namespaces as {"p", "uri"}
%% - xmlns="uri" is added to Namespaces as {[], "uri"}
%% - everything else is added to OtherAttributes, as it is
%%
%% New entries are put in front of the lists, so both results are in
%% reverse order compared to the input.
lookForNamespaces(Namespaces, OtherAttributes, []) ->
  {Namespaces, OtherAttributes};

lookForNamespaces(Namespaces, OtherAttributes, [Attribute | Remaining]) ->
  {{Prefix, LocalName, _QName}, Value} = Attribute,
  case {Prefix, LocalName} of
    {P, _} when P == "xmlns" ->
      lookForNamespaces([{LocalName, Value} | Namespaces],
                        OtherAttributes, Remaining);
    {P, L} when P == [], L == "xmlns" ->
      lookForNamespaces([{[], Value} | Namespaces],
                        OtherAttributes, Remaining);
    _ ->
      lookForNamespaces(Namespaces, [Attribute | OtherAttributes], Remaining)
  end.
168 |
169 | %% StartTag = {Prefix, LocalName, QualifiedName}
170 | %% Namespaces = list of {Prefix, URI} (prefix can be []).
171 | %%
172 | %% Returns {Uri, LocalName, Prefix}
173 | %%
174 | %% TODO: error if not found? special treatment of 'xml:lang'?
%% Looks up the namespace for the prefix of a tag; the first entry in
%% Namespaces for the prefix is used. An empty prefix is looked up as
%% well, so a tag without prefix gets the default namespace (if there is
%% one). If the prefix is not found the URI is [].
%%
%% TODO: error if not found? special treatment of 'xml:lang'?
tagNameTuple({Prefix, LocalName, _QName}, Namespaces) ->
  case lists:keyfind(Prefix, 1, Namespaces) of
    false -> {[], LocalName, Prefix};
    {Prefix, Uri} -> {Uri, LocalName, Prefix}
  end.
181 |
182 |
183 | %% Attributes = list of Attribute
184 | %% Attribute = {{Prefix, LocalName} Value}
185 | %% Namespaces = list of {Prefix, URI} (prefix can be []).
186 | %%
187 | %% Returns a list of #attribute records
%% Translates {AttributeName, Value} tuples to #attribute{} records,
%% using attributeNameTuple/2 to find the URI for each of them.
%% The records are put in front of ProcessedAttributes one by one, so the
%% result is in reverse order compared to the input.
attributeNameTuples(ProcessedAttributes, [], _Namespaces) ->
  ProcessedAttributes;

attributeNameTuples(ProcessedAttributes, [{QName, Value} | Remaining], Namespaces) ->
  {Uri, LocalName, Prefix} = attributeNameTuple(QName, Namespaces),
  Attribute = #attribute{uri = Uri,
                         prefix = Prefix,
                         localName = LocalName,
                         value = Value},
  attributeNameTuples([Attribute | ProcessedAttributes], Remaining, Namespaces).
199 |
200 | %% AttributeName = {Prefix, LocalName, QualifiedName}
201 | %% Namespaces = list of {Prefix, URI} (prefix can be []).
202 | %%
203 | %% Returns {Uri, LocalName, Prefix}.
204 | %% Difference with TagNameTuple: attributes without prefix do NOT belong
205 | %% to the default namespace.
%% AttributeName = {Prefix, LocalName, QualifiedName}
%% Namespaces = list of {Prefix, URI} (prefix can be []).
%%
%% Returns {Uri, LocalName, Prefix}.
%%
%% - An attribute without prefix does not belong to any namespace (not
%%   even to the default namespace): the URI and the prefix are both [].
%% - For an attribute with a prefix the first entry in Namespaces for the
%%   prefix is used.
%% - The prefix "xml" is mapped to the XML namespace if it was not
%%   declared; for any other prefix that is not found the URI is [].
attributeNameTuple({[], LocalName, _QName}, _Namespaces) ->
  %% the third element is the prefix, which is empty - not the local name
  {[], LocalName, []};
attributeNameTuple({Prefix, LocalName, _QName}, Namespaces) ->
  case lists:keyfind(Prefix, 1, Namespaces) of
    {Prefix, Uri} ->
      {Uri, LocalName, Prefix};
    false when Prefix == "xml" ->
      {"http://www.w3.org/XML/1998/namespace", LocalName, Prefix};
    false ->
      {[], LocalName, Prefix}
  end.
221 |
222 | %% simplistic function to find a cycle in a list [{a, b}, {b, c}, ...]
223 | %% or if there is a path longer than MaxDepth.
224 | %% The edge A, B is added; the rest of the graph is known
225 | %% to be acyclical. So we start from B (To) and look for a path
226 | %% to A (Current).
%% Looks for a path from To to Current in Edges (a list of {From, To}
%% tuples), starting at depth 1.
%%
%% Returns: cycle     - there is a path from To to Current
%%          max_depth - the depth of the search reached MaxDepth
%%          false     - neither of the above
findCycle(To, Current, Edges, MaxDepth) ->
  findCycle(To, Current, Edges, MaxDepth, 1).

%% The search itself. CurrentD is the depth of the node 'To'.
findCycle(_To, _Current, [], _MaxD, _CurrentD) ->
  false;
findCycle(To, Current, Edges, MaxD, CurrentD) ->
  %% take the first edge that starts in To
  FirstEdge = lists:keyfind(To, 1, Edges),
  if
    MaxD == CurrentD ->
      max_depth; %% reached Max Depth
    FirstEdge =:= false ->
      false;
    true ->
      case FirstEdge of
        {_, Current} ->
          cycle; %% found a cycle
        {_, Via} ->
          %% follow this edge; if that does not lead to a result, try
          %% again from To without it
          Untried = lists:keydelete(To, 1, Edges),
          case findCycle(Via, Current, Untried, MaxD, CurrentD + 1) of
            false -> findCycle(To, Current, Untried, MaxD, CurrentD);
            Found -> Found
          end
      end
  end.
250 |
%% Some checks for findCycle/4. A check that fails results in a badmatch
%% error; if all checks pass the result of the last one (cycle) is
%% returned.
test() ->
  Chain = [{a, b}, {b, c}],
  %% no edge that starts in b
  false = findCycle(b, a, [{a, b}], 2),
  %% b -> c is too deep for a maximum of 2, but not for a maximum of 3
  max_depth = findCycle(b, a, Chain, 2),
  false = findCycle(b, a, Chain, 3),
  %% a tree: no path that leads back to a
  Tree = [{a, b}, {b, c}, {c, d}, {c, e},
          {c, f}, {c, g}, {f, q}, {f, r}, {f, s},
          {g, z}],
  false = findCycle(b, a, Tree, 12),
  %% b -> c -> g -> a
  CycleViaG = [{a, b}, {c, d}, {b, c}, {c, e},
               {c, f}, {f, q}, {f, r}, {f, s}, {q, s},
               {g, a}, {c, g}],
  cycle = findCycle(b, a, CycleViaG, 12),
  %% b -> c -> a
  CycleViaC = [{a, b}, {b, c}, {c, d}, {c, e},
               {c, a}, {c, g}, {f, q}, {f, r}, {f, s},
               {g, a}],
  cycle = findCycle(b, a, CycleViaC, 12).
264 |
265 |
--------------------------------------------------------------------------------
/examples/complex_form/erlsom_complex_form.erl:
--------------------------------------------------------------------------------
1 | %%% translate XML to the output format used by XMERL.
2 | %%% The output is not complete: some fields in the XMERL output records
3 | %%% are not populated. But it is enough to use the XPATH functions (at
4 | %%% least for the examples that I tried).
5 | %%%
6 | %%% Note: this hasn't been tested properly. See it as an example of how
7 | %%% the sax parser can be used.
8 | %%%
9 | -module(erlsom_complex_form).
10 |
11 | %% user interface
12 | -export([scan/1]).
13 | -export([scan_file/1]).
14 | %% with options
15 | -export([scan/2]).
16 | -export([scan_file/2]).
17 |
18 | -include_lib("erlsom/src/erlsom_sax.hrl").
19 |
20 | %% The record definitions below are copied from xmerl hrl files!
21 | %% XML Element
22 | %% content = [#xmlElement()|#xmlText()|#xmlPI()|#xmlComment()|#xmlDecl()]
23 | -record(xmlElement,{
24 | name, % atom()
25 | expanded_name = [], % string() | {URI,Local} | {"xmlns",Local}
26 | nsinfo = [], % {Prefix, Local} | []
27 | namespace,
28 | parents = [], % [{atom(),integer()}]
29 | pos, % integer()
30 | attributes = [], % [#xmlAttribute()]
31 | content = [],
32 | language = "", % string()
33 | xmlbase="", % string() XML Base path, for relative URI:s
34 | elementdef=undeclared % atom(), one of [undeclared | prolog | external | element]
35 | }).
36 |
37 | %% plain text
38 | %% IOlist = [char() | binary () | IOlist]
39 | -record(xmlText,{
40 | parents = [], % [{atom(),integer()}]
41 | pos, % integer()
42 | language = [], % inherits the element's language
43 | value, % IOlist()
44 | type = text % atom() one of (text|cdata)
45 | }).
46 |
47 | %% Attribute
48 | -record(xmlAttribute,{
49 | name, % atom()
50 | expanded_name=[],% atom() | {string(),atom()}
51 | nsinfo = [], % {Prefix, Local} | []
52 | namespace = [], % inherits the element's namespace
53 | parents = [], % [{atom(),integer()}]
54 | pos, % integer()
55 | language = [], % inherits the element's language
56 | value, % IOlist() | atom() | integer()
57 | normalized % atom() one of (true | false)
58 | }).
59 |
60 | %% namespace record
61 | -record(xmlNamespace,{
62 | default = [],
63 | nodes = []
64 | }).
65 |
66 |
67 | -record(sState, {stack = [], posStack = [], options}).
68 |
%% scan_file(File): as scan_file/2, without options.
scan_file(File) ->
  scan_file(File, []).

%% Reads File and passes its contents (a binary) to scan/2.
%% If the file cannot be read, the result of file:read_file/1 is returned
%% as it is.
scan_file(File, Options) ->
  ReadResult = file:read_file(File),
  case ReadResult of
    {ok, Contents} -> scan(Contents, Options);
    _ -> ReadResult
  end.
79 |
%% scan(Xml): as scan/2, without options.
scan(Xml) ->
  scan(Xml, []).

%% Parses Xml with erlsom:parse_sax/4, using callback/2 as the callback
%% function and a state with an empty stack as the initial state. Options
%% are passed on to erlsom:parse_sax/4.
scan(Xml, Options) ->
  InitialState = #sState{stack = []},
  erlsom:parse_sax(Xml, InitialState, fun callback/2, Options).
87 |
88 |
%% SAX event handler that builds a tree of xmerl-style records.
%%
%% Event - a SAX event.
%% State - an #sState{}. On startDocument any other term is replaced by a
%%         new #sState{} with an empty stack.
%%
%% Returns the new state; for endDocument the root element is returned
%% (the single element that is left on the stack).
%% ignorableWhitespace is treated as character data.
%%
%% Whatever is raised while the event is handled is passed on to
%% throwError/4, together with the event and the state. For errors the
%% stack trace is added to the reason; it is taken from the catch clause
%% (Class:Reason:Stacktrace), because erlang:get_stacktrace/0 is no longer
%% available in current OTP releases.
callback(Event, State) ->
  try
    case Event of
      startDocument ->
        case State of
          #sState{} ->
            State;
          _ ->
            #sState{stack = [], options = []}
        end;
      {startElement, _Uri, _LocalName, _Prefix, _Attributes} ->
        startElement(Event, State);
      {endElement, _Uri, _LocalName, _Prefix} ->
        endElement(Event, State);
      {characters, _Characters} ->
        characters(Event, State);
      {ignorableWhitespace, Characters} ->
        characters({characters, Characters}, State);
      {processingInstruction, _Target, _Data} -> State;
      {startPrefixMapping, _Prefix, _URI} ->
        State;
      {endPrefixMapping, _Prefix} ->
        State;
      endDocument ->
        case State of
          #sState{stack = [Root]} ->
            Root;
          _Else ->
            throw({error, "unexpected end"})
        end;
      {error, Message} ->
        throw(Message);
      {'EXIT', Message} ->
        exit(Message)
    end
  catch
    error:Reason:Stacktrace ->
      throwError(error, {Reason, Stacktrace}, Event, State);
    Class:Exception ->
      throwError(Class, Exception, Event, State)
  end.
129 |
130 | %% Stack contains the tree that is growing as the elements come in.
131 | %% [{root, [attributes], [element1, element2]},
132 | %% {element3, [attributes], [element3.1, element3.2]},
133 | %% {element3.3, [attributes], [element3.3.1]}] (but in reverse order...)
134 |
135 | %% Now with some added info that we need for the complex form:
136 | %% Stack contains the tree that is growing as the elements come in.
137 | %% [{root, SeqNo, [attributes], [element1, element2]},
138 | %% {element3, SeqNo, [attributes], [element3.1, element3.2]},
139 | %% {element3.3, SeqNo, [attributes], [element3.3.1]}] (but in reverse order...)
140 |
141 |
142 | %% When a startElement event comes in, add a new element to the stack:
143 | %% [{root, [attributes], [element1, element2]},
144 | %% {element3, [attributes], [element3.1, element3.2]},
145 | %% {element3.3, [attributes], [element3.3.1]},
146 | %% {element3.3.2, [attributes], []}]
147 |
148 | %% When a textElement event comes in, insert it into the top element:
149 | %% [{root, [attributes], [element1, element2]},
150 | %% {element3, [attributes], [element3.1, element3.2]},
151 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
152 | %% {element3.3, [attributes], [element3.3.1]},
153 | %% {element3.3.2, [attributes], [{#text, "the text"}]}]
154 |
155 | %% When an endElement comes in, insert the top element of the stack in the
156 | %% layer below it (its parent):
157 | %% [{root, [attributes], [element1, element2]},
158 | %% {element3, [attributes], [element3.1, element3.2]},
159 | %% {element3.3, [attributes], [element3.3.1, element3.3.2]}]
160 |
%% Handles a startElement event: puts a new #xmlElement{}, that has no
%% content yet, on top of the stack, and a counter with value 0 on top of
%% the position stack.
%%
%% - The first element (both stacks are empty) gets position 1, no parents
%%   and a namespace record that starts from an empty #xmlNamespace{}.
%% - Any other element gets the counter that is on top of the position
%%   stack + 1 as its position, the elements that are on the stack as its
%%   parents, and a namespace record that starts from the one of its
%%   parent.
startElement({startElement, Uri, LocalName, Prefix, Attributes},
             State = #sState{stack = [], posStack = []}) ->
  ElementName = makeName(LocalName, Prefix),
  Root = #xmlElement{name = ElementName,
                     expanded_name = makeExpandedName(Uri, LocalName),
                     pos = 1,
                     nsinfo = makeNsInfo(Prefix, LocalName),
                     namespace = makeNs(Prefix, Uri, #xmlNamespace{}),
                     parents = [],
                     attributes = processAttributes(Attributes, State),
                     content = []},
  State#sState{stack = [Root], posStack = [0]};

startElement({startElement, Uri, LocalName, Prefix, Attributes},
             State = #sState{stack = [Parent | _] = Stack,
                             posStack = [Pos | _] = PosStack}) ->
  ElementName = makeName(LocalName, Prefix),
  Element = #xmlElement{name = ElementName,
                        expanded_name = makeExpandedName(Uri, LocalName),
                        pos = Pos + 1,
                        nsinfo = makeNsInfo(Prefix, LocalName),
                        namespace = makeNs(Prefix, Uri, Parent#xmlElement.namespace),
                        parents = getParentsFromStack(Stack, []),
                        attributes = processAttributes(Attributes, State),
                        content = []},
  State#sState{stack = [Element | Stack], posStack = [0 | PosStack]}.
186 |
%% Handles an endElement event.
%%
%% The content of the element on top of the stack was collected in reverse
%% order, so it is reversed when the element is closed.
%%
%% - If the element is the only one on the stack (the root), it stays on
%%   the stack; the position stack is not changed.
%% - Otherwise the element is added to the content of its parent, the
%%   counter of the element is removed from the position stack and the
%%   counter of the parent is incremented.
endElement({endElement, _Uri, _LocalName, _Prefix},
           State = #sState{stack = [Root = #xmlElement{content = RootContent}]}) ->
  State#sState{stack = [Root#xmlElement{content = lists:reverse(RootContent)}]};

endElement({endElement, _Uri, _LocalName, _Prefix},
           State = #sState{stack = [Child = #xmlElement{content = ChildContent},
                                    Parent = #xmlElement{content = ParentContent} | Ancestors],
                           posStack = [_NrOfChildEls, NrOfElements | PosTail]}) ->
  Closed = Child#xmlElement{content = lists:reverse(ChildContent)},
  State#sState{stack = [Parent#xmlElement{content = [Closed | ParentContent]} | Ancestors],
               posStack = [NrOfElements + 1 | PosTail]}.
200 |
%% Handles a characters event: adds the text to the element on top of the
%% stack.
%%
%% - If the child that was added last to that element is an #xmlText{},
%%   the characters are appended to its value.
%% - Otherwise a new #xmlText{} is added. Its position is the counter on
%%   top of the position stack + 1 (the counter is incremented as well),
%%   its parents are the elements that are on the stack.
characters({characters, Characters},
           State = #sState{stack = [Element = #xmlElement{content = [LastText = #xmlText{value = Text}
                                                                     | Earlier]}
                                    | Tail]}) ->
  Extended = LastText#xmlText{value = Text ++ Characters},
  State#sState{stack = [Element#xmlElement{content = [Extended | Earlier]} | Tail]};

characters({characters, Characters},
           State = #sState{stack = [Element = #xmlElement{content = Content} | Tail] = Stack,
                           posStack = [NrOfElements | PosTail]}) ->
  NewText = #xmlText{value = Characters,
                     parents = getParentsFromStack(Stack, []),
                     pos = NrOfElements + 1},
  State#sState{stack = [Element#xmlElement{content = [NewText | Content]} | Tail],
               posStack = [NrOfElements + 1 | PosTail]}.
212 |
%% Translates the elements on the stack to {Name, Position} tuples and
%% puts them in front of Acc. The order is reversed, so the element that
%% is at the bottom of the stack comes first.
getParentsFromStack(Stack, Acc) ->
  lists:foldl(fun(#xmlElement{name = Name, pos = Pos}, Parents) ->
                  [{Name, Pos} | Parents]
              end,
              Acc, Stack).
217 |
%% Translates the #attribute{} records of a startElement event to
%% #xmlAttribute{} records (in the same order). The attributes are
%% numbered, starting from 1; the number is stored in the 'pos' field.
processAttributes(Attributes, State) ->
  processAttributes(Attributes, State, 1, []).

%% Does the work for processAttributes/2. Position is the number for the
%% next attribute, Converted is the result so far (in reverse order).
processAttributes([#attribute{localName = Local, uri = Namespace,
                              prefix = Pfx, value = Val} | Remaining],
                  State, Position, Converted) ->
  XmlAttribute = #xmlAttribute{name = makeName(Local, Pfx),
                               expanded_name = makeExpandedName(Namespace, Local),
                               nsinfo = makeNsInfo(Pfx, Local),
                               pos = Position,
                               value = Val},
  processAttributes(Remaining, State, Position + 1, [XmlAttribute | Converted]);
processAttributes([], _State, _Position, Converted) ->
  lists:reverse(Converted).
232 |
233 | %% Re-raise a problem that occurred while handling a SAX event.
234 | %% If the last argument is the parser state, the reason becomes
235 | %%   {error, [{exception, Exception}, {stack, Trace}, {received, Event}]}
236 | %% where Trace is the result of printStackTrace/1 for the element stack; for
237 | %% any other last argument the reason is just {error, Exception}.
238 | %% Class 'throw' is raised with throw/1; 'error' and 'exit' both end up
239 | %% as exit/1. Any other class fails with a case_clause error.
240 | throwError(Class, Exception, Event, #sState{stack = Stack}) ->
241 |     Details = [{exception, Exception},
242 |                {stack, printStackTrace(Stack)},
243 |                {received, Event}],
244 |     throwErrorAs(Class, {error, Details});
245 | throwError(Class, Exception, _Event, _NoState) ->
246 |     throwErrorAs(Class, {error, Exception}).
247 |
248 | %% Raises Reason in the way that belongs to Class (see throwError/4).
249 | throwErrorAs(Class, Reason) ->
250 |     case Class of
251 |         'error' -> exit(Reason);
252 |         'throw' -> throw(Reason);
253 |         'exit' -> exit(Reason)
254 |     end.
256 |
257 | %% Names of the elements on the stack as [{element, Name}], last stack entry first.
258 | printStackTrace(Stack) ->
259 |     printStackTrace(Stack, []).
260 |
261 | printStackTrace([#xmlElement{name = Name} | Outer], Trace) -> printStackTrace(Outer, [{element, Name} | Trace]);
262 | printStackTrace([], Trace) -> Trace.
263 |
264 | %% Qualified name "Prefix:Local" (just Local if the prefix is []), as an atom if possible.
265 | makeName(Local, Prefix) ->
266 |     Qualified = case Prefix of [] -> Local; _ -> Prefix ++ ":" ++ Local end,
267 |     list_to_atom_or_not(Qualified).
268 |
269 | %% Value for the nsinfo field: [] if the prefix is [], otherwise {Prefix, Local}.
270 | makeNsInfo(Prefix, Local) -> case Prefix of [] -> []; _ -> {Prefix, Local} end.
271 |
272 | %% No URI: reuse the parent's namespace as is; otherwise add {Prefix, Uri} to its nodes.
273 | makeNs(_Prefix, [], Inherited) -> Inherited;
274 | makeNs(Prefix, Uri, #xmlNamespace{nodes = Known} = Inherited) ->
275 |     Inherited#xmlNamespace{nodes = Known ++ [{Prefix, list_to_atom_or_not(Uri)}]}.
276 |
277 | %% Expanded name: just Local when there is no namespace URI, else {Uri, Local}
278 | %% (each part converted to an atom where possible).
279 | makeExpandedName(Uri, Local) ->
280 |     Name = list_to_atom_or_not(Local),
281 |     case Uri of [] -> Name; _ -> {list_to_atom_or_not(Uri), Name} end.
282 |
283 | %% Returns String as an atom, or unchanged when list_to_atom/1 rejects it.
284 | %% NOTE(review): the names converted here come from the parsed document and
285 | %% atoms are never garbage collected - untrusted input can exhaust the atom table.
286 | list_to_atom_or_not(String) ->
287 |     try list_to_atom(String) catch error:_ -> String end.
288 |
289 |
--------------------------------------------------------------------------------
/src/erlsom_ucs.erl:
--------------------------------------------------------------------------------
1 | %%% -*- Erlang -*-
2 | %%%-------------------------------------------------------------------
3 | %%% Author: Lon Willett
4 | %%%
5 | %%% Description: Some minimal support for encoding, decoding, and
6 | %%% manipulating strings of ISO-10646 characters (i.e. Unicode).
7 | %%%-------------------------------------------------------------------
8 |
9 |
10 | %% NOTICE: This is just an excerpt of the original ucs application
11 |
12 | %% This is a copy from xmerl_ucs, but it has
13 | %% been modified to handle the case that a block of data ends in the middle
14 | %% of a group of bytes that make up 1 character. In such a case the
15 | %% bytes that belong to the incomplete character are passed back, so that
16 | %% they can be put in front of the next block of data.
17 |
18 | %% the function 'to_utf8' is an exact copy.
19 |
20 | -module(erlsom_ucs).
21 | -author('Lon.Willett@sse.ie').
22 | -modified_by('johan.blom@mobilearts.se').
23 | -modified_by('w.a.de.jong@gmail.com').
24 | -compile([verbose,report_warnings,warn_unused_vars]).
25 |
26 |
27 | -export([to_utf8/1, from_utf8/1]).
28 | -export([decode_utf8/1]).
29 | -export([char_to_utf8/1]).
30 |
31 | -export([from_utf16be/1, from_utf16le/1]).
32 |
33 | %% Decode a complete UTF-8 string; returns 'error' if bytes of an incomplete
34 | %% character are left over. Invalid bytes make from_utf8/1 exit.
35 | %% TODO: replace this by something a bit more efficient
36 | decode_utf8(Utf8) ->
37 |     {Decoded, Pending} = from_utf8(Utf8),
38 |     case Pending of [] -> Decoded; _ -> error end.
39 |
40 |
41 | %% from_utf8(Bytes) -> {[UnicodeChar], Tail}
42 | %% Decode UTF-8 encoded character-strings; Bytes is a binary or a list of bytes.
43 | %%
44 | %% Modification (WdJ): Added an output parameter (Tail):
45 | %% if the input ends in the middle of a character, the bytes of that
46 | %% incomplete character are returned in Tail (otherwise Tail is []), so
47 | %% that data can be parsed in arbitrary blocks.
48 | %%
49 | %% Exits with {ucs, {bad_utf8_character_code}} if expand_utf8/1 reports bad bytes.
50 |
51 | from_utf8(Bin) when is_binary(Bin) ->
52 |     from_utf8(binary_to_list(Bin));
53 | from_utf8(List) ->
54 |     {Chars, Tail, BadCount} = expand_utf8(List),
55 |     if
56 |         BadCount =:= 0 -> {Chars, Tail};
57 |         true -> exit({ucs,{bad_utf8_character_code}})
58 |     end.
64 |
65 | %% expand_utf8(Bytes) -> {[UnicodeChar], Tail, NumberOfBad}
66 | %% Expand UTF-8 byte sequences to ISO 10646/Unicode characters. Illegal
67 | %% bytes and encodings that are longer than necessary are dropped and
68 | %% counted rather than reported immediately.
69 | %%
70 | %% Modification (WdJ): Added an output parameter (Tail): the bytes of a
71 | %% character that is cut off at the end of the input (or []), so that
72 | %% data can be parsed in arbitrary blocks.
73 | %%
74 | %% Reference:
75 | %% RFC 3629: "UTF-8, a transformation format of ISO 10646".
76 | expand_utf8(Bytes) ->
77 |     NoChars = [],
78 |     NoneBad = 0,
79 |     expand_utf8_1(Bytes, NoChars, NoneBad).
80 |
81 | expand_utf8_1([C|Cs], Acc, Bad) when C < 16#80 ->
82 | %% Plain Ascii character. (Acc: characters decoded so far, newest first; Bad: number of rejected bytes/sequences.)
83 | expand_utf8_1(Cs, [C|Acc], Bad);
84 | expand_utf8_1([C1,C2|Cs], Acc, Bad) when C1 band 16#E0 =:= 16#C0, % 2 bytes: 110xxxxx 10xxxxxx
85 | C2 band 16#C0 =:= 16#80 ->
86 | case ((C1 band 16#1F) bsl 6) bor (C2 band 16#3F) of
87 | C when 16#80 =< C -> % a smaller value would have fitted in 1 byte
88 | expand_utf8_1(Cs, [C|Acc], Bad);
89 | _ ->
90 | %% Bad range (over-long encoding): both bytes are dropped and counted as 1.
91 | expand_utf8_1(Cs, Acc, Bad+1)
92 | end;
93 | expand_utf8_1([C1], Acc, Bad) when C1 band 16#E0 =:= 16#C0 -> % 2-byte character cut off at the end of the data
94 | {lists:reverse(Acc), [C1], Bad};
95 | expand_utf8_1([C1,C2,C3|Cs], Acc, Bad) when C1 band 16#F0 =:= 16#E0, % 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
96 | C2 band 16#C0 =:= 16#80,
97 | C3 band 16#C0 =:= 16#80 ->
98 | case ((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
99 | (C3 band 16#3F) of
100 | C when 16#800 =< C -> % NOTE(review): surrogate codes 16#D800-16#DFFF are not rejected here
101 | expand_utf8_1(Cs, [C|Acc], Bad);
102 | _ ->
103 | %% Bad range (over-long encoding): the 3 bytes are dropped and counted as 1.
104 | expand_utf8_1(Cs, Acc, Bad+1)
105 | end;
106 | expand_utf8_1([C1], Acc, Bad) when C1 band 16#F0 =:= 16#E0 -> % 3-byte character cut off after 1 byte
107 | {lists:reverse(Acc), [C1], Bad};
108 | expand_utf8_1([C1,C2], Acc, Bad) when C1 band 16#F0 =:= 16#E0,
109 | C2 band 16#C0 =:= 16#80 -> % 3-byte character cut off after 2 bytes
110 | {lists:reverse(Acc), [C1, C2], Bad};
111 | expand_utf8_1([C1,C2,C3,C4|Cs], Acc, Bad) when C1 band 16#F8 =:= 16#F0, % 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
112 | C2 band 16#C0 =:= 16#80,
113 | C3 band 16#C0 =:= 16#80,
114 | C4 band 16#C0 =:= 16#80 ->
115 | case ((((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
116 | (C3 band 16#3F)) bsl 6) bor (C4 band 16#3F) of
117 | C when 16#10000 =< C -> % NOTE(review): no upper limit (16#10FFFF) is checked
118 | expand_utf8_1(Cs, [C|Acc], Bad);
119 | _ ->
120 | %% Bad range (over-long encoding): the 4 bytes are dropped and counted as 1.
121 | expand_utf8_1(Cs, Acc, Bad+1)
122 | end;
123 | expand_utf8_1([C1], Acc, Bad) when C1 band 16#F8 =:= 16#F0 -> % 4-byte character cut off after 1 byte
124 | {lists:reverse(Acc), [C1], Bad};
125 | expand_utf8_1([C1,C2], Acc, Bad) when C1 band 16#F8 =:= 16#F0,
126 | C2 band 16#C0 =:= 16#80 -> % 4-byte character cut off after 2 bytes
127 | {lists:reverse(Acc), [C1, C2], Bad};
128 | expand_utf8_1([C1,C2,C3], Acc, Bad) when C1 band 16#F8 =:= 16#F0,
129 | C2 band 16#C0 =:= 16#80,
130 | C3 band 16#C0 =:= 16#80 -> % 4-byte character cut off after 3 bytes
131 | {lists:reverse(Acc), [C1, C2, C3], Bad};
132 | expand_utf8_1([_Bad|Cs], Acc, Bad) ->
133 | %% Ignore bad character: a byte that fits none of the clauses above is skipped and counted.
134 | expand_utf8_1(Cs, Acc, Bad+1);
135 | expand_utf8_1([], Acc, Bad) -> {lists:reverse(Acc), [], Bad}. % end of the data, no incomplete character pending
136 |
137 | %% from_utf16be(Bytes) -> {[UnicodeChar], Tail} | {error, not_utf16be}
138 | %% Decode big-endian UTF-16 (a binary or a list of bytes) to a list of
139 | %% ISO 10646/Unicode characters.
140 | %%
141 | %% Modification (WdJ): Added an output parameter (Tail):
142 | %% if the data ends in the middle of a character (an odd byte, or a high
143 | %% surrogate without its complete low surrogate), the bytes of that
144 | %% incomplete character are returned as a list (if not, Tail is []).
145 | %% The goal is to allow parsing of data in arbitrary blocks.
146 | %% Also: changed to work on lists instead of binaries.
147 | %%
148 | %% Data that matches none of the accepted forms (e.g. an unpaired
149 | %% surrogate) gives {error, not_utf16be}; the code units 16#FFFE and
150 | %% 16#FFFF are not accepted and raise an if_clause error.
151 | from_utf16be(Bin) when is_binary(Bin) -> from_utf16be(binary_to_list(Bin), []);
152 | from_utf16be(List) -> from_utf16be(List, []).
153 |
154 | from_utf16be([], Acc) ->
155 |     {lists:reverse(Acc), []};
156 | from_utf16be([_Byte] = Tail, Acc) ->
157 |     {lists:reverse(Acc), Tail};
158 | from_utf16be([Msb, Lsb | More], Acc) when Msb < 16#D8; Msb > 16#DF ->
159 |     %% A single code unit outside the surrogate range.
160 |     Unit = Msb * 256 + Lsb,
161 |     if Unit < 16#FFFE -> from_utf16be(More, [Unit | Acc]) end;
162 | from_utf16be([Hi1, Hi2, Lo1, Lo2 | More], Acc)
163 |   when Hi1 >= 16#D8, Hi1 < 16#DC, Lo1 >= 16#DC, Lo1 < 16#E0 ->
164 |     %% Surrogate pair: 10 bits from each half, offset by 16#10000.
165 |     HiBits = (Hi1 * 256 + Hi2) band 16#3FF,
166 |     LoBits = (Lo1 * 256 + Lo2) band 16#3FF,
167 |     from_utf16be(More, [(HiBits bsl 10) + LoBits + 16#10000 | Acc]);
168 | from_utf16be([Hi1, _] = Tail, Acc) when Hi1 >= 16#D8, Hi1 < 16#DC ->
169 |     %% High surrogate, low surrogate still to come.
170 |     {lists:reverse(Acc), Tail};
171 | from_utf16be([Hi1, _, _] = Tail, Acc) when Hi1 >= 16#D8, Hi1 < 16#DC ->
172 |     %% High surrogate, low surrogate only half received.
173 |     {lists:reverse(Acc), Tail};
174 | from_utf16be(_Other, _Acc) ->
175 |     {error,not_utf16be}.
182 |
183 | %% from_utf16le(Bytes) -> {[UnicodeChar], Tail} | {error, not_utf16le}
184 | %% Decode little-endian UTF-16 (a binary or a list of bytes) to a list of
185 | %% ISO 10646/Unicode characters. Tail holds the bytes of a character that is
186 | %% cut off at the end of the data (an odd byte, or a high surrogate without
187 | %% its complete low surrogate), so that they can be put in front of the next
188 | %% block; it is [] if nothing is left over.
189 | %%
190 | %% Data that matches none of the accepted forms (e.g. an unpaired
191 | %% surrogate) gives {error, not_utf16le}; the code units 16#FFFE and
192 | %% 16#FFFF are not accepted and raise an if_clause error.
193 | from_utf16le(Bin) when is_binary(Bin) -> from_utf16le(binary_to_list(Bin), []);
194 | from_utf16le(List) -> from_utf16le(List, []).
195 |
196 | from_utf16le([], Acc) ->
197 |     {lists:reverse(Acc), []};
198 | from_utf16le([_Byte] = Tail, Acc) ->
199 |     {lists:reverse(Acc), Tail};
200 | from_utf16le([Lsb, Msb | More], Acc) when Msb < 16#D8; Msb > 16#DF ->
201 |     %% A single code unit outside the surrogate range.
202 |     Unit = Msb * 256 + Lsb,
203 |     if Unit < 16#FFFE -> from_utf16le(More, [Unit | Acc]) end;
204 | from_utf16le([Hi1, Hi2, Lo1, Lo2 | More], Acc)
205 |   when Hi2 >= 16#D8, Hi2 < 16#DC, Lo2 >= 16#DC, Lo2 < 16#E0 ->
206 |     %% Surrogate pair: 10 bits from each half, offset by 16#10000.
207 |     HiBits = (Hi2 * 256 + Hi1) band 16#3FF,
208 |     LoBits = (Lo2 * 256 + Lo1) band 16#3FF,
209 |     from_utf16le(More, [(HiBits bsl 10) + LoBits + 16#10000 | Acc]);
210 | from_utf16le([_, Hi2] = Tail, Acc) when Hi2 >= 16#D8, Hi2 < 16#DC ->
211 |     %% High surrogate, low surrogate still to come.
212 |     {lists:reverse(Acc), Tail};
213 | from_utf16le([_, Hi2, _] = Tail, Acc) when Hi2 >= 16#D8, Hi2 < 16#DC ->
214 |     %% High surrogate, low surrogate only half received.
215 |     {lists:reverse(Acc), Tail};
216 | from_utf16le(_Other, _Acc) ->
217 |     {error,not_utf16le}.
228 |
229 | %%% UTF-8 encoding and decoding
230 | %% UTF-8 bytes of one character or of a (possibly nested) list of characters. TODO: isn't this very inefficient? Building all these lists?
231 | to_utf8(Chars) when is_list(Chars) -> lists:append([to_utf8(C) || C <- Chars]);
232 | to_utf8(Ch) -> char_to_utf8_list(Ch).
233 |
234 | %% One character as a UTF-8 encoded binary.
235 | %% TODO: this is probably not the best way to do this.
236 | char_to_utf8(Char) -> list_to_binary(char_to_utf8_list(Char)).
237 |
238 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
239 | %%% UTF-8 support
240 | %%% Possible errors encoding UTF-8:
241 | %%% - Non-character values (something other than 0 .. 2^31-1).
242 | %%% - Surrogate pair code in string.
243 | %%% - 16#FFFE or 16#FFFF character in string.
244 | %%% Possible errors decoding UTF-8:
245 | %%% - 10xxxxxx or 1111111x as initial byte.
246 | %%% - Insufficient number of 10xxxxxx octets following an initial octet of
247 | %%% multi-octet sequence.
248 | %%% - Non-canonical encoding used.
249 | %%% - Surrogate-pair code encoded as UTF-8.
250 | %%% - 16#FFFE or 16#FFFF character in string.
251 | %%%
252 | %% char_to_utf8_list(Char) -> [Byte]
253 | %% Encodes one character (an integer from 0 up to 2^31-1) as 1 to 6 bytes.
254 | %% Surrogate codes (16#D800-16#DFFF), 16#FFFE, 16#FFFF and values of
255 | %% 2^31 or more raise an if_clause error; anything that is not a
256 | %% non-negative integer raises function_clause.
257 | char_to_utf8_list(Ch) when is_integer(Ch), Ch >= 0 ->
258 |     %% Continuation byte 10yyyyyy with the six bits of Ch that start at bit Shift.
259 |     Cont = fun(Shift) -> 128 + ((Ch bsr Shift) band 16#3F) end,
260 |     if
261 |         Ch < 128 ->                 % 0yyyyyyy
262 |             [Ch];
263 |         Ch < 16#800 ->              % 110xxxxy 10yyyyyy
264 |             [16#C0 + (Ch bsr 6), Cont(0)];
265 |         Ch < 16#10000 ->            % 1110xxxx 10xyyyyy 10yyyyyy
266 |             if Ch < 16#D800; Ch > 16#DFFF, Ch < 16#FFFE ->
267 |                     [16#E0 + (Ch bsr 12), Cont(6), Cont(0)]
268 |             end;
269 |         Ch < 16#200000 ->           % 11110xxx 10xxyyyy 10yyyyyy 10yyyyyy
270 |             [16#F0 + (Ch bsr 18), Cont(12), Cont(6), Cont(0)];
271 |         Ch < 16#4000000 ->          % 111110xx 10xxxyyy 10yyyyyy 10yyyyyy 10yyyyyy
272 |             [16#F8 + (Ch bsr 24), Cont(18), Cont(12), Cont(6), Cont(0)];
273 |         Ch < 16#80000000 ->         % 1111110x 10xxxxyy 10yyyyyy 10yyyyyy 10yyyyyy 10yyyyyy
274 |             [16#FC + (Ch bsr 30), Cont(24), Cont(18), Cont(12), Cont(6), Cont(0)]
275 |     end.
288 |
289 |
--------------------------------------------------------------------------------
/src/erlsom_writeHrl.erl:
--------------------------------------------------------------------------------
1 | %%% Copyright (C) 2006 - 2008 Willem de Jong
2 | %%%
3 | %%% This file is part of Erlsom.
4 | %%%
5 | %%% Erlsom is free software: you can redistribute it and/or modify
6 | %%% it under the terms of the GNU Lesser General Public License as
7 | %%% published by the Free Software Foundation, either version 3 of
8 | %%% the License, or (at your option) any later version.
9 | %%%
10 | %%% Erlsom is distributed in the hope that it will be useful,
11 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | %%% GNU Lesser General Public License for more details.
14 | %%%
15 | %%% You should have received a copy of the GNU Lesser General Public
16 | %%% License along with Erlsom. If not, see
17 | %%% <http://www.gnu.org/licenses/>.
18 | %%%
19 | %%% Author contact: w.a.de.jong@gmail.com
20 |
21 | %%% ====================================================================
22 | %%% Writes record definitions, to be used with Erlsom.
23 | %%% ====================================================================
24 |
25 | %%% Writes record definitions, taking a 'model' (from erlsom_compile) as
26 | %%% input.
27 |
28 | -module(erlsom_writeHrl).
29 | -export([writeHrl/1, writeHrl/2]).
30 | -export([write_hrl/1, write_hrl/2]).
31 | -export([writeHrlFile/3]).
32 | -export([writeXsdHrlFile/2]).
33 |
34 | -include("erlsom_parse.hrl").
35 | -include("erlsom.hrl").
36 |
37 | -type hrl_header() :: iolist(). %% only explanatory text (comment)
38 | -type hrl_types() :: iolist(). %% the actual record and type declarations
39 |
40 | %% debug(Text) -> io:format("writeHrl: ~p~n", [Text]).
41 |
42 | %% debug(Text1, Text2) ->
43 | %% io:format("~p ~p~n", [Text1, Text2]).
44 |
45 | %% Returns {Header, Types} for a compiled model. The value of option
46 | %% attribute_hrl_prefix (default "") is put in the process dictionary.
47 | -spec write_hrl(Model::erlsom:model()) -> {hrl_header(), hrl_types()}.
48 | write_hrl(Model) -> write_hrl(Model, []).
49 | -spec write_hrl(Model::erlsom:model(), Options :: list()) -> {hrl_header(), hrl_types()}.
50 | write_hrl(#model{tps = Types, th = TypeHierarchy, any_attribs = AnyAtts}, Options) ->
51 |     AttrPrefix = proplists:get_value(attribute_hrl_prefix, Options, ""),
52 |     erlang:put(erlsom_attribute_hrl_prefix, AttrPrefix),
53 |     {header(), writeTypes(Types, TypeHierarchy, AnyAtts)}.
53 |
54 | %% Like write_hrl/1,2, but with header and declarations combined in one list.
55 | writeHrl(Model) -> writeHrl(Model, []).
56 |
57 | writeHrl(#model{} = Model, Options) ->
58 |     {Comment, Declarations} = write_hrl(Model, Options),
59 |     [Comment, Declarations].
60 |
61 | %% Compile the XSD (erlsom:compile/3) and return the record definitions for it.
62 | %% A compilation error is only printed; the result is then that of io:format/2.
63 | writeHrlFile(Xsd, Prefix, Namespaces) ->
64 |     case erlsom:compile(Xsd, Prefix, Namespaces) of
65 |         {error, Reason} ->
66 |             io:format("Error while compiling file: ~p~n", [Reason]);
67 |         {ok, Model} ->
68 |             writeHrl(Model)
69 |     end.
70 |
71 | %% Compile the XSD (erlsom:compile_xsd/2) and return the record definitions for
72 | %% it; Options are passed to both the compiler and writeHrl/2.
73 | %% Throws {error, Reason} if the XSD cannot be compiled.
74 | writeXsdHrlFile(Xsd, Options) ->
75 |     case erlsom:compile_xsd(Xsd, Options) of
76 |         {ok, Model} -> writeHrl(Model, Options);
77 |         {error, _Reason} = Failure -> throw(Failure)
78 |     end.
80 |
81 | %% Explanatory comment that is returned as the header part of the generated text.
82 | header() ->
83 |     lists:append(["%% HRL file generated by ERLSOM\n",
84 |                   "%%\n",
85 |                   "%% It is possible (and in some cases necessary) to change the name of\n",
86 |                   "%% the record fields.\n",
87 |                   "%%\n",
88 |                   "%% It is possible to add default values, but be aware that these will\n",
89 |                   "%% only be used when *writing* an xml document.\n\n",
90 |                   "\n"]).
90 |
91 | %% Declarations that precede the generated records: the anyAttribs types (only
92 | %% if AnyAtts is true) followed by the #qname{} record, each wrapped in
93 | %% -ifndef/-define/-endif.
94 | standard_types(AnyAtts) ->
95 |     AnyAttribTypes =
96 |         case AnyAtts of
97 |             true ->
98 |                 "-ifndef(ERLSOM_ANY_ATTRIB_TYPES).\n"
99 |                 "-define(ERLSOM_ANY_ATTRIB_TYPES, true).\n"
100 |                 "-type anyAttrib() :: {{string(), %% name of the attribute\n"
101 |                 " string()}, %% namespace\n"
102 |                 " string()}. %% value\n"
103 |                 "\n"
104 |                 "-type anyAttribs() :: [anyAttrib()] | undefined.\n"
105 |                 "-endif.\n"
106 |                 "\n";
107 |             _ ->
108 |                 ""
109 |         end,
110 |     QnameTypes =
111 |         "-ifndef(ERLSOM_QNAME_TYPES).\n"
112 |         "-define(ERLSOM_QNAME_TYPES, true).\n"
113 |         "%% xsd:QName values are translated to #qname{} records.\n"
114 |         "-record(qname, {uri :: string(),\n"
115 |         " localPart :: string(),\n"
116 |         " prefix :: string(),\n"
117 |         " mappedPrefix :: string()}).\n"
118 |         "-endif.\n",
119 |     AnyAttribTypes ++ QnameTypes.
114 |
115 | %% The standard declarations followed by the output of writeType/3 for every type.
116 | writeTypes(Types, TypeHierarchy, AnyAtts) ->
117 |     Standard = standard_types(AnyAtts),
118 |     Records = lists:map(fun(Type) -> writeType(Type, TypeHierarchy, AnyAtts) end, Types),
119 |     [Standard, Records].
117 |
118 | %% Record definition plus type declaration for one type of the model, as a
119 | %% flat string. The '_document' type produces no output.
120 | writeType(#type{nm = '_document'}, _Hierarchy, _AnyAtts) ->
121 |     [];
122 | writeType(#type{nm = Name, els = Elements, atts = Attributes, mxd = Mixed},
123 |           Hierarchy, AnyAtts) ->
124 |     AnyAttribsField = case AnyAtts of
125 |                           true -> "anyAttribs :: anyAttribs()";
126 |                           _ -> []
127 |                       end,
128 |     %% add_commas/1 leaves out the parts that are empty
129 |     Fields = add_commas([AnyAttribsField,
130 |                          writeAttributes(Attributes),
131 |                          writeElements(Elements, Mixed, Hierarchy)]),
132 |     Text = io_lib:format("~3n-record(~p, {~s}).~2n-type ~s :: ~s.",
133 |                          [Name, Fields, formatType(Name), formatRecord(Name)]),
134 |     lists:flatten(Text).
135 |
136 | %% Joins the non-empty parts with commas.
137 | add_commas(Parts) -> string:join([Part || Part <- Parts, Part /= ""], ",").
138 |
139 | %% writeElements(Elements, Mixed, Hierarchy) ->
140 | %% writeElements(Elements, Mixed, Hierarchy, 0).
141 |
142 | %% writeElements([], _Mixed, _Hierarchy, _) ->
143 | %% [];
144 | %% writeElements([Element | Tail], Mixed, Hierarchy, CountChoices) ->
145 | %% {Elem, CountChoices2} = writeElement(Element, Mixed, Hierarchy, CountChoices),
146 | %% NextElems = writeElements(Tail, Mixed, Hierarchy, CountChoices2),
147 | %% [",\n\t", Elem, NextElems].
148 |
149 | %% Comma-separated field definitions for the elements of a type; the choice
150 | %% counter returned by writeElement/4 is passed on to the next element.
151 | writeElements(Elements, Mixed, Hierarchy) ->
152 |     WriteOne = fun(Element, ChoicesSoFar) ->
153 |                        writeElement(Element, Mixed, Hierarchy, ChoicesSoFar)
154 |                end,
155 |     {Fields, _ChoiceCount} = lists:mapfoldl(WriteOne, 0, Elements),
156 |     string:join(Fields, ",").
157 |
158 |
159 | %% Field definition ("\n\tLabel Type", flattened) for one element, plus the
160 | %% updated choice counter. If Mixed is true the type is wrapped as [Type | string()].
161 | writeElement(#el{alts = Alternatives, mn = Min, mx = Max, nillable = Nillable},
162 |              Mixed, Hierarchy, CountChoices) ->
163 |     case Mixed of
164 |         true ->
165 |             {Label, Types, Count} =
166 |                 writeAlternatives(Alternatives, 1, 1, false, Hierarchy, CountChoices),
167 |             {lists:flatten(["\n\t", Label, "[", Types, " | string()]"]), Count};
168 |         _ ->
169 |             {Label, Types, Count} =
170 |                 writeAlternatives(Alternatives, Min, Max, Nillable, Hierarchy, CountChoices),
171 |             {lists:flatten(["\n\t", Label, Types]), Count}
172 |     end.
173 |
174 |
175 | %% writeAlternatives(Alts, Min, Max, Nillable, Hierarchy, CountChoices)
176 | %%     -> {Label, Type, NewCountChoices}
177 | %% Label is the field name including " :: ", Type the type specification.
178 | %% A choice (more than 1 alternative) is labelled 'choice' when CountChoices
179 | %% is 0 and 'choice<CountChoices>' otherwise, and increments the counter;
180 | %% a single alternative is named after its tag or type and leaves the
181 | %% counter unchanged.
182 | %% The number of alternatives is established by pattern matching, so the
183 | %% list is not traversed by a length/1 guard just to be counted.
184 | writeAlternatives([_, _ | _] = Alts, Min, Max, _Nillable, Hierarchy, CountChoices) ->
185 |     Label = case CountChoices of
186 |                 0 -> "choice :: ";
187 |                 _ -> ["choice", integer_to_list(CountChoices), " :: "]
188 |             end,
189 |     Alternatives =
190 |         case lists:keyfind('#any', #alt.tag, Alts) of
191 |             false ->
192 |                 [writeAlternative(A, 1, 1, false, Hierarchy) || A <- Alts];
193 |             Any ->
194 |                 %% it makes no sense to have a choice between many things if
195 |                 %% one of them is "any()" - in that case the any() suffices.
196 |                 [writeAlternative(Any, 1, 1, false, Hierarchy)]
197 |         end,
198 |     Type = lists:flatten([minMaxType(string:join(Alternatives, " | "),
199 |                                      Min, Max, 1, false, simple)]),
200 |     {Label, Type, CountChoices + 1};
201 | writeAlternatives([#alt{tag = Tag, tp = Tp, rl = Rl} = Alt], Min, Max, Nillable, Hierarchy, CountChoices) ->
202 |     LabelAtom = case Rl of
203 |                     true ->
204 |                         baseName(Tag);
205 |                     _ when Rl == false; Rl == simple ->
206 |                         case Tp of
207 |                             {'#PCDATA', _} -> Tag;
208 |                             _ -> Tp
209 |                         end
210 |                 end,
211 |     Label = io_lib:format("~p :: ", [LabelAtom]),
212 |     {Label, writeAlternative(Alt, Min, Max, Nillable, Hierarchy), CountChoices}.
213 |
214 | %% Type specification for a single alternative.
215 | %% The wildcard is written as any().
216 | writeAlternative(#alt{tag = '#any'}, _Min, _Max, _Nillable, _Hierarchy) ->
217 |     "any()";
218 | %% a {Tp1, Tp2} type, for a 'real' element (rl = true) or a simpleContent type (rl = false)
219 | writeAlternative(#alt{rl = Real, tp = {Tp1, Tp2}, mx = Max2}, Min, Max, Nillable, _Hierarchy)
220 |   when Real =:= true; Real =:= false ->
221 |     formatSimpleType(Tp1, Tp2, Min, Max, Max2, Nillable);
222 | writeAlternative(#alt{rl = Real, tp = Type, mx = Max2}, Min, Max, Nillable, Hierarchy)
223 |   when Real == true; Real == simple ->
224 |     %% The type could be abstract, in that case put the 'leaves' of the type hierarchy
225 |     case erlsom_lib:getDescendants(Type, Hierarchy) of
226 |         [] ->
227 |             formatListType(Type, Min, Max, Max2, Nillable);
228 |         Leaves ->
229 |             Union = string:join([formatType(T) || T <- [Type | Leaves]], " | "),
230 |             minMaxType(Union, Min, Max, Max2, Nillable, false)
231 |     end;
232 | %% group type
233 | writeAlternative(#alt{rl = false, tp = Type, mx = Max2}, Min, Max, Nillable, _Hierarchy) ->
234 |     formatListType(Type, Min, Max, Max2, Nillable).
235 |
236 |
237 | %% Record expression for a type name, e.g. #'p:Type'{} (the result of io_lib:format/2).
238 | formatRecord(Type) -> io_lib:format("#~p{}", [Type]).
239 |
240 | %% Type expression for a type name ("Name()"); '#ANY' is written as any().
241 | formatType(Type) ->
242 |     case Type of '#ANY' -> "any()"; _ -> io_lib:format("~p()", [Type]) end.
244 |
245 | %% Comma-separated field definitions for the attributes of a type.
246 | %% TODO: delete the flatten call.
247 | writeAttributes(Attributes) -> string:join([writeAttribute(A) || A <- Attributes], ",").
248 |
249 |
250 | %% Field definition ("\n\tname :: type") for one attribute. The field name is the
251 | %% attribute name without namespace prefix, preceded by the value stored under
252 | %% erlsom_attribute_hrl_prefix in the process dictionary.
253 | -spec writeAttribute(#att{}) -> Acc when Acc :: list().
254 | writeAttribute(#att{nm = Name, opt = Optional, tp = Type}) ->
255 |     Suffix = case Optional of true -> " | undefined"; _ -> "" end,
256 |     FieldName = list_to_atom(erlang:get(erlsom_attribute_hrl_prefix) ++ atom_to_list(baseName(Name))),
257 |     lists:flatten(io_lib:format("~n\t~p :: ~s~s", [FieldName, makeType(Type), Suffix])).
258 |
259 | %% the names of the fields should not have the prefix: 'p:name' -> name.
260 | %% A name that string:tokens/2 does not split into exactly two parts
261 | %% on ":" is returned unchanged.
262 | baseName(Atom) when is_atom(Atom) ->
263 |     case string:tokens(atom_to_list(Atom), ":") of
264 |         [_Prefix, Local] -> list_to_atom(Local);
265 |         _ -> Atom
266 |     end.
269 |
270 | %% Type specification for a {Tp1, Tp2} type, with occurrence and nil handled by minMaxType/6.
271 | formatSimpleType(Tp1, Tp2, Min, Max, Max2, Nullable) ->
272 |     minMaxType(simpleType(Tp1, Tp2), Min, Max, Max2, Nullable, true).
273 |
274 | %% Wraps Type according to its occurrence, as a flat string:
275 | %%  - Max other than 1 adds an outer "[...]", Max2 other than 1 an inner one;
276 | %%  - Nullable adds " | nil" (Simple = true) or " | {nil, Type}"
277 | %%    (Simple = false) inside the brackets;
278 | %%  - Min equal to 0 adds " | undefined" after the brackets.
279 | minMaxType(Type, Min, Max, Max2, Nullable, Simple) ->
280 |     Brackets = fun(Count) when Count == 1 -> {"", ""};
281 |                   (_) -> {"[", "]"}
282 |                end,
283 |     {OuterOpen, OuterClose} = Brackets(Max),
284 |     {InnerOpen, InnerClose} = Brackets(Max2),
285 |     Nil = case {Nullable, Simple} of
286 |               {true, true} -> " | nil";
287 |               {true, false} -> [" | {nil, ", Type, "}"];
288 |               _ -> ""
289 |           end,
290 |     Undefined = case Min == 0 of
291 |                     true -> " | undefined";
292 |                     false -> ""
293 |                 end,
294 |     lists:flatten([OuterOpen, InnerOpen, Type, Nil, InnerClose, OuterClose, Undefined]).
308 |
309 | %% Erlang type for a built-in type. simpleType/2 ignores its first argument.
310 | simpleType(_, Type) -> makeType(Type).
311 |
312 | %% A type that is not listed here raises function_clause.
313 | makeType(bool) -> "boolean()";
314 | makeType(char) -> "string()";
315 | makeType(float) -> "float() | 'NaN' | 'INF' | '-INF'";
316 | makeType(integer) -> "integer()";
317 | makeType(qname) -> "#qname{}";
318 | makeType({integer, negativeInteger}) -> "neg_integer()";
319 | makeType({integer, nonPositiveInteger}) -> "neg_integer() | 0";
320 | makeType({integer, positiveInteger}) -> "pos_integer()";
321 | makeType({integer, Kind})
322 |   when Kind =:= nonNegativeInteger orelse Kind =:= unsignedLong orelse
323 |        Kind =:= unsignedInt orelse Kind =:= unsignedShort orelse
324 |        Kind =:= unsignedByte -> "non_neg_integer()";
325 | makeType({integer, _Other}) -> "integer()".
326 |
327 | %% Type specification for a named type, with occurrence and nil handled by minMaxType/6.
328 | formatListType(Type, Min, Max, Max2, Nullable) ->
329 |     minMaxType(formatType(Type), Min, Max, Max2, Nullable, false).
330 |
--------------------------------------------------------------------------------
/src/erlsom_example_value.erl:
--------------------------------------------------------------------------------
1 | -module(erlsom_example_value).
2 |
3 | %% output code that makes an example value for a type, using an erlsom model as input.
4 | %%
5 | %% example:
6 | %% #'p:Transaction'{
7 | %% 'TransactionID' = "?",
8 | %% 'SessionID' = "?",
9 | %% 'CurrencyID' = "?",
10 | %% 'Value' = "?"}
11 | %%
12 | %% In order to be able to embed the result it must be possible to specify
13 | %% indentation.
14 |
15 | -include("erlsom_parse.hrl").
16 |
17 | -export([from_model/2]).
18 | -export([from_model/3]).
19 | -export([test/1]).
20 |
21 | -type options() :: [option()].
22 | -type option() :: {indent, integer()} | {indent_level, integer()}.
23 |
24 | -record(e_state, { % settings and position used while generating an example value
25 | indent :: integer(), % number of spaces per level (option 'indent', default 4)
26 | level :: integer(), % current level (starts at option 'indent_level', default 0)
27 | choice_depth = 0 :: integer() % raised by 1 for every alternative of a choice except the last; presumably read by comment/1 - TODO confirm
28 | }).
29 |
30 | %% Development aid: writes test_example.hrl and test_example.erl for the XSD in
31 | %% File and compiles the latter. The example value is made for the type of the
32 | %% first alternative of the first element of the '_document' type.
33 | %% Returns the result of compile:file/1.
34 | test(File) ->
35 |     Options = [{include_any_attribs, false}],
36 |     %% generate hrl file, store in test_example.hrl
37 |     erlsom:write_xsd_hrl_file(File, "test_example.hrl", Options),
38 |     {ok, Model} = erlsom:compile_xsd_file(File, Options),
39 |     %% Pick a type from the model
40 |     #model{tps = [#type{nm = '_document', els = Elements} | _]} = Model,
41 |     [#el{alts = [#alt{tp = Type} | _]} | _] = Elements,
42 |     %% generate an example value and wrap it in a module
43 |     Source = [test_header(), from_model(Type, Model), ".\n"],
44 |     file:write_file("test_example.erl", Source),
45 |     %% See if it compiles
46 |     compile:file("test_example.erl").
44 |
45 | %% First lines of the generated test_example.erl, up to the head of go/0.
46 | test_header() ->
47 |     lists:append(["-module(test_example).\n",
48 |                   "-export([go/0]).\n",
49 |                   "-include(\"test_example.hrl\").\n",
50 |                   "go() -> \n"]).
50 |
51 | %% Example value (as source text) for Type. Options: {indent, N}, default 4,
52 | %% and {indent_level, N}, default 0; both end up in the #e_state{} record.
53 | -spec from_model(Type::atom(), erlsom:model()) -> string().
54 | from_model(Type, Model) -> from_model(Type, Model, []).
55 |
56 | -spec from_model(Type::atom(), erlsom:model(), options()) -> string().
57 | from_model(Type, Model, Options) ->
58 |     Option = fun(Key, Default) -> proplists:get_value(Key, Options, Default) end,
59 |     from_type(Type, Model, #e_state{indent = Option(indent, 4), level = Option(indent_level, 0)}).
60 |
61 | %% Looks up Type in the model and makes the example value for it; throws
62 | %% {error, "Type not found", Type} if the model has no type with that name.
63 | from_type(Type, #model{tps = Types} = Model, State) ->
64 |     Lookup = lists:keyfind(Type, #type.nm, Types),
65 |     if
66 |         Lookup =:= false -> throw({error, "Type not found", Type});
67 |         true -> from_type2(Lookup, Model, State)
68 |     end.
68 |
69 | %% Example value for one type: "#name{" followed by the fields (anyAttribs if
70 | %% the model has any_attribs = true, the attributes, the elements) and "}".
71 | from_type2(#type{nm = Name, els = Elements, atts = Attributes},
72 |            #model{any_attribs = AnyAtts} = Model, State) ->
73 |     Attribute_fields = [from_attribute(A, Model, State) || A <- Attributes],
74 |     Element_fields = from_elements(Elements, Model, State),
75 |     Leading_fields =
76 |         case AnyAtts of
77 |             true -> [[comment(State), indent(State), " anyAttribs = []"]];
78 |             false -> []
79 |         end,
80 |     Fields = ["{\n", add_commas(Leading_fields ++ Attribute_fields ++ Element_fields), $}],
81 |     [comment(State), indent(State), $#, atom_list(Name), Fields].
82 |
83 | %% Text for every element of a type, in order. The choice counter returned by
84 | %% from_element/4 is passed on to the next element, starting at 0.
85 | from_elements(Elements, Model, State) ->
86 |     from_elements(Elements, Model, State, 0, []).
87 |
88 | from_elements(Elements, Model, State, ChoiceCount, Acc) ->
89 |     Convert = fun(Element, Count) -> from_element(Element, Model, State, Count) end,
90 |     {Results, _FinalCount} = lists:mapfoldl(Convert, ChoiceCount, Elements),
91 |     lists:reverse(Acc, Results).
92 |
93 |
94 | %% Whitespace for the current level: Indent * Level spaces.
95 | indent(#e_state{indent = Width, level = Level}) -> lists:duplicate(Width * Level, $\s).
96 |
97 | %% Raise the level in the state, by 2 unless the number of levels is given.
98 | bump_level(State) -> bump_level(State, 2).
99 |
100 | bump_level(#e_state{level = Current} = State, Nr) ->
101 |     State#e_state{level = Current + Nr}.
102 |
103 | %% Separator between the alternatives of a choice: only a newline, no
104 | %% commas, because only one of the alternatives should be used - the
105 | %% others are commented out.
106 | add_breaks(Alternatives) ->
107 |     separate(Alternatives, "\n").
108 |
109 | %% Puts ",\n" between the items of the list.
110 | add_commas(Items) -> separate(Items, ",\n").
111 |
112 | %% separate([A, B, C], Sep) -> [A, Sep, B, Sep, C]; an empty list stays empty.
113 | separate([], _Separator) ->
114 |     [];
115 | separate([First | Others], Separator) ->
116 |     separate(Others, [First], Separator).
117 |
118 | %% Acc holds the result so far, in reverse order.
119 | separate(Items, Acc, Separator) ->
120 |     lists:reverse(lists:foldl(fun(Item, Done) -> [Item, Separator | Done] end, Acc, Items)).
121 |
122 | %% One " name = value" line for an attribute, using default_value/3 for its type;
123 | %% an optional attribute is preceded by a " % Optional:" line.
124 | from_attribute(#att{nm = Name, opt = Optional, tp = Type}, Model, State) ->
125 |     Note = case Optional of
126 |                true -> [comment(State), indent(State), " % Optional:\n"];
127 |                false -> ""
128 |            end,
129 |     Default = default_value(Type, Model, State),
130 |     Assignment = io_lib:format(" ~p = ~s", [Name, Default]),
131 |     [Note, comment(State), indent(State), Assignment].
132 |
133 | %% from_element(Element, Model, State, Nr_choices) -> {Text, New_nr_choices}
134 | %% An element with exactly one alternative becomes an ordinary field. Anything
135 | %% else is a choice: a field named by choice_label/1 under which every distinct
136 | %% alternative type is written. Only a choice increments the counter.
137 | %% The single alternative is recognised by a list pattern, not a length/1 guard.
138 | from_element(#el{alts = [Alternative], mn = Min, mx = Max}, Model, State, Nr_choices) ->
139 |     Min_Max_comment = min_max_comment(Min, Max, State),
140 |     Values = [from_alternative(Alternative, Max, Model, State)],
141 |     {[Min_Max_comment, Values], Nr_choices};
142 | from_element(#el{alts = Alternatives, mn = Min, mx = Max}, Model, State, Nr_choices) ->
143 |     %% Several tags can lead to the same type, giving more than 1 #alt{} record for it: keep one per type.
144 |     Unique_alternatives = lists:ukeysort(#alt.tp, Alternatives),
145 |     Choice_comment = choice_comment(length(Unique_alternatives), State),
146 |     Min_Max_comment = min_max_comment(Min, Max, State),
147 |     Alts = from_alternatives(Unique_alternatives, Model, State),
148 |     Label = choice_label(Nr_choices),
149 |     Result = case Max > 1 of %% the atom 'unbound' also compares greater than 1
150 |                  true -> [Min_Max_comment, comment(State), indent(State), " ", Label, " = [\n",
151 |                           Choice_comment, add_breaks(Alts), $]];
152 |                  false -> [Min_Max_comment, comment(State), indent(State), " ", Label, " = \n",
153 |                            Choice_comment, add_breaks(Alts)]
154 |              end,
155 |     {Result, Nr_choices + 1}.
156 |
157 |
158 | %% Field name for a choice: "choice" for number 0, otherwise "choice" plus the number.
159 | choice_label(Nr) ->
160 |     case Nr of 0 -> "choice"; _ -> ["choice", integer_to_list(Nr)] end.
162 |
163 | %% only used for alternatives of a choice
164 | %%
165 | %% All alternatives are commented out, except for the last one (the last
166 | %% one, because otherwise there are problems with commas, closing braces
167 | %% etc.). Every alternative but the last is generated with choice_depth
168 | %% raised by one.
169 | from_alternatives(Alternatives, Model, State) ->
170 |     from_alternatives(Alternatives, Model, State, 1, []).
171 |
172 | from_alternatives([], _Model, _State, _Count, Done) ->
173 |     lists:reverse(Done);
174 | from_alternatives([Alt | More], Model, #e_state{choice_depth = Depth} = State, Count, Done) ->
175 |     Alt_depth = if
176 |                     More =:= [] -> Depth;   % no more alternatives, so this is the last one
177 |                     true -> Depth + 1
178 |                 end,
179 |     Text = from_alternative2(Alt, Model, State#e_state{choice_depth = Alt_depth}),
180 |     from_alternatives(More, Model, State, Count + 1, [Text | Done]).
184 |
%% Renders the single alternative of a non-choice element as a
%% "Field_name = Value" line (iolist).
%%
%% Max is the maximum occurrence count passed in by the caller; when it is
%% greater than 1 (including the atom 'unbound', which sorts after all
%% numbers) the value is shown as a list.
from_alternative(#alt{tag = Tag, tp = Type, rl = Real}, Max, Model, State) ->
    Field_name = name(Tag, Type, Real),
    %% add a newline if the type is a record
    Newline = newline(Type),
    %% add a relevant comment if the type is 'any':
    Any_comment = any_comment(Type, State),
    Value = default_value(Type, Model, State),
    Is_list = Max > 1,
    Is_any = (Type == any) orelse (Type == '#ANY'),
    Field =
        case {Is_list, Is_any} of
            {true, true} ->
                %% Note: an empty list is not correct if MinOccurs > 0, but
                %% that is rare, and it would be difficult to figure out what
                %% to put in such a case.
                io_lib:format(" ~p = ~s[]", [Field_name, Newline]);
            {true, false} ->
                io_lib:format(" ~p = [~s~s]", [Field_name, Newline, Value]);
            {false, _} ->
                io_lib:format(" ~p = ~s~s", [Field_name, Newline, Value])
        end,
    [Any_comment, comment(State), indent(State), Field].
210 |
%% Extra comment line placed in front of a field of type 'any'; the empty
%% string for every other type.
any_comment(Type, State) ->
    case Type of
        any ->
            [comment(State), indent(State), " % Any value:\n"];
        _ ->
            ""
    end.
215 |
%% Renders one alternative inside a choice: an occurrence comment followed by
%% the example value. No field name is written here.
from_alternative2(#alt{} = Alternative, Model, State) ->
    #alt{tp = Type, mn = Min, mx = Max} = Alternative,
    Value = default_value(Type, Model, State),
    Min_Max_comment = min_max_comment(Min, Max, bump_level(State, 1)),
    Field =
        if
            Max > 1 ->
                %% unbound > 1 as well: show the value as a list by putting
                %% "[" in front of its first significant character and "]"
                %% behind it.
                io_lib:format("~s]", [put_brace(Value)]);
            true ->
                io_lib:format("~s", [Value])
        end,
    [Min_Max_comment, Field].
228 |
%% Selects the atom that the field name is derived from and strips its prefix.
%% The type is used only when Real is 'false' and Type is not a 2-tuple; in
%% every other case the tag is used.
name(Tag, Type, Real) ->
    With_prefix =
        case {Real, Type} of
            {false, {_, _}} -> Tag;
            {false, _} -> Type;
            {_, _} -> Tag
        end,
    base_name(With_prefix).
243 |
244 |
%% The names of the fields should not have the prefix: 'p:name' becomes 'name'.
%% An atom that does not split into exactly two non-empty parts around ":" is
%% returned unchanged.
base_name(Atom) when is_atom(Atom) ->
    Full = atom_to_list(Atom),
    Parts = string:tokens(Full, ":"),
    list_to_atom(base_name_local_part(Parts, Full)).

%% Picks the part after the prefix when there is exactly one prefix, otherwise
%% falls back to the complete name.
base_name_local_part([_Prefix, Local], _Full) ->
    Local;
base_name_local_part(_Parts, Full) ->
    Full.
255 |
%% Returns a string of '%' characters, one for each level of choice_depth in
%% the state (empty when the depth is 0).
comment(#e_state{} = State) ->
    Depth = State#e_state.choice_depth,
    lists:duplicate(Depth, $%).
258 |
259 |
%% Inserts an opening bracket in front of the first character of String that
%% is neither a space nor a '%'.
%%
%% String is chardata; it is flattened first so that it can be scanned
%% character by character. The result is an iolist of the form
%% [Leading, $[, Rest].
put_brace(String) ->
    Flat = lists:flatten(String),
    put_brace(Flat, []).

%% String = n * space + Something, must become:
%% n * space + [ + Something
%% ('%' characters are skipped in the same way as spaces.)
put_brace([$\s | T], Acc) ->
    put_brace(T, [$\s | Acc]);
put_brace([$% | T], Acc) ->
    put_brace(T, [$% | Acc]);
put_brace(Rest, Acc) ->
    %% Rest may be empty: a string that is empty or holds only spaces and '%'
    %% gets the bracket appended instead of failing with function_clause.
    [lists:reverse(Acc), $[, Rest].
272 |
%% Comment line describing how often an element may occur, given its minimum
%% and maximum. Nothing is written for the (1, 1) case.
min_max_comment(1, 1, _State) ->
    "";
min_max_comment(Min, Max, State) ->
    Text =
        case {Min, Max} of
            {0, 1} ->
                " % Optional:\n";
            {0, _} ->
                io_lib:format(" % List with zero ~s elements:~n",
                              [max_as_string(Max)]);
            {_, _} ->
                io_lib:format(" % List with ~p ~s elements:~n",
                              [Min, max_as_string(Max)])
        end,
    [comment(State), indent(State), Text].
283 |
%% Wording for the upper bound in an occurrence comment: "or more" for the
%% atom 'unbound', otherwise "to " followed by the number.
max_as_string(Max) ->
    case Max of
        unbound ->
            "or more";
        _ ->
            Digits = integer_to_list(Max),
            [$t, $o, $\s | Digits]
    end.
288 |
289 |
%% Comment line announcing a choice between N elements; nothing is written
%% when N is 1.
choice_comment(N, State) ->
    case N of
        1 ->
            "";
        _ ->
            Text = io_lib:format(" % Select one from the following ~p elements:~n", [N]),
            [comment(State), indent(State), Text]
    end.
295 |
%% Line break to put in front of a value: none for a 2-tuple type (a simple
%% type) and for 'any', a newline for every other type.
newline(Type) when tuple_size(Type) =:= 2; Type =:= any ->
    [];
newline(_Other) ->
    "\n".
303 |
%% Returns the text of an example value for Type.
%%
%% {'#PCDATA', T} is unwrapped and handled as T. Types with a fixed literal
%% example are looked up in default_value_literal/1; any other type is handed
%% to from_type/3 with the state passed through bump_level/1.
default_value({'#PCDATA', Type}, Model, State) ->
    default_value(Type, Model, State);
default_value(Type, Model, State) ->
    case default_value_literal(Type) of
        none ->
            from_type(Type, Model, bump_level(State));
        Literal ->
            Literal
    end.

%% Fixed example text for a type, or 'none' when the type has no literal.
default_value_literal(bool) -> "true";
default_value_literal(any) -> "undefined";
default_value_literal('#ANY') -> "\"could be anything\"";
default_value_literal(qname) -> "qname";
default_value_literal(integer) -> "42";
default_value_literal({integer, Kind}) -> default_value_integer(Kind);
default_value_literal(float) -> "3.1415927";
default_value_literal(char) -> "\"?\"";
default_value_literal(_) -> none.

%% Example text for the {integer, Kind} types, or 'none' for an unknown Kind.
default_value_integer(long) -> "42000";
default_value_integer(int) -> "4200";
default_value_integer(short) -> "420";
default_value_integer(byte) -> "42";
default_value_integer(unsignedLong) -> "43000";
default_value_integer(unsignedInt) -> "4300";
default_value_integer(unsignedShort) -> "430";
default_value_integer(unsignedByte) -> "43";
default_value_integer(nonPositiveInteger) -> "0";
default_value_integer(positiveInteger) -> "42";
default_value_integer(negativeInteger) -> "-42";
default_value_integer(nonNegativeInteger) -> "0";
default_value_integer(_) -> none.
346 |
%% Formats the argument with the ~p control sequence, so an atom is quoted
%% where Erlang syntax requires it. Returns the chardata produced by io_lib.
atom_list(Atom) ->
    Arguments = [Atom],
    io_lib:format("~p", Arguments).
349 |
--------------------------------------------------------------------------------