├── .codeclimate.yml
├── .github
└── workflows
│ └── ci.yml
├── LICENSE
├── README.md
├── architecture.png
├── circle.yml
├── components
├── catfilterer.go
├── const.go
├── filereader.go
├── filereader_test.go
├── iptypes.go
├── mwxmlcreator.go
├── mwxmlcreator_test.go
├── residxcreator.go
├── residxcreator_test.go
├── residxfanout.go
├── residxfanout_test.go
├── residxtoresaggrconv.go
├── residxtoresaggrconv_test.go
├── smwtplformatter.go
├── strfilewriter.go
├── stringprinter.go
├── tripleaggregator.go
├── tripleaggregator_test.go
├── tripleaggrfanout.go
├── tripleaggrprinter.go
├── tripleparser.go
├── tripleprinter.go
├── triplestowikipageconv.go
├── triplestowikipageconv_test.go
├── ttlfilereader.go
├── ttlfilereader_test.go
└── wikipageprinter.go
├── go.mod
├── go.sum
├── main.go
└── testcov.sh
/.codeclimate.yml:
--------------------------------------------------------------------------------
1 | engines:
2 | gofmt:
3 | enabled: true
4 | golint:
5 | enabled: true
6 | govet:
7 | enabled: true
8 |
9 | ratings:
10 | paths:
11 | - "**.go"
12 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will build rdf2smw as a golang project
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
3 |
4 | name: CI
5 |
6 | on:
7 | push:
8 | branches: [ "master", "dev" ]
9 | pull_request:
10 | branches: [ "master", "dev" ]
11 |
12 | jobs:
13 |
14 | build:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | - name: Set up Go
20 | uses: actions/setup-go@v4
21 | with:
22 | go-version: '1.24'
23 |
24 | - name: Build
25 | run: go build -v ./...
26 |
27 | - name: Test
28 | run: go test -v ./...
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Samuel Lampa
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | rdf2smw
2 | =======
3 |
4 | [](https://github.com/rdfio/rdf2smw/actions/workflows/ci.yml)
5 | [](https://codecov.io/gh/rdfio/rdf2smw)
6 | [](https://goreportcard.com/report/github.com/rdfio/rdf2smw)
7 | [](https://codeclimate.com/github/rdfio/rdf2smw)
8 | [](https://codeclimate.com/github/rdfio/rdf2smw)
9 | [](https://codeclimate.com/github/rdfio/rdf2smw)
10 | [](https://godoc.org/github.com/rdfio/rdf2smw)
11 |
12 | Updates
13 | -------
14 |
15 | **Mar 28, 2025:** Added `go.mod`/`go.sum` files to make building work again
16 | with the latest Go (1.24.0).
17 |
18 | **Sep 30, 2016:** rdf2smw was covered in a talk at SMWCon in Frankfurt, Sep 2016. See: [Talk page](https://www.semantic-mediawiki.org/wiki/SMWCon_Fall_2016/Batch_import_of_large_RDF_datasets_using_RDFIO_or_the_new_rdf2smw_tool), [Slides](https://www.slideshare.net/SamuelLampa/batch-import-of-large-rdf-datasets-into-semantic-mediawiki), [Video](https://www.youtube.com/watch?v=k70er1u1ZYs).
19 |
20 | **Sep 4, 2017:** Our paper on RDFIO and rdf2smw was just published! If you use rdf2smw in scientific work, please cite:
21 | Lampa S, Willighagen E, Kohonen P, King A, Vrandečić D, Grafström R, Spjuth O
22 | [RDFIO: extending Semantic MediaWiki for interoperable biomedical data management](https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0136-y)
23 | *Journal of Biomedical Semantics*. **8**:35 (2017). DOI: [10.1186/s13326-017-0136-y](https://dx.doi.org/10.1186/s13326-017-0136-y).
24 |
25 | Import / convert RDF data into a Semantic MediaWiki
26 | ---------------------------------------------------
27 |
28 | A commandline tool to convert from RDF triples to [Semantic MediaWiki](http://semantic-mediawiki.org) facts
29 | in MediaWiki XML export format to be used with [MediaWiki](https://www.mediawiki.org)'s built-in
30 | [XML import feature](https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps).
31 |
32 | This allows you to quickly and simply populate a Semantic MediaWiki page
33 | structure, from an RDF data file.
34 |
35 | It is written in Go for better performance than PHP. The latest version
36 | processes triples into pages in the order of ~55K triples/sec converted into
37 | ~13K pages/sec on a 2014 i5 Haswell dual core processor, to give an idea.
38 |
39 | rdf2smw is very similar to the RDF import function in the
40 | [RDFIO](https://github.com/rdfio/RDFIO) Semantic MediaWiki extension, but takes
41 | another approach: Whereas RDFIO converts RDF to wiki pages and imports them in
42 | the same go, rdf2smw first converts RDF to an XML file outside of PHP (for
43 | better performance), and then imports it using MediaWiki's built-in import
44 | function.
45 |
46 | **Status:** The tool is pretty much feature complete, including ability to
47 | write facts via template calls if a categorization (via owl:Class or rdf:type)
48 | of the subject can be done. What is lacking is more options to fine-tune
49 | things. Right now you'll have to modify the source code yourself if you need
50 | any customization. Hope to address this in the near future.
51 |
52 | Dependencies
53 | ------------
54 |
55 | The tool itself does not have any dependencies, apart from a unix-like
56 | operating system. For importing the generated XML dump file to make sense
57 | though, you will need a web server, PHP, MediaWiki and Semantic MediaWiki.
58 |
59 | An automated virtualbox generation script (a so-called "vagrant box"), with all
60 | of this, plus the RDFIO extension, can be found
61 | [here](https://github.com/rdfio/rdfio-vagrantbox), and is highly recommended,
62 | if you don't have a MediaWiki / SemanticMediawiki installation already!
63 |
64 | Installation
65 | ------------
66 |
67 | For linux 64 bit:
68 |
69 | 1. Download the file `rdf2smw_linux64.gz`, on the [latest release](https://github.com/samuell/rdf2smw/releases).
70 | 2. Unpack it with: `gunzip rdf2smw_linux64.gz`
71 | 3. Call it, on the commandline (see the usage section below).
72 |
73 | Usage
74 | -----
75 |
76 | Call the rdf2smw binary, specifying a file with triples in n-triples or turtle
77 | format, with the `--in` flag, and an output file in XML format with the
78 | `--out` flag, like so:
79 |
80 | ```bash
81 | ./rdf2smw --in triples.nt --out semantic_mediawiki_pages.xml
82 | ```
83 |
84 | In addition to the specified output file, there will be separate files for
85 | templates and properties, named similar to the main output file, but replacing
86 | `.xml` with `_templates.xml` and `_properties.xml` respectively.
87 |
88 | These XML files can then be imported into MediaWiki / Semantic MediaWiki, via
89 | the `importDump.php` maintenance script, located in the `maintenance` folder
90 | under the main mediawiki folder.
91 |
92 | ```bash
93 | php /maintenance/importDump.php semantic_mediawiki_pages_templates.xml
94 | php /maintenance/importDump.php semantic_mediawiki_pages_properties.xml
95 | php /maintenance/importDump.php semantic_mediawiki_pages.xml
96 | ```
97 |
98 | Note that the order above is highly recommended (templates, then properties,
99 | then the rest), so as to avoid unnecessary re-computing of semantic data after
100 | the import is done.
101 |
102 | Architecture
103 | ------------
104 |
105 | Find below a schematic illustration of the flow-based programming process graph
106 | of the rdf2smw program:
107 |
108 | ![Flow-based programming process graph of rdf2smw](architecture.png)
109 |
110 | _Illustration created with
111 | [drawfbp](https://github.com/jpaulm/drawfbp)_
112 |
113 | Known limitations
114 | -----------------
115 |
116 | Only N-triples is supported as input format right now. We plan to add more formats shortly.
117 |
118 | Technical notes
119 | ---------------
120 |
121 | rdf2smw is based on the [FlowBase](https://github.com/flowbase/flowbase)
122 | flow-based programming micro-framework.
123 |
124 | Acknowledgements
125 | ----------------
126 |
127 | rdf2smw makes heavy use of [Petter Goksøyr Åsen](https://github.com/boutros)'s awesome [RDF parsing library](https://github.com/knakk/rdf).
128 |
--------------------------------------------------------------------------------
/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rdfio/rdf2smw/5d86513aabe52616e3a3e7fd7acb4467f3a1376a/architecture.png
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | ---
2 | machine:
3 | post:
4 | - go get github.com/jstemmer/go-junit-report
5 | - go get github.com/axw/gocov/gocov
6 | - go get github.com/AlekSi/gocov-xml
7 |
8 | test:
9 | override:
10 | - mkdir -p $GOPATH/src/github.com/rdfio
11 | - ln -s $HOME/rdf2smw $GOPATH/src/github.com/rdfio/rdf2smw
12 | - mkdir -p $CIRCLE_TEST_REPORTS/go-junit
13 | - go test -v -race ./... | go-junit-report > $CIRCLE_TEST_REPORTS/go-junit/report.xml
14 | - bash testcov.sh
15 | - gocov convert cover.out | gocov-xml > coverage.xml
16 | post:
17 | - bash <(curl -s https://codecov.io/bash)
18 |
--------------------------------------------------------------------------------
/components/catfilterer.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | type CategoryFilterer struct {
4 | In chan *WikiPage
5 | Out chan *WikiPage
6 | Categories []*Category
7 | }
8 |
9 | func NewCategoryFilterer(categories []*Category) *CategoryFilterer {
10 | return &CategoryFilterer{
11 | In: make(chan *WikiPage, BUFSIZE),
12 | Out: make(chan *WikiPage, BUFSIZE),
13 | Categories: categories,
14 | }
15 | }
16 |
17 | func (p *CategoryFilterer) Run() {
18 | defer close(p.Out)
19 | for page := range p.In {
20 | for _, pageCat := range page.Categories {
21 | if catInArray(pageCat, p.Categories) {
22 | p.Out <- page
23 | break
24 | }
25 | }
26 | }
27 | }
28 |
29 | func catInArray(searchCat *Category, cats []*Category) bool {
30 | for _, cat := range cats {
31 | if searchCat.Name == cat.Name {
32 | return true
33 | }
34 | }
35 | return false
36 | }
37 |
--------------------------------------------------------------------------------
/components/const.go:
--------------------------------------------------------------------------------
1 | package components
2 |
// BUFSIZE is the buffer size used when creating the buffered channels
// (ports) of the components in this package.
const BUFSIZE = 16
6 |
--------------------------------------------------------------------------------
/components/filereader.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "bufio"
5 | "log"
6 |
7 | "github.com/flowbase/flowbase"
8 | "github.com/spf13/afero"
9 | )
10 |
11 | // --------------------------------------------------------------------------------
12 | // FileReader
13 | // --------------------------------------------------------------------------------
14 |
15 | // FileReader is a process that reads files, based on file names it receives on the
16 | // FileReader.InFileName port / channel, and writes out the output line by line
17 | // as strings on the FileReader.OutLine port / channel.
18 | type FileReader struct {
19 | InFileName chan string
20 | OutLine chan string
21 | fs afero.Fs
22 | }
23 |
24 | // NewOsFileReader returns an initialized FileReader, initialized with an OS
25 | // (normal) file system
26 | func NewOsFileReader() *FileReader {
27 | return NewFileReader(afero.NewOsFs())
28 | }
29 |
30 | // NewFileReader returns an initialized FileReader, initialized with the afero
31 | // file system provided as an argument
32 | func NewFileReader(fileSystem afero.Fs) *FileReader {
33 | return &FileReader{
34 | InFileName: make(chan string, BUFSIZE),
35 | OutLine: make(chan string, BUFSIZE),
36 | fs: fileSystem,
37 | }
38 | }
39 |
40 | // Run runs the FileReader process. It does not spawn a separate go-routine, so
41 | // you have to prepend the go keyword when calling it, in order to have it run
42 | // in a separate go-routine.
43 | func (p *FileReader) Run() {
44 | defer close(p.OutLine)
45 |
46 | flowbase.Debug.Println("Starting loop")
47 | for fileName := range p.InFileName {
48 | flowbase.Debug.Printf("Starting processing file %s\n", fileName)
49 | fh, err := p.fs.Open(fileName)
50 | if err != nil {
51 | log.Fatal(err)
52 | }
53 | defer fh.Close()
54 |
55 | sc := bufio.NewScanner(fh)
56 | for sc.Scan() {
57 | if err := sc.Err(); err != nil {
58 | log.Fatal(err)
59 | }
60 | p.OutLine <- sc.Text()
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/components/filereader_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "github.com/spf13/afero"
6 | "testing"
7 | )
8 |
9 | // TestNewOSFileReader tests NewOSFileReader
10 | func TestNewOSFileReader(t *testing.T) {
11 | flowbase.InitLogWarning()
12 |
13 | fr := NewOsFileReader()
14 | if fr.InFileName == nil {
15 | t.Error("In-port InFileName not initialized in New FileReader")
16 | }
17 | if fr.OutLine == nil {
18 | t.Error("In-port InFileName not initialized in New FileReader")
19 | }
20 |
21 | go func() {
22 | fr.InFileName <- "teststring"
23 | }()
24 | teststr1 := <-fr.InFileName
25 | if teststr1 != "teststring" {
26 | t.Error("In-port InFileName is not a string channel")
27 | fr.InFileName <- "teststring"
28 | }
29 |
30 | go func() {
31 | fr.OutLine <- "teststring"
32 | }()
33 | teststr2 := <-fr.OutLine
34 | if teststr2 != "teststring" {
35 | t.Error("Out-port OutLine is not a string channel")
36 | }
37 | }
38 |
39 | // Tests the main behavior of the FileReader process
40 | func TestFileReader(t *testing.T) {
41 | flowbase.InitLogWarning()
42 |
43 | testFileName := "testfile.txt"
44 | line1 := "line one"
45 | line2 := "line two"
46 | testContent := line1 + "\n" + line2
47 |
48 | fs := afero.NewMemMapFs()
49 |
50 | f, err := fs.Create(testFileName)
51 | if err != nil {
52 | t.Errorf("Could not create file %s in memory file system", testFileName)
53 | }
54 | f.WriteString(testContent)
55 | f.Close()
56 |
57 | tmp := []byte{}
58 | f.Read(tmp)
59 |
60 | println(string(tmp))
61 |
62 | fr := NewFileReader(fs)
63 | go func() {
64 | defer close(fr.InFileName)
65 | fr.InFileName <- testFileName
66 | }()
67 |
68 | go fr.Run()
69 |
70 | outStr1 := <-fr.OutLine
71 | outStr2 := <-fr.OutLine
72 |
73 | if outStr1 != line1 {
74 | t.Error("First output from file reader does not match first line in file")
75 | }
76 | if outStr2 != line2 {
77 | t.Error("Second output from file reader does not match second line in file")
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/components/iptypes.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | str "strings"
5 |
6 | "github.com/knakk/rdf"
7 | )
8 |
9 | // --------------------------------------------------------------------------------
10 | // TripleAggregate
11 | // --------------------------------------------------------------------------------
12 |
13 | type TripleAggregate struct {
14 | Subject rdf.Subject
15 | SubjectStr string
16 | Triples []rdf.Triple
17 | }
18 |
19 | func NewTripleAggregate(subj rdf.Subject, triples []rdf.Triple) *TripleAggregate {
20 | return &TripleAggregate{
21 | Subject: subj,
22 | SubjectStr: subj.String(),
23 | Triples: triples,
24 | }
25 | }
26 |
27 | // --------------------------------------------------------------------------------
28 | // WikiPage
29 | // --------------------------------------------------------------------------------
30 |
31 | type WikiPage struct {
32 | Title string
33 | Type int
34 | Facts []*Fact
35 | Categories []*Category
36 | SpecificCategory *Category
37 | }
38 |
39 | func NewWikiPage(title string, facts []*Fact, categories []*Category, specificCategory *Category, pageType int) *WikiPage {
40 | return &WikiPage{
41 | Title: title,
42 | Facts: facts,
43 | Categories: categories,
44 | SpecificCategory: specificCategory,
45 | Type: pageType,
46 | }
47 | }
48 |
49 | func (p *WikiPage) AddFact(fact *Fact) {
50 | p.Facts = append(p.Facts, fact)
51 | }
52 |
53 | func (p *WikiPage) AddFactUnique(fact *Fact) {
54 | factExists := false
55 | for _, existingFact := range p.Facts {
56 | if fact.Property == existingFact.Property && fact.Value == existingFact.Value {
57 | factExists = true
58 | break
59 | }
60 | }
61 | if !factExists {
62 | p.AddFact(fact)
63 | }
64 | }
65 |
66 | func (p *WikiPage) AddCategory(category *Category) {
67 | p.Categories = append(p.Categories, category)
68 | }
69 |
70 | func (p *WikiPage) AddCategoryUnique(category *Category) {
71 | catExists := false
72 | for _, existingCat := range p.Categories {
73 | if category.Name == existingCat.Name {
74 | catExists = true
75 | break
76 | }
77 | }
78 | if !catExists {
79 | p.AddCategory(category)
80 | }
81 | }
82 |
83 | // ------------------------------------------------------------
84 | // Helper type: Fact
85 | // ------------------------------------------------------------
86 |
// Fact is a property/value pair, to be written as a semantic fact on a wiki
// page.
type Fact struct {
	Property string
	Value    string
}

// NewFact returns a Fact with the given property and value.
func NewFact(property string, value string) *Fact {
	return &Fact{
		Property: property,
		Value:    value,
	}
}

// asWikiFact renders the fact in the [[Property::Value]] wiki syntax, followed
// by a newline. The value is escaped with escapeWikiChars; the property is
// written as-is.
func (f *Fact) asWikiFact() string {
	return "[[" + f.Property + "::" + f.escapeWikiChars(f.Value) + "]]\n"
}

// escapeWikiChars replaces characters in inStr that would otherwise be
// interpreted as wiki or XML markup: square brackets become parentheses, "|"
// becomes ",", "=" becomes "-", and angle brackets become the entities &lt;
// and &gt; (the previous replacements for "<" and ">" mapped each character to
// itself and therefore escaped nothing).
func (f *Fact) escapeWikiChars(inStr string) string {
	// None of the replacement texts contains a character that is itself
	// replaced, so a single pass gives the same result as chained replaces.
	return str.NewReplacer(
		"[", "(",
		"]", ")",
		"|", ",",
		"=", "-",
		"<", "&lt;",
		">", "&gt;",
	).Replace(inStr)
}
112 |
113 | // ------------------------------------------------------------
114 | // Helper type: Category
115 | // ------------------------------------------------------------
116 |
// Category is a wiki category, identified by its name.
type Category struct {
	Name string
}

// NewCategory returns a Category with the given name.
func NewCategory(name string) *Category {
	cat := new(Category)
	cat.Name = name
	return cat
}

// asWikiString renders the category in the [[Category:Name]] wiki syntax,
// followed by a newline.
func (c *Category) asWikiString() string {
	const prefix, suffix = "[[Category:", "]]\n"
	return prefix + c.Name + suffix
}
130 |
--------------------------------------------------------------------------------
/components/mwxmlcreator.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 | str "strings"
6 | "time"
7 | )
8 |
9 | type MWXMLCreator struct {
10 | InWikiPage chan *WikiPage
11 | OutTemplates chan string
12 | OutProperties chan string
13 | OutPages chan string
14 | UseTemplates bool
15 | }
16 |
17 | func NewMWXMLCreator(useTemplates bool) *MWXMLCreator {
18 | return &MWXMLCreator{
19 | InWikiPage: make(chan *WikiPage, BUFSIZE),
20 | OutTemplates: make(chan string, BUFSIZE),
21 | OutProperties: make(chan string, BUFSIZE),
22 | OutPages: make(chan string, BUFSIZE),
23 | UseTemplates: useTemplates,
24 | }
25 | }
26 |
// wikiXmlTpl is the fmt template for one <page> element in MediaWiki's XML
// import format. Its verbs are, in order: page title (%s), namespace number
// (%d), revision timestamp (%s) and the page's wiki text (%s).
const wikiXmlTpl = `
	<page>
		<title>%s</title>
		<ns>%d</ns>
		<revision>
			<timestamp>%s</timestamp>
			<contributor>
				<ip>127.0.0.1</ip>
			</contributor>
			<comment>Page created by RDF2SMW commandline tool</comment>
			<model>wikitext</model>
			<format>text/x-wiki</format>
			<text xml:space="preserve">
%s</text>
		</revision>
	</page>
`
44 |
45 | var pageTypeToMWNamespace = map[int]int{
46 | URITypeClass: 14,
47 | URITypeTemplate: 10,
48 | URITypePredicate: 102,
49 | URITypeUndefined: 0,
50 | }
51 |
52 | func (p *MWXMLCreator) Run() {
53 | tplPropertyIdx := make(map[string]map[string]int)
54 |
55 | defer close(p.OutTemplates)
56 | defer close(p.OutProperties)
57 | defer close(p.OutPages)
58 |
59 | p.OutPages <- "\n"
60 | p.OutProperties <- "\n"
61 |
62 | for page := range p.InWikiPage {
63 |
64 | wikiText := ""
65 |
66 | if p.UseTemplates && len(page.Categories) > 0 { // We need at least one category, as to name the (to-be) template
67 |
68 | var templateName string
69 | if page.SpecificCategory.Name != "" {
70 | templateName = page.SpecificCategory.Name
71 | } else {
72 | // Pick last item (biggest chance to be pretty specific?)
73 | templateName = page.Categories[len(page.Categories)-1].Name
74 | //println("Page ", page.Title, " | Didn't have a specific catogory, so selected ", templateName)
75 | }
76 | templateTitle := "Template:" + templateName
77 |
78 | // Make sure template page exists
79 | if tplPropertyIdx[templateTitle] == nil {
80 | tplPropertyIdx[templateTitle] = make(map[string]int)
81 | }
82 |
83 | wikiText += "{{" + templateName + "\n" // TODO: What to do when we have multipel categories?
84 |
85 | // Add facts as parameters to the template call
86 | var lastProperty string
87 | for _, fact := range page.Facts {
88 | // Write facts to template call on current page
89 |
90 | val := escapeWikiChars(fact.Value)
91 | if fact.Property == lastProperty {
92 | wikiText += "," + val + "\n"
93 | } else {
94 | wikiText += "|" + spacesToUnderscores(fact.Property) + "=" + val + "\n"
95 | }
96 |
97 | lastProperty = fact.Property
98 |
99 | // Add fact to the relevant template page
100 | tplPropertyIdx[templateTitle][fact.Property] = 1
101 | }
102 |
103 | // Add categories as multi-valued call to the "categories" value of the template
104 | wikiText += "|Categories="
105 | for i, cat := range page.Categories {
106 | if i == 0 {
107 | wikiText += cat.Name
108 | } else {
109 | wikiText += "," + cat.Name
110 | }
111 | }
112 |
113 | wikiText += "\n}}"
114 | } else {
115 |
116 | // Add fact statements
117 | for _, fact := range page.Facts {
118 | wikiText += fact.asWikiFact()
119 | }
120 |
121 | // Add category statements
122 | for _, cat := range page.Categories {
123 | wikiText += cat.asWikiString()
124 | }
125 |
126 | }
127 |
128 | xmlData := fmt.Sprintf(wikiXmlTpl, page.Title, pageTypeToMWNamespace[page.Type], time.Now().Format("2006-01-02T15:04:05Z"), wikiText)
129 |
130 | // Print out the generated XML one line at a time
131 | if page.Type == URITypePredicate {
132 | p.OutProperties <- xmlData
133 | } else {
134 | p.OutPages <- xmlData
135 | }
136 | }
137 | p.OutPages <- "\n"
138 | p.OutProperties <- "\n"
139 |
140 | p.OutTemplates <- "\n"
141 | // Create template pages
142 | for tplName, tplProperties := range tplPropertyIdx {
143 | tplText := `{|class="wikitable smwtable"
144 | !colspan="2"| ` + str.Replace(tplName, "Template:", "", -1) + `: {{PAGENAMEE}}
145 | `
146 | for property := range tplProperties {
147 | argName := spacesToUnderscores(property)
148 | tplText += fmt.Sprintf("|-\n!%s\n|{{#arraymap:{{{%s|}}}|,|x|[[%s::x]]|,}}\n", property, argName, property)
149 | }
150 | tplText += "|}\n\n"
151 | // Add categories
152 | tplText += "{{#arraymap:{{{Categories}}}|,|x|[[Category:x]]|}}\n"
153 |
154 | xmlData := fmt.Sprintf(wikiXmlTpl, tplName, pageTypeToMWNamespace[URITypeTemplate], time.Now().Format("2006-01-02T15:04:05Z"), tplText)
155 | p.OutTemplates <- xmlData
156 | }
157 | p.OutTemplates <- "\n"
158 | }
159 |
// spacesToUnderscores returns inStr with every space character replaced by an
// underscore.
func spacesToUnderscores(inStr string) string {
	parts := str.Split(inStr, " ")
	return str.Join(parts, "_")
}
163 |
// escapeWikiChars replaces characters in inStr that would otherwise be
// interpreted as wiki or XML markup: square brackets become parentheses, "|"
// becomes ",", "=" becomes "-", and angle brackets become the entities &lt;
// and &gt; (the previous replacements for "<" and ">" mapped each character to
// itself and therefore escaped nothing).
//
// TODO: Probably move out to separate component!
func escapeWikiChars(inStr string) string {
	// None of the replacement texts contains a character that is itself
	// replaced, so a single pass gives the same result as chained replaces.
	return str.NewReplacer(
		"[", "(",
		"]", ")",
		"|", ",",
		"=", "-",
		"<", "&lt;",
		">", "&gt;",
	).Replace(inStr)
}
174 |
--------------------------------------------------------------------------------
/components/mwxmlcreator_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "testing"
6 | )
7 |
8 | // TestNewMWXMLCreator tests NewMWXMLCreator
9 | func TestNewMWXMLCreator(t *testing.T) {
10 | flowbase.InitLogDebug()
11 |
12 | mxc := NewMWXMLCreator(true)
13 |
14 | if mxc.InWikiPage == nil {
15 | t.Error("InWikiPage is not initialized")
16 | }
17 | if mxc.OutTemplates == nil {
18 | t.Error("OutTemplates is not initialized")
19 | }
20 | if mxc.OutProperties == nil {
21 | t.Error("OutProperties is not initialized")
22 | }
23 | if mxc.OutPages == nil {
24 | t.Error("OutPages is not initialized")
25 | }
26 | if mxc.UseTemplates != true {
27 | t.Error("UseTemplates field is initialized wrongly")
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/components/residxcreator.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | type ResourceIndexCreator struct {
4 | In chan *TripleAggregate
5 | Out chan *map[string]*TripleAggregate
6 | }
7 |
8 | func NewResourceIndexCreator() *ResourceIndexCreator {
9 | return &ResourceIndexCreator{
10 | In: make(chan *TripleAggregate, BUFSIZE),
11 | Out: make(chan *map[string]*TripleAggregate),
12 | }
13 | }
14 |
15 | func (p *ResourceIndexCreator) Run() {
16 | defer close(p.Out)
17 |
18 | idx := make(map[string]*TripleAggregate)
19 | for aggr := range p.In {
20 | idx[aggr.SubjectStr] = aggr
21 | }
22 |
23 | p.Out <- &idx
24 | }
25 |
--------------------------------------------------------------------------------
/components/residxcreator_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 | "github.com/flowbase/flowbase"
6 | "github.com/knakk/rdf"
7 | "testing"
8 | )
9 |
10 | // TestNewResouceIndexCreator tests NewResourceIndexCreator
11 | func TestNewResourceIndexCreator(t *testing.T) {
12 | flowbase.InitLogWarning()
13 |
14 | ric := NewResourceIndexCreator()
15 |
16 | if ric.In == nil {
17 | t.Error("In-port not initialized")
18 | }
19 | if ric.Out == nil {
20 | t.Error("Out-port not initialized")
21 | }
22 | }
23 |
24 | func TestResourceIndexCreator(t *testing.T) {
25 | flowbase.InitLogWarning()
26 |
27 | ric := NewResourceIndexCreator()
28 |
29 | var triples = []rdf.Triple{}
30 |
31 | go func() {
32 | defer close(ric.In)
33 |
34 | for i := 1; i <= 2; i++ {
35 |
36 | triples = []rdf.Triple{}
37 | s, serr := rdf.NewIRI(fmt.Sprintf("http://example.org/s%d", i))
38 | if serr != nil {
39 | t.Error("Could not create Subject IRI")
40 | }
41 | for j := 1; j <= 3; j++ {
42 | p, perr := rdf.NewIRI(fmt.Sprintf("http://example.org/p%d", j))
43 | if perr != nil {
44 | t.Error("Could not create Predicate IRI")
45 | }
46 | o, oerr := rdf.NewLiteral(fmt.Sprintf("o%d", j))
47 | if oerr != nil {
48 | t.Error("Could not create Object Literal")
49 | }
50 | tr := rdf.Triple{
51 | Subj: s,
52 | Pred: p,
53 | Obj: o,
54 | }
55 | triples = append(triples, tr)
56 | }
57 |
58 | aggr := NewTripleAggregate(s, triples)
59 | ric.In <- aggr
60 | }
61 | }()
62 |
63 | go ric.Run()
64 |
65 | resIdx := <-ric.Out
66 |
67 | if (*resIdx)["http://example.org/s1"] == nil {
68 | t.Error("Resource index does not contain first subject")
69 | }
70 |
71 | if (*resIdx)["http://example.org/s1"].Subject.String() != "http://example.org/s1" {
72 | t.Error("Subject string in first subject is wrong")
73 | }
74 |
75 | if len((*resIdx)["http://example.org/s1"].Triples) != 3 {
76 | t.Error("Wrong number of triples for first subject")
77 | }
78 |
79 | if (*resIdx)["http://example.org/s2"] == nil {
80 | t.Error("Resource index does not contain second subject")
81 | }
82 |
83 | if (*resIdx)["http://example.org/s2"].Subject.String() != "http://example.org/s2" {
84 | t.Error("Subject string in second subject is wrong")
85 | }
86 |
87 | if len((*resIdx)["http://example.org/s2"].Triples) != 3 {
88 | t.Error("Wrong number of triples for second subject")
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/components/residxfanout.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | type ResourceIndexFanOut struct {
4 | In chan *map[string]*TripleAggregate
5 | Out map[string]chan *map[string]*TripleAggregate
6 | }
7 |
8 | func NewResourceIndexFanOut() *ResourceIndexFanOut {
9 | return &ResourceIndexFanOut{
10 | In: make(chan *map[string]*TripleAggregate),
11 | Out: make(map[string]chan *map[string]*TripleAggregate),
12 | }
13 | }
14 |
15 | func (p *ResourceIndexFanOut) Run() {
16 | for _, outPort := range p.Out {
17 | defer close(outPort)
18 | }
19 |
20 | for idx := range p.In {
21 | for _, outPort := range p.Out {
22 | outPort <- idx
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/components/residxfanout_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "testing"
6 | )
7 |
8 | // TestNewResourceIndexFanOut tests NewResourceIndexFanOut
9 | func TestNewResourceIndexFanOut(t *testing.T) {
10 | flowbase.InitLogDebug()
11 |
12 | rif := NewResourceIndexFanOut()
13 |
14 | if rif.In == nil {
15 | t.Error("In-port not initialized with channel")
16 | }
17 | if rif.Out == nil {
18 | t.Error("Out-port not initialized with map of channels")
19 | }
20 | }
21 |
22 | func TestResourceIndexFanOut(t *testing.T) {
23 | flowbase.InitLogDebug()
24 |
25 | rif := NewResourceIndexFanOut()
26 | rif.Out["out1"] = make(chan *map[string]*TripleAggregate)
27 | rif.Out["out2"] = make(chan *map[string]*TripleAggregate)
28 |
29 | resIdxInner := make(map[string]*TripleAggregate)
30 | resIdx := &resIdxInner
31 |
32 | go func() {
33 | defer close(rif.In)
34 | rif.In <- resIdx
35 | }()
36 | go rif.Run()
37 |
38 | resIdx1 := <-rif.Out["out1"]
39 | if resIdx1 == nil {
40 | t.Error("Got nil as output from out1")
41 | }
42 | resIdx2 := <-rif.Out["out2"]
43 | if resIdx2 == nil {
44 | t.Error("Got nil as output from out2")
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/components/residxtoresaggrconv.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | type ResourceIndexToTripleAggregates struct {
4 | In chan *map[string]*TripleAggregate
5 | Out chan *TripleAggregate
6 | }
7 |
8 | func NewResourceIndexToTripleAggregates() *ResourceIndexToTripleAggregates {
9 | return &ResourceIndexToTripleAggregates{
10 | In: make(chan *map[string]*TripleAggregate, BUFSIZE),
11 | Out: make(chan *TripleAggregate, BUFSIZE),
12 | }
13 | }
14 |
15 | func (p *ResourceIndexToTripleAggregates) Run() {
16 | defer close(p.Out)
17 |
18 | for idx := range p.In {
19 | for _, aggr := range *idx {
20 | p.Out <- aggr
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/components/residxtoresaggrconv_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "github.com/knakk/rdf"
6 | "testing"
7 | )
8 |
9 | // TestNewResourceIndexToTripleAggregates tests NewResourceIndexToTripleAggregates
10 | func TestNewResourceIndexToTripleAggregates(t *testing.T) {
11 | flowbase.InitLogDebug()
12 |
13 | rita := NewResourceIndexToTripleAggregates()
14 |
15 | if rita.In == nil {
16 | t.Error("In-port not initialized with map of channels")
17 | }
18 | if rita.Out == nil {
19 | t.Error("Out-port not initialized with channel")
20 | }
21 | }
22 |
23 | // TestResourceIndexToTripleAggregates tests ResourceIndexToTripleAggregates
24 | func TestResourceIndexToTripleAggregates(t *testing.T) {
25 | flowbase.InitLogDebug()
26 | rita := NewResourceIndexToTripleAggregates()
27 |
28 | resIdxInner := make(map[string]*TripleAggregate)
29 | s, err := rdf.NewIRI("http://example.org/s")
30 | if err != nil {
31 | t.Error("Could not create subject IRI")
32 | }
33 | resIdxInner["aggr1"] = NewTripleAggregate(s, nil)
34 | resIdx := &resIdxInner
35 |
36 | go func() {
37 | defer close(rita.In)
38 | rita.In <- resIdx
39 | }()
40 | go rita.Run()
41 | aggr := <-rita.Out
42 | if aggr == nil {
43 | t.Error("Output from ResourceIndexToTripleAggregates was nil")
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/components/smwtplformatter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import "fmt"
4 |
5 | type SMWTemplateCallFormatter struct {
6 | InWikiPage chan *WikiPage
7 | OutWikiPageXML chan string
8 | }
9 |
10 | func NewSMWTemplateCallFormatter() *SMWTemplateCallFormatter {
11 | return &SMWTemplateCallFormatter{
12 | InWikiPage: make(chan *WikiPage, BUFSIZE),
13 | OutWikiPageXML: make(chan string, BUFSIZE),
14 | }
15 | }
16 |
17 | func (p *SMWTemplateCallFormatter) Run() {
18 | fmt.Println("Running SMWTemplateCallFormatter ...")
19 | }
20 |
--------------------------------------------------------------------------------
/components/strfilewriter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "os"
5 |
6 | "github.com/flowbase/flowbase"
7 | )
8 |
// StringFileWriter is a process that writes the strings it receives on In to
// a file, and reports on OutDone when it is finished.
type StringFileWriter struct {
	// In receives the strings to write.
	In chan string
	// OutDone receives a done signal after all strings have been written.
	OutDone chan interface{}
	// fileName is the path of the file that Run creates.
	fileName string
}
14 |
15 | func NewStringFileWriter(fileName string) *StringFileWriter {
16 | return &StringFileWriter{
17 | In: make(chan string, BUFSIZE),
18 | OutDone: make(chan interface{}, BUFSIZE),
19 | fileName: fileName,
20 | }
21 | }
22 |
23 | func (p *StringFileWriter) Run() {
24 | defer close(p.OutDone)
25 |
26 | fh, err := os.Create(p.fileName)
27 | if err != nil {
28 | panic("Could not create output file: " + err.Error())
29 | }
30 | defer fh.Close()
31 | for s := range p.In {
32 | fh.WriteString(s)
33 | }
34 |
35 | flowbase.Debug.Printf("Sending done signal on chan %v now in StringFileWriter ...\n", p.OutDone)
36 | p.OutDone <- &DoneSignal{}
37 | }
38 |
// DoneSignal is an empty marker value; StringFileWriter sends a pointer to one
// on its OutDone port when it has finished writing.
type DoneSignal struct{}
40 |
--------------------------------------------------------------------------------
/components/stringprinter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import "fmt"
4 |
// StringPrinter is a sink process that prints the strings it receives.
type StringPrinter struct {
	// In receives the strings to print.
	In chan string
}
8 |
9 | func NewStringPrinter() *StringPrinter {
10 | return &StringPrinter{
11 | In: make(chan string, BUFSIZE),
12 | }
13 | }
14 |
15 | func (p *StringPrinter) Run() {
16 | for s := range p.In {
17 | fmt.Print(s)
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/components/tripleaggregator.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import "github.com/knakk/rdf"
4 |
5 | // TripleAggregator aggregates triples by subject into a TripleAggregate object
6 | // per subject, containing all the triples for that subject.
7 | type TripleAggregator struct {
8 | In chan rdf.Triple
9 | Out chan *TripleAggregate
10 | }
11 |
12 | // NewTripleAggregator returns an initialized TripleAggregator process.
13 | func NewTripleAggregator() *TripleAggregator {
14 | return &TripleAggregator{
15 | In: make(chan rdf.Triple, BUFSIZE),
16 | Out: make(chan *TripleAggregate, BUFSIZE),
17 | }
18 | }
19 |
20 | // Run runs the TripleAggregator process.
21 | func (p *TripleAggregator) Run() {
22 | defer close(p.Out)
23 | resourceIndex := make(map[rdf.Subject][]rdf.Triple)
24 | for triple := range p.In {
25 | resourceIndex[triple.Subj] = append(resourceIndex[triple.Subj], triple)
26 | }
27 | for subj, triples := range resourceIndex {
28 | tripleAggregate := NewTripleAggregate(subj, triples)
29 | p.Out <- tripleAggregate
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/components/tripleaggregator_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 | "github.com/flowbase/flowbase"
6 | "github.com/knakk/rdf"
7 | "strings"
8 | "testing"
9 | )
10 |
11 | // TestNewNewAggregateTriplesPerSubject tests NewAggregateTriplesPerSubject
12 | func TestNewTripleAggregator(t *testing.T) {
13 | flowbase.InitLogWarning()
14 |
15 | aggr := NewTripleAggregator()
16 |
17 | if aggr.In == nil {
18 | t.Error("In-port not initialized")
19 | }
20 | if aggr.Out == nil {
21 | t.Error("Out-port not initialized")
22 | }
23 | }
24 |
25 | func TestTripleAggregator(t *testing.T) {
26 | flowbase.InitLogWarning()
27 |
28 | tripleAggregatorTestIndata := `
29 | "o1" .
30 | "o2" .
31 | "o3" .
32 | "o4" .
33 | "o5" .
34 | "o6" .
35 | `
36 |
37 | strReader := strings.NewReader(tripleAggregatorTestIndata)
38 | dec := rdf.NewTripleDecoder(strReader, rdf.NTriples)
39 | triples, err := dec.DecodeAll()
40 | if err != nil {
41 | t.Error("Could not decode n-triples test data")
42 | }
43 |
44 | aggregator := NewTripleAggregator()
45 | go func() {
46 | defer close(aggregator.In)
47 | for _, tr := range triples {
48 | aggregator.In <- tr
49 | }
50 | }()
51 | go aggregator.Run()
52 |
53 | aggr1 := <-aggregator.Out
54 | aggr2 := <-aggregator.Out
55 |
56 | if aggr1.Subject.String() == "http://example.org/s2" {
57 | // Swap order of variables
58 | aggr1, aggr2 = aggr2, aggr1
59 | }
60 |
61 | for i, tr := range aggr1.Triples {
62 | j := i + 1
63 |
64 | // subject, predicate, object
65 | s := tr.Subj.String()
66 | p := tr.Pred.String()
67 | o := tr.Obj.String()
68 |
69 | // expected ditto
70 | es := "http://example.org/s1"
71 | ep := fmt.Sprintf("http://example.org/p%d", j)
72 | eo := fmt.Sprintf("o%d", j)
73 |
74 | if s != es {
75 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, es, s)
76 | }
77 | if p != ep {
78 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, ep, p)
79 | }
80 | if o != eo {
81 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, eo, o)
82 | }
83 | }
84 |
85 | if aggr2.Subject.String() != "http://example.org/s2" {
86 | t.Error("Subject of second aggregate is wrong")
87 | }
88 | for i, tr := range aggr2.Triples {
89 | j := i + 4
90 |
91 | // subject, predicate, object
92 | s := tr.Subj.String()
93 | p := tr.Pred.String()
94 | o := tr.Obj.String()
95 |
96 | // expected ditto
97 | es := "http://example.org/s1"
98 | ep := fmt.Sprintf("http://example.org/p%d", j)
99 | eo := fmt.Sprintf("o%d", j)
100 |
101 | if tr.Subj.String() != "http://example.org/s2" {
102 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, es, s)
103 | }
104 | if tr.Pred.String() != fmt.Sprintf("http://example.org/p%d", j) {
105 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, ep, p)
106 | }
107 | if tr.Obj.String() != fmt.Sprintf("o%d", j) {
108 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, eo, o)
109 | }
110 | }
111 |
112 | }
113 |
--------------------------------------------------------------------------------
/components/tripleaggrfanout.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | type FanOutTripleAggregate struct {
4 | In chan *TripleAggregate
5 | Out map[string](chan *TripleAggregate)
6 | }
7 |
8 | // NewFanOut creates a new FanOut process
9 | func NewFanOutTripleAggregate() *FanOutTripleAggregate {
10 | return &FanOutTripleAggregate{
11 | In: make(chan *TripleAggregate, BUFSIZE),
12 | Out: make(map[string](chan *TripleAggregate)),
13 | }
14 | }
15 |
16 | // Run runs the FanOut process
17 | func (proc *FanOutTripleAggregate) Run() {
18 | for _, outPort := range proc.Out {
19 | defer close(outPort)
20 | }
21 |
22 | for ft := range proc.In {
23 | for _, outPort := range proc.Out {
24 | outPort <- ft
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/components/tripleaggrprinter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import "fmt"
4 |
5 | type TripleAggregatePrinter struct {
6 | In chan *TripleAggregate
7 | }
8 |
9 | func NewTripleAggregatePrinter() *TripleAggregatePrinter {
10 | return &TripleAggregatePrinter{
11 | In: make(chan *TripleAggregate, BUFSIZE),
12 | }
13 | }
14 |
15 | func (p *TripleAggregatePrinter) Run() {
16 | for trAggr := range p.In {
17 | fmt.Printf("Subject: %s\nTriples:\n", trAggr.Subject)
18 | for _, tr := range trAggr.Triples {
19 | fmt.Printf("\t<%s> <%s> <%s>\n", tr.Subj.String(), tr.Pred.String(), tr.Obj.String())
20 | }
21 | fmt.Println()
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/components/tripleparser.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "io"
5 | "log"
6 | str "strings"
7 |
8 | "github.com/knakk/rdf"
9 | )
10 |
11 | type TripleParser struct {
12 | In chan string
13 | Out chan rdf.Triple
14 | }
15 |
16 | func NewTripleParser() *TripleParser {
17 | return &TripleParser{
18 | In: make(chan string, BUFSIZE),
19 | Out: make(chan rdf.Triple, BUFSIZE),
20 | }
21 | }
22 |
23 | func (p *TripleParser) Run() {
24 | defer close(p.Out)
25 | for line := range p.In {
26 | lineReader := str.NewReader(line)
27 | dec := rdf.NewTripleDecoder(lineReader, rdf.Turtle)
28 | for triple, err := dec.Decode(); err != io.EOF; triple, err = dec.Decode() {
29 | if err != nil {
30 | log.Fatal("Could not encode to triple: ", err.Error())
31 | } else if triple.Subj != nil && triple.Pred != nil && triple.Obj != nil {
32 | p.Out <- triple
33 | } else {
34 | log.Fatal("Something was encoded as nil in the triple:", triple)
35 | }
36 | }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/components/tripleprinter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/knakk/rdf"
7 | )
8 |
9 | type TriplePrinter struct {
10 | In chan rdf.Triple
11 | }
12 |
13 | func NewTriplePrinter() *TriplePrinter {
14 | return &TriplePrinter{
15 | In: make(chan rdf.Triple, BUFSIZE),
16 | }
17 | }
18 |
19 | func (p *TriplePrinter) Run() {
20 | for tr := range p.In {
21 | fmt.Printf("S: %s\nP: %s\nO: %s\n\n", tr.Subj.String(), tr.Pred.String(), tr.Obj.String())
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/components/triplestowikipageconv.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "regexp"
5 | str "strings"
6 |
7 | "github.com/knakk/rdf"
8 | )
9 |
10 | // Constants etc ---------------------------------------------------------------
11 |
// titleProperties lists the predicate URIs whose object may be used as the
// wiki title of a resource. The order is significant: findTitleInTriples
// walks the list front to back, so earlier entries win over later ones.
var titleProperties = []string{
	"http://semantic-mediawiki.org/swivt/1.0#page",
	"http://www.w3.org/2000/01/rdf-schema#label",
	"http://purl.org/dc/elements/1.1/title",
	"http://purl.org/dc/terms/title",
	// The SKOS vocabulary names this property "prefLabel".
	"http://www.w3.org/2004/02/skos/core#prefLabel",
	// Not a SKOS term; kept so data relying on this spelling still works.
	"http://www.w3.org/2004/02/skos/core#preferredLabel",
	"http://xmlns.com/foaf/0.1/name",
}
20 |
var (
	// namespaceAbbreviations maps namespace URIs to short aliases.
	namespaceAbbreviations = map[string]string{
		"http://www.opentox.org/api/1.1#": "opentox",
	}

	// propertyTypes lists the rdf:type values that mark a resource as a
	// property (see determineType).
	propertyTypes = []string{
		"http://www.w3.org/2002/07/owl#AnnotationProperty",
		"http://www.w3.org/2002/07/owl#DatatypeProperty",
		"http://www.w3.org/2002/07/owl#ObjectProperty",
	}

	// categoryTypes lists the rdf:type values that mark a resource as a
	// class, i.e. a wiki category (see determineType).
	categoryTypes = []string{
		"http://www.w3.org/2002/07/owl#Class",
	}
)
34 |
const (
	// Predicates that assign a resource to a type or a super class.
	typePropertyURI     = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
	subClassPropertyURI = "http://www.w3.org/2000/01/rdf-schema#subClassOf"

	// Literal data type URIs that are mapped to wiki property types.
	dataTypeURIString     = "http://www.w3.org/2001/XMLSchema#string"
	dataTypeURILangString = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
	dataTypeURIInteger    = "http://www.w3.org/2001/XMLSchema#integer"
	dataTypeURIFloat      = "http://www.w3.org/2001/XMLSchema#float"
)
46 |
// Kinds of resource a URI can denote. Numbering starts at 1, so the zero
// value of an int is not a valid URI type.
const (
	URITypeUndefined = iota + 1
	URITypePredicate
	URITypeClass
	URITypeTemplate
)
54 |
55 | // Code -----------------------------------------------------------------------
56 |
57 | // TripleAggregateToWikiPageConverter takes *TripleAggregate's and converts
58 | // them into a *WikiPage which can be used to generate wiki text content.
59 | type TripleAggregateToWikiPageConverter struct {
60 | InAggregate chan *TripleAggregate
61 | InIndex chan *map[string]*TripleAggregate
62 | OutPage chan *WikiPage
63 | cleanUpRegexes []*regexp.Regexp
64 | }
65 |
66 | func NewTripleAggregateToWikiPageConverter() *TripleAggregateToWikiPageConverter {
67 | return &TripleAggregateToWikiPageConverter{
68 | InAggregate: make(chan *TripleAggregate, BUFSIZE),
69 | InIndex: make(chan *map[string]*TripleAggregate, BUFSIZE),
70 | OutPage: make(chan *WikiPage, BUFSIZE),
71 | cleanUpRegexes: []*regexp.Regexp{
72 | regexp.MustCompile(" [(][^)]*:[^)]*[)]"),
73 | regexp.MustCompile(" [[][^]]*:[^]]*[]]"),
74 | },
75 | }
76 | }
77 |
78 | func (p *TripleAggregateToWikiPageConverter) Run() {
79 | defer close(p.OutPage)
80 |
81 | predPageIndex := make(map[string]*WikiPage)
82 |
83 | resourceIndex := <-p.InIndex
84 |
85 | for aggr := range p.InAggregate {
86 | pageType := p.determineType(aggr)
87 |
88 | pageTitle, _ := p.convertUriToWikiTitle(aggr.SubjectStr, pageType, resourceIndex)
89 |
90 | page := NewWikiPage(pageTitle, []*Fact{}, []*Category{}, nil, pageType)
91 |
92 | topSuperCatsCnt := 0
93 | for _, tr := range aggr.Triples {
94 |
95 | predTitle, propertyStr := p.convertUriToWikiTitle(tr.Pred.String(), URITypePredicate, resourceIndex) // Here we know it is a predicate, simply because its location in a triple
96 |
97 | // Make sure property page exists
98 | if predPageIndex[predTitle] == nil {
99 | predPageIndex[predTitle] = NewWikiPage(predTitle, []*Fact{}, []*Category{}, nil, URITypePredicate)
100 | }
101 |
102 | var valueStr string
103 |
104 | if tr.Obj.Type() == rdf.TermIRI {
105 |
106 | valueAggr := (*resourceIndex)[tr.Obj.String()]
107 | valueUriType := p.determineType(valueAggr)
108 | _, valueStr = p.convertUriToWikiTitle(tr.Obj.String(), valueUriType, resourceIndex)
109 |
110 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Page"))
111 |
112 | } else if tr.Obj.Type() == rdf.TermLiteral {
113 |
114 | valueStr = tr.Obj.String()
115 |
116 | for _, r := range p.cleanUpRegexes {
117 | valueStr = r.ReplaceAllString(valueStr, "")
118 | }
119 |
120 | dataTypeStr := tr.Obj.(rdf.Literal).DataType.String()
121 |
122 | // Add type info on the current property's page
123 | switch dataTypeStr {
124 | case dataTypeURIString:
125 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text"))
126 | case dataTypeURILangString:
127 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text"))
128 | case dataTypeURIInteger:
129 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number"))
130 | case dataTypeURIFloat:
131 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number"))
132 | }
133 | }
134 |
135 | if tr.Pred.String() == typePropertyURI || tr.Pred.String() == subClassPropertyURI {
136 | page.AddCategoryUnique(NewCategory(valueStr))
137 | superCatsCnt := p.countSuperCategories(tr, resourceIndex)
138 | if superCatsCnt > topSuperCatsCnt {
139 | topSuperCatsCnt = superCatsCnt
140 | page.SpecificCategory = NewCategory(valueStr)
141 | //println("Page:", page.Title, " | Adding cat", valueStr, "since has", superCatsCnt, "super categories.")
142 | }
143 | } else {
144 | page.AddFactUnique(NewFact(propertyStr, valueStr))
145 | }
146 | }
147 |
148 | // Add Equivalent URI fact
149 | equivURIFact := NewFact("Equivalent URI", aggr.Subject.String())
150 | page.AddFactUnique(equivURIFact)
151 |
152 | // Don't send predicates just yet (we want to gather facts about them,
153 | // and send at the end) ...
154 | if pageType == URITypePredicate {
155 | if predPageIndex[page.Title] != nil {
156 | // Add facts and categories to existing page
157 | for _, fact := range page.Facts {
158 | predPageIndex[page.Title].AddFactUnique(fact)
159 | }
160 | for _, cat := range page.Categories {
161 | predPageIndex[page.Title].AddCategoryUnique(cat)
162 | }
163 | } else {
164 | // If page does not exist, use the newly created one
165 | predPageIndex[page.Title] = page
166 | }
167 | } else {
168 | p.OutPage <- page
169 | }
170 | }
171 |
172 | for _, predPage := range predPageIndex {
173 | p.OutPage <- predPage
174 | }
175 | }
176 |
177 | func (p *TripleAggregateToWikiPageConverter) determineType(uriAggr *TripleAggregate) int {
178 | if uriAggr != nil {
179 | if uriAggr.Triples != nil {
180 | for _, tr := range uriAggr.Triples {
181 | for _, propType := range propertyTypes {
182 | if tr.Pred.String() == typePropertyURI && tr.Obj.String() == propType {
183 | return URITypePredicate
184 | }
185 | }
186 | for _, catType := range categoryTypes {
187 | if tr.Pred.String() == typePropertyURI && tr.Obj.String() == catType {
188 | return URITypeClass
189 | }
190 | }
191 | }
192 | }
193 | }
194 | return URITypeUndefined
195 | }
196 |
197 | // For properties, the factTitle and pageTitle will be different (The page
198 | // title including the "Property:" prefix), while for normal pages, they will
199 | // be the same.
200 | func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, uriType int, resourceIndex *map[string]*TripleAggregate) (pageTitle string, factTitle string) {
201 |
202 | aggr := (*resourceIndex)[uri]
203 |
204 | // Conversion strategies:
205 | // 1. Existing wiki title (in wiki, or cache)
206 | // 2. Use configured title-deciding properties
207 | if aggr != nil {
208 | factTitle = p.findTitleInTriples(aggr.Triples)
209 | }
210 |
211 | // 3. Shorten URI namespace to alias (e.g. http://purl.org/dc -> dc:)
212 | // (Does this apply for properties only?)
213 |
214 | // 4. Remove namespace, keep only local part of URL (Split on '/' or '#')
215 | if factTitle == "" {
216 | bits := str.Split(uri, "#")
217 | lastBit := bits[len(bits)-1]
218 | bits = str.Split(lastBit, "/")
219 | lastBit = bits[len(bits)-1]
220 | factTitle = lastBit
221 | }
222 |
223 | // Clean up strange characters
224 | factTitle = str.Replace(factTitle, "[", "(", -1)
225 | factTitle = str.Replace(factTitle, "]", ")", -1)
226 | factTitle = str.Replace(factTitle, "{", "(", -1)
227 | factTitle = str.Replace(factTitle, "}", ")", -1)
228 | factTitle = str.Replace(factTitle, "|", " ", -1)
229 | factTitle = str.Replace(factTitle, "#", " ", -1)
230 | factTitle = str.Replace(factTitle, "<", "less than", -1)
231 | factTitle = str.Replace(factTitle, ">", "greater than", -1)
232 | factTitle = str.Replace(factTitle, "?", " ", -1)
233 | factTitle = str.Replace(factTitle, "&", " ", -1)
234 | factTitle = str.Replace(factTitle, ",", " ", -1) // Can't allow comma's as we use it as a separator in template variables
235 | factTitle = str.Replace(factTitle, ".", " ", -1)
236 | factTitle = str.Replace(factTitle, "=", "-", -1)
237 |
238 | // Clean up according to regexes
239 | for _, r := range p.cleanUpRegexes {
240 | factTitle = r.ReplaceAllString(factTitle, "")
241 | }
242 |
243 | // Limit to max 255 chars (due to MediaWiki limitation)
244 | titleIsShortened := false
245 | for len(factTitle) >= 250 {
246 | factTitle = removeLastWord(factTitle)
247 | titleIsShortened = true
248 | }
249 |
250 | if titleIsShortened {
251 | factTitle += " ..."
252 | }
253 |
254 | factTitle = p.upperCaseFirst(factTitle)
255 |
256 | if uriType == URITypePredicate {
257 | pageTitle = "Property:" + factTitle
258 | } else if uriType == URITypeClass {
259 | pageTitle = "Category:" + factTitle
260 | } else {
261 | pageTitle = factTitle
262 | }
263 |
264 | return pageTitle, factTitle
265 | }
266 |
267 | func (p *TripleAggregateToWikiPageConverter) findTitleInTriples(triples []rdf.Triple) string {
268 | for _, titleProp := range titleProperties {
269 | for _, tr := range triples {
270 | if tr.Pred.String() == titleProp {
271 | return tr.Obj.String()
272 | }
273 | }
274 | }
275 | return ""
276 | }
277 |
278 | func (p *TripleAggregateToWikiPageConverter) countSuperCategories(tr rdf.Triple, ri *map[string]*TripleAggregate) int {
279 | catPage := (*ri)[tr.Obj.String()]
280 | topSuperCatsCnt := 0
281 | if catPage != nil {
282 | for _, subTr := range catPage.Triples {
283 | if subTr.Pred.String() == typePropertyURI || subTr.Pred.String() == subClassPropertyURI {
284 | superCatsCnt := p.countSuperCategories(subTr, ri) + 1
285 | if superCatsCnt > topSuperCatsCnt {
286 | topSuperCatsCnt = superCatsCnt
287 | }
288 | }
289 | }
290 | }
291 | return topSuperCatsCnt
292 | }
293 |
294 | func (p *TripleAggregateToWikiPageConverter) upperCaseFirst(inStr string) string {
295 | var outStr string
296 | if inStr != "" {
297 | outStr = str.ToUpper(inStr[0:1]) + inStr[1:]
298 | }
299 | return outStr
300 | }
301 |
// removeLastWord returns inStr without its last space-separated word and the
// space in front of it. A string containing no space is a single word, so the
// result is then the empty string.
func removeLastWord(inStr string) string {
	lastSpace := str.LastIndex(inStr, " ")
	if lastSpace < 0 {
		return ""
	}
	return inStr[:lastSpace]
}
307 |
--------------------------------------------------------------------------------
/components/triplestowikipageconv_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "testing"
6 | )
7 |
8 | // TestNewTripleAggregateToWikiPageConverter tests NewTripleAggregateToWikiPageConverter()
9 | func TestNewTripleAggregateToWikiPageConverter(t *testing.T) {
10 | flowbase.InitLogDebug()
11 |
12 | mxc := NewTripleAggregateToWikiPageConverter()
13 |
14 | if mxc.InAggregate == nil {
15 | t.Error("InAggregate is not initialized")
16 | }
17 | if mxc.InIndex == nil {
18 | t.Error("InIndex is not initialized")
19 | }
20 | if mxc.OutPage == nil {
21 | t.Error("OutPage is not initialized")
22 | }
23 | if mxc.cleanUpRegexes == nil {
24 | t.Error("cleanUpRegexes is not initialized")
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/components/ttlfilereader.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "io"
5 | "log"
6 |
7 | "github.com/flowbase/flowbase"
8 | "github.com/knakk/rdf"
9 | "github.com/spf13/afero"
10 | )
11 |
12 | // TurtleFileReader is a process that reads turtle files (Files in the turtle
13 | // RDF format), based on file names it receives on the FileReader.InFileName
14 | // port / channel, and writes out the output line by line as strings on the
15 | // FileReader.OutLine port / channel.
16 | type TurtleFileReader struct {
17 | InFileName chan string
18 | OutTriple chan rdf.Triple
19 | fs afero.Fs
20 | }
21 |
22 | // NewOsTurtleFileReader returns an initialized TurtleFileReader, with an OS
23 | // (normal) file system
24 | func NewOsTurtleFileReader() *TurtleFileReader {
25 | return NewTurtleFileReader(afero.NewOsFs())
26 | }
27 |
28 | // NewTurtleFileReader returns an initialized TurtleFileReader, initialized
29 | // with the afero file system provided provided as an argument
30 | func NewTurtleFileReader(fileSystem afero.Fs) *TurtleFileReader {
31 | return &TurtleFileReader{
32 | InFileName: make(chan string, BUFSIZE),
33 | OutTriple: make(chan rdf.Triple, BUFSIZE),
34 | fs: fileSystem,
35 | }
36 | }
37 |
38 | // Run runs the TurtleFileReader process. It does not spawn a separate
39 | // go-routine, so you have to prepend the go keyword when calling it, in order
40 | // to have it run in a separate go-routine.
41 | func (p *TurtleFileReader) Run() {
42 | defer close(p.OutTriple)
43 |
44 | flowbase.Debug.Println("Starting loop")
45 | for fileName := range p.InFileName {
46 | flowbase.Debug.Printf("Starting processing file %s\n", fileName)
47 | fh, err := p.fs.Open(fileName)
48 | if err != nil {
49 | log.Fatal(err)
50 | }
51 | defer fh.Close()
52 |
53 | dec := rdf.NewTripleDecoder(fh, rdf.Turtle)
54 | for triple, err := dec.Decode(); err != io.EOF; triple, err = dec.Decode() {
55 | if err != nil {
56 | log.Fatal("Could not encode to triple: ", err.Error())
57 | } else if triple.Subj != nil && triple.Pred != nil && triple.Obj != nil {
58 | p.OutTriple <- triple
59 | } else {
60 | log.Fatal("Something was encoded as nil in the triple:", triple)
61 | }
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/components/ttlfilereader_test.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "github.com/flowbase/flowbase"
5 | "github.com/spf13/afero"
6 | "testing"
7 | )
8 |
9 | // TestNewOSFileReader tests NewOSTurtleFileReader
10 | func TestNewTurtleFileReader(t *testing.T) {
11 | flowbase.InitLogWarning()
12 |
13 | fr := NewOsTurtleFileReader()
14 | if fr.InFileName == nil {
15 | t.Error("In-port InFileName not initialized in New FileReader")
16 | }
17 | if fr.OutTriple == nil {
18 | t.Error("In-port InFileName not initialized in New FileReader")
19 | }
20 |
21 | go func() {
22 | fr.InFileName <- "teststring"
23 | }()
24 | teststr1 := <-fr.InFileName
25 | if teststr1 != "teststring" {
26 | t.Error("In-port InFileName is not a string channel")
27 | fr.InFileName <- "teststring"
28 | }
29 | }
30 |
31 | // Tests the main behavior of the TurtleFileReader process
32 | func TestTurtleFileReader(t *testing.T) {
33 | flowbase.InitLogWarning()
34 |
35 | s1 := "http://example.org/s1"
36 | p1 := "http://example.org/p1"
37 | o1 := "string1"
38 | s2 := "http://example.org/p2"
39 | p2 := "http://example.org/p2"
40 | o2 := "string2"
41 | triple1 := "<" + s1 + "> <" + p1 + "> \"" + o1 + "\" ."
42 | triple2 := "<" + s2 + "> <" + p2 + "> \"" + o2 + "\" ."
43 | testContent := triple1 + "\n" + triple2
44 |
45 | fs := afero.NewMemMapFs()
46 |
47 | testFileName := "testfile.ttl"
48 | f, err := fs.Create(testFileName)
49 | if err != nil {
50 | t.Errorf("Could not create file %s in memory file system", testFileName)
51 | }
52 | f.WriteString(testContent)
53 | f.Close()
54 |
55 | fr := NewTurtleFileReader(fs)
56 | go func() {
57 | defer close(fr.InFileName)
58 | fr.InFileName <- testFileName
59 | }()
60 |
61 | go fr.Run()
62 |
63 | outTriple1 := <-fr.OutTriple
64 | outTriple2 := <-fr.OutTriple
65 |
66 | if outTriple1.Subj.String() != s1 {
67 | t.Error("Subject of first triple is wrong")
68 | }
69 | if outTriple1.Pred.String() != p1 {
70 | t.Error("Predicate of first triple is wrong")
71 | }
72 | if outTriple1.Obj.String() != o1 {
73 | t.Error("Object of first triple is wrong")
74 | }
75 | if outTriple2.Subj.String() != s2 {
76 | t.Error("Subject of second triple is wrong")
77 | }
78 | if outTriple2.Pred.String() != p2 {
79 | t.Error("Predicate of second triple is wrong")
80 | }
81 | if outTriple2.Obj.String() != o2 {
82 | t.Error("Object of second triple is wrong")
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/components/wikipageprinter.go:
--------------------------------------------------------------------------------
1 | package components
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/flowbase/flowbase"
7 | )
8 |
9 | type WikiPagePrinter struct {
10 | In chan *WikiPage
11 | }
12 |
13 | func NewWikiPagePrinter() *WikiPagePrinter {
14 | return &WikiPagePrinter{
15 | In: make(chan *WikiPage, flowbase.BUFSIZE),
16 | }
17 | }
18 |
19 | func (p *WikiPagePrinter) Run() {
20 | for page := range p.In {
21 | fmt.Println("Title:", page.Title)
22 | for _, fact := range page.Facts {
23 | fmt.Print(fact.asWikiFact())
24 | }
25 | for _, cat := range page.Categories {
26 | fmt.Print(cat.asWikiString())
27 | }
28 | fmt.Println("") // Print an empty line
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/rdfio/rdf2smw
2 |
3 | go 1.23.0
4 |
5 | toolchain go1.24.0
6 |
7 | require (
8 | github.com/flowbase/flowbase v0.1.0
9 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042
10 | github.com/spf13/afero v1.14.0
11 | )
12 |
13 | require (
14 | github.com/stretchr/testify v1.10.0 // indirect
15 | golang.org/x/text v0.23.0 // indirect
16 | )
17 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/flowbase/flowbase v0.1.0 h1:/bdKbxZc1P+Kdh+ro03uky+2bqlO3KWkghMDJoVkT/Q=
4 | github.com/flowbase/flowbase v0.1.0/go.mod h1:Yq3H0kx4JWEumGeyYXjEzDiCxyh0RO00DSDXUupOzo4=
5 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042 h1:Vzdm5hdlLdpJOKK+hKtkV5u7xGZmNW6aUBjGcTfwx84=
6 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042/go.mod h1:fYE0718xXI13XMYLc6iHtvXudfyCGMsZ9hxSM1Ommpg=
7 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
9 | github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA=
10 | github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo=
11 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
12 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
13 | golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
14 | golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
15 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
16 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
17 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | /*
2 | rdf2smw is a commandline tool to convert from RDF data to MediaWiki XML Dump
3 | files, for import using MediaWiki's built in importDump.php script.
4 |
5 | Usage
6 |
7 | ./rdf2smw -in <infile> -out <outfile>
8 |
9 | Flags
10 |
11 | -in Input file in RDF N-triples format
12 | -out Output file in (MediaWiki) XML format
13 |
14 | Example usage
15 |
16 | ./rdf2smw -in mydata.nt -out mydata.xml
17 |
18 | For importing the generated XML Dumps into MediaWiki, see this page:
19 | https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps
20 | */
21 | package main
22 |
23 | import (
24 | "flag"
25 | "fmt"
26 | "github.com/rdfio/rdf2smw/components"
27 | "os"
28 |
29 | str "strings"
30 |
31 | "github.com/flowbase/flowbase"
32 | )
33 |
34 | const (
35 | BUFSIZE = 16
36 | )
37 |
38 | // main validates the -in and -out flags, builds and connects the flowbase
39 | // network that converts the input triples into MediaWiki XML files, and runs
40 | // it. The process exits with status 1 when a required flag is missing.
41 | func main() {
42 | 	//flowbase.InitLogDebug()
43 |
44 | 	inFileName := flag.String("in", "", "The input file name")
45 | 	outFileName := flag.String("out", "", "The output file name")
46 | 	flag.Parse()
47 |
48 | 	// Check each flag on its own (no else-if), so that every missing flag is
49 | 	// reported in a single run. Usage errors go to stderr.
50 | 	missingFlag := false
51 | 	if *inFileName == "" {
52 | 		fmt.Fprintln(os.Stderr, "No filename specified to --in")
53 | 		missingFlag = true
54 | 	}
55 | 	if *outFileName == "" {
56 | 		fmt.Fprintln(os.Stderr, "No filename specified to --out")
57 | 		missingFlag = true
58 | 	}
59 | 	if missingFlag {
60 | 		os.Exit(1)
61 | 	}
62 |
63 | 	// ------------------------------------------
64 | 	// Initialize processes
65 | 	// ------------------------------------------
66 |
67 | 	// Create a pipeline runner
68 | 	net := flowbase.NewNet()
69 |
70 | 	// Read in-file
71 | 	ttlFileRead := components.NewOsTurtleFileReader()
72 | 	net.AddProcess(ttlFileRead)
73 |
74 | 	// TripleAggregator
75 | 	aggregator := components.NewTripleAggregator()
76 | 	net.AddProcess(aggregator)
77 |
78 | 	// Create a subject-indexed "index" of all triples
79 | 	indexCreator := components.NewResourceIndexCreator()
80 | 	net.AddProcess(indexCreator)
81 |
82 | 	// Fan-out the triple index to the converter and serializer
83 | 	indexFanOut := components.NewResourceIndexFanOut()
84 | 	net.AddProcess(indexFanOut)
85 |
86 | 	// Serialize the index back to individual subject-tripleaggregates
87 | 	indexToAggr := components.NewResourceIndexToTripleAggregates()
88 | 	net.AddProcess(indexToAggr)
89 |
90 | 	// Convert TripleAggregate to WikiPage
91 | 	triplesToWikiConverter := components.NewTripleAggregateToWikiPageConverter()
92 | 	net.AddProcess(triplesToWikiConverter)
93 |
94 | 	//categoryFilterer := components.NewCategoryFilterer([]string{"DataEntry"})
95 | 	//net.AddProcess(categoryFilterer)
96 |
97 | 	// Pretty-print wiki page data
98 | 	//wikiPagePrinter := components.NewWikiPagePrinter()
99 | 	//net.AddProcess(wikiPagePrinter)
100 |
101 | 	useTemplates := true
102 | 	xmlCreator := components.NewMWXMLCreator(useTemplates)
103 | 	net.AddProcess(xmlCreator)
104 |
105 | 	//printer := components.NewStringPrinter()
106 | 	//net.AddProcess(printer)
107 |
108 | 	// Derive the template and property file names from the output name with
109 | 	// its ".xml" suffix trimmed. Appending to the trimmed base, instead of
110 | 	// replacing the first ".xml" found anywhere in the path, keeps the three
111 | 	// output files distinct even when the name does not end in ".xml".
112 | 	outBase := str.TrimSuffix(*outFileName, ".xml")
113 |
114 | 	templateWriter := components.NewStringFileWriter(outBase + "_templates.xml")
115 | 	net.AddProcess(templateWriter)
116 |
117 | 	propertyWriter := components.NewStringFileWriter(outBase + "_properties.xml")
118 | 	net.AddProcess(propertyWriter)
119 |
120 | 	pageWriter := components.NewStringFileWriter(*outFileName)
121 | 	net.AddProcess(pageWriter)
122 |
123 | 	snk := flowbase.NewSink()
124 | 	net.AddProcess(snk)
125 |
126 | 	// ------------------------------------------
127 | 	// Connect network
128 | 	// ------------------------------------------
129 |
130 | 	ttlFileRead.OutTriple = aggregator.In
131 |
132 | 	aggregator.Out = indexCreator.In
133 |
134 | 	indexCreator.Out = indexFanOut.In
135 | 	indexFanOut.Out["serialize"] = indexToAggr.In
136 | 	indexFanOut.Out["conv"] = triplesToWikiConverter.InIndex
137 |
138 | 	indexToAggr.Out = triplesToWikiConverter.InAggregate
139 |
140 | 	//triplesToWikiConverter.OutPage = categoryFilterer.In
141 | 	//categoryFilterer.Out = xmlCreator.InWikiPage
142 |
143 | 	triplesToWikiConverter.OutPage = xmlCreator.InWikiPage
144 |
145 | 	xmlCreator.OutTemplates = templateWriter.In
146 | 	xmlCreator.OutProperties = propertyWriter.In
147 | 	xmlCreator.OutPages = pageWriter.In
148 |
149 | 	snk.Connect(templateWriter.OutDone)
150 | 	snk.Connect(propertyWriter.OutDone)
151 | 	snk.Connect(pageWriter.OutDone)
152 |
153 | 	// ------------------------------------------
154 | 	// Send in-data and run
155 | 	// ------------------------------------------
156 |
157 | 	// Send the input file name from a separate goroutine and close the port
158 | 	// afterwards; presumably the send would block until the network runs.
159 | 	go func() {
160 | 		defer close(ttlFileRead.InFileName)
161 | 		ttlFileRead.InFileName <- *inFileName
162 | 	}()
163 |
164 | 	net.Run()
165 | }
166 |
--------------------------------------------------------------------------------
/testcov.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the tests of each package with coverage enabled and appends every
3 | # profile (minus its first line) to cover.out.
4 | # NOTE(review): the first line stripped by tail is presumably the "mode:"
5 | # header, so cover.out is probably seeded with one elsewhere - confirm.
6 | set -ex
7 | ERROR=""
8 | for pkg in "github.com/rdfio/rdf2smw" "github.com/rdfio/rdf2smw/components"; do
9 | 	touch profile_tmp.cov
10 | 	# Keep going when a package fails, so the remaining packages still run;
11 | 	# the failure is reported after the loop.
12 | 	go test -v -covermode=count -coverprofile=profile_tmp.cov "$pkg" || ERROR="Error testing $pkg"
13 | 	# exit only accepts a numeric status, so print the message separately.
14 | 	tail -n +2 profile_tmp.cov >> cover.out || { echo "Unable to append coverage for $pkg" >&2; exit 1; }
15 | done
16 | # Fail the script if any package's tests failed.
17 | if [ -n "$ERROR" ]; then
18 | 	echo "$ERROR" >&2
19 | 	exit 1
20 | fi
21 |
--------------------------------------------------------------------------------