├── .codeclimate.yml ├── .github └── workflows │ └── ci.yml ├── LICENSE ├── README.md ├── architecture.png ├── circle.yml ├── components ├── catfilterer.go ├── const.go ├── filereader.go ├── filereader_test.go ├── iptypes.go ├── mwxmlcreator.go ├── mwxmlcreator_test.go ├── residxcreator.go ├── residxcreator_test.go ├── residxfanout.go ├── residxfanout_test.go ├── residxtoresaggrconv.go ├── residxtoresaggrconv_test.go ├── smwtplformatter.go ├── strfilewriter.go ├── stringprinter.go ├── tripleaggregator.go ├── tripleaggregator_test.go ├── tripleaggrfanout.go ├── tripleaggrprinter.go ├── tripleparser.go ├── tripleprinter.go ├── triplestowikipageconv.go ├── triplestowikipageconv_test.go ├── ttlfilereader.go ├── ttlfilereader_test.go └── wikipageprinter.go ├── go.mod ├── go.sum ├── main.go └── testcov.sh /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | engines: 2 | gofmt: 3 | enabled: true 4 | golint: 5 | enabled: true 6 | govet: 7 | enabled: true 8 | 9 | ratings: 10 | paths: 11 | - "**.go" 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build rdf2smw as a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: CI 5 | 6 | on: 7 | push: 8 | branches: [ "master", "dev" ] 9 | pull_request: 10 | branches: [ "master", "dev" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v4 21 | with: 22 | go-version: '1.24' 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Samuel Lampa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rdf2smw 2 | ======= 3 | 4 | [![GitHub Actions CI](https://img.shields.io/github/actions/workflow/status/rdfio/rdf2smw/ci.yml)](https://github.com/rdfio/rdf2smw/actions/workflows/ci.yml) 5 | [![Test Coverage](https://img.shields.io/codecov/c/github/rdfio/rdf2smw.svg)](https://codecov.io/gh/rdfio/rdf2smw) 6 | [![Go Report Card Status](https://goreportcard.com/badge/github.com/rdfio/rdf2smw)](https://goreportcard.com/report/github.com/rdfio/rdf2smw) 7 | [![Code Climate Maintainability](https://img.shields.io/codeclimate/maintainability/rdfio/rdf2smw)](https://codeclimate.com/github/rdfio/rdf2smw) 8 | [![Code Climate Tech Debt](https://img.shields.io/codeclimate/tech-debt/rdfio/rdf2smw)](https://codeclimate.com/github/rdfio/rdf2smw) 9 | [![Code Climate Issues](https://img.shields.io/codeclimate/issues/rdfio/rdf2smw)](https://codeclimate.com/github/rdfio/rdf2smw) 10 | [![GoDoc](https://godoc.org/github.com/rdfio/rdf2smw?status.svg)](https://godoc.org/github.com/rdfio/rdf2smw) 11 | 12 | Updates 13 | ------- 14 | 15 | **Mar 28, 2025:** Added `go.mod`/`go.sum` files to make building work again 16 | with the latest Go (1.24.0). 17 | 18 | **Sep 30, 2016:** rdf2smw was covered in a talk at SMWCon in Frankfurt, Sep 2016. See: [Talk page](https://www.semantic-mediawiki.org/wiki/SMWCon_Fall_2016/Batch_import_of_large_RDF_datasets_using_RDFIO_or_the_new_rdf2smw_tool), [Slides](https://www.slideshare.net/SamuelLampa/batch-import-of-large-rdf-datasets-into-semantic-mediawiki), [Video](https://www.youtube.com/watch?v=k70er1u1ZYs). 19 | 20 | **Sep 4, 2017:** Our paper on RDFIO and rdf2smw was just published! If you use rdf2smw in scientific work, please cite:
21 | Lampa S, Willighagen E, Kohonen P, King A, Vrandečić D, Grafström R, Spjuth O
22 | [RDFIO: extending Semantic MediaWiki for interoperable biomedical data management](https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0136-y)
23 | *Journal of Biomedical Semantics*. **8**:35 (2017). DOI: [10.1186/s13326-017-0136-y](https://dx.doi.org/10.1186/s13326-017-0136-y). 24 | 25 | Import / convert RDF data into a Semantic MediaWiki 26 | --------------------------------------------------- 27 | 28 | A commandline tool to convert from RDF triples to [Semantic MediaWiki](http://semantic-mediawiki.org) facts 29 | in MediaWiki XML export format to be used with [MediaWiki](https://www.mediawiki.org)'s built-in 30 | [XML import feature](https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps). 31 | 32 | This allows you to quickly and simply populate a Semantic MediaWiki page 33 | structure, from an RDF data file. 34 | 35 | It is written in Go for better performance than PHP. The latest version 36 | processes triples into pages in the order of ~55K triples/sec converted into 37 | ~13K pages/sec on an 2014 i5 Haswell dual core processor, to give an idea. 38 | 39 | rdf2smw is very similar to the RDF import function in the 40 | [RDFIO](https://github.com/rdfio/RDFIO) Semantic MediaWiki extension, but takes 41 | another approach: Whereas RDFIO converts RDF to wiki pages and imports them in 42 | the same go, rdf2smw first converts RDF to an XML file outside of PHP (for 43 | better performance), and then importing using MediaWiki's built-in import 44 | function. 45 | 46 | **Status:** The tool is pretty much feature complete, including ability to 47 | write facts via template calls if a categorization (via owl:Class or rdf:type) 48 | of the subject can be done. What is lacking is more options to fine-tune 49 | things. Right now you'll have to modify the source code yourself if you need 50 | any customization. Hope to address this in the near future. 51 | 52 | Dependencies 53 | ------------ 54 | 55 | The tool itself does not have any dependencies, apart from a unix-like 56 | operating system. For importing the generated XML dump file to make sense 57 | though, you will need a web server, PHP, MediaWiki and Semantic MediaWiki. 58 | 59 | An automated virtualbox generation script (so valled "vagrant box"), with all 60 | of this, plus the RDFIO extension, can be found 61 | [here](https://github.com/rdfio/rdfio-vagrantbox), and is highly recommended, 62 | if you don't have a MediaWiki / SemanticMediawiki installation already! 63 | 64 | Installation 65 | ------------ 66 | 67 | For linux 64 bit: 68 | 69 | 1. Download the file `rdf2smw_linux64.gz`, on the [latest release](https://github.com/samuell/rdf2smw/releases). 70 | 2. Unpack it with: `gunzip rdf2smw_linux64.gz` 71 | 3. Call it, on the commandline (see the usage section below). 72 | 73 | Usage 74 | ----- 75 | 76 | Call the rdf2smw binary, specifying a file with triples in n-triples or turtle 77 | format, with the `--in` flag, and an output file in XML format with the 78 | `--out` flag, like so: 79 | 80 | ```bash 81 | ./rdf2smw --in triples.nt --out semantic_mediawiki_pages.xml 82 | ``` 83 | 84 | In addition to the specified output file, there will be separate files for 85 | templates and properties, named similar to the main output file, but replacing 86 | `.xml` with `_templates.xml` and `_properties.xml` respectively. 87 | 88 | These XML files can then be imported into MediaWiki / Semantic MediaWiki, via 89 | the `importDump.php` maintenance script, located in the `maintenance` folder 90 | under the main mediawiki folder. 91 | 92 | ```bash 93 | php /maintenance/importDump.php semantic_mediawiki_pages_templates.xml 94 | php /maintenance/importDump.php semantic_mediawiki_pages_properties.xml 95 | php /maintenance/importDump.php semantic_mediawiki_pages.xml 96 | ``` 97 | 98 | Note that the order above is highly recommended (templates, then properties, 99 | then the rest), so as to avoid unnecessary re-computing of semantic data after 100 | the import is done. 101 | 102 | Architecture 103 | ------------ 104 | 105 | Find below a schematic illustration of the flow-based programming process graph 106 | of the rdf2smw program: 107 | 108 | ![Flow-based programming diagram of rdf2smw](architecture.png) 109 | 110 | _Illustration created with 111 | [drawfbp](https://github.com/jpaulm/drawfbp)_ 112 | 113 | Known limitations 114 | ----------------- 115 | 116 | Only N-triples is supported as input format right now. We plan to add more formats shortly. 117 | 118 | Technical notes 119 | --------------- 120 | 121 | rdf2smw is based on the [FlowBase](https://github.com/flowbase/flowbase) 122 | flow-based programming micro-framework. 123 | 124 | Acknowledgements 125 | ---------------- 126 | 127 | rdf2smw makes heavy use of [Petter Goksøyr Åsen](https://github.com/boutros)'s awesome [RDF parsing library](https://github.com/knakk/rdf). 128 | -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdfio/rdf2smw/5d86513aabe52616e3a3e7fd7acb4467f3a1376a/architecture.png -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | --- 2 | machine: 3 | post: 4 | - go get github.com/jstemmer/go-junit-report 5 | - go get github.com/axw/gocov/gocov 6 | - go get github.com/AlekSi/gocov-xml 7 | 8 | test: 9 | override: 10 | - mkdir -p $GOPATH/src/github.com/rdfio 11 | - ln -s $HOME/rdf2smw $GOPATH/src/github.com/rdfio/rdf2smw 12 | - mkdir -p $CIRCLE_TEST_REPORTS/go-junit 13 | - go test -v -race ./... | go-junit-report > $CIRCLE_TEST_REPORTS/go-junit/report.xml 14 | - bash testcov.sh 15 | - gocov convert cover.out | gocov-xml > coverage.xml 16 | post: 17 | - bash <(curl -s https://codecov.io/bash) 18 | -------------------------------------------------------------------------------- /components/catfilterer.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | type CategoryFilterer struct { 4 | In chan *WikiPage 5 | Out chan *WikiPage 6 | Categories []*Category 7 | } 8 | 9 | func NewCategoryFilterer(categories []*Category) *CategoryFilterer { 10 | return &CategoryFilterer{ 11 | In: make(chan *WikiPage, BUFSIZE), 12 | Out: make(chan *WikiPage, BUFSIZE), 13 | Categories: categories, 14 | } 15 | } 16 | 17 | func (p *CategoryFilterer) Run() { 18 | defer close(p.Out) 19 | for page := range p.In { 20 | for _, pageCat := range page.Categories { 21 | if catInArray(pageCat, p.Categories) { 22 | p.Out <- page 23 | break 24 | } 25 | } 26 | } 27 | } 28 | 29 | func catInArray(searchCat *Category, cats []*Category) bool { 30 | for _, cat := range cats { 31 | if searchCat.Name == cat.Name { 32 | return true 33 | } 34 | } 35 | return false 36 | } 37 | -------------------------------------------------------------------------------- /components/const.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | const ( 4 | BUFSIZE = 16 5 | ) 6 | -------------------------------------------------------------------------------- /components/filereader.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "bufio" 5 | "log" 6 | 7 | "github.com/flowbase/flowbase" 8 | "github.com/spf13/afero" 9 | ) 10 | 11 | // -------------------------------------------------------------------------------- 12 | // FileReader 13 | // -------------------------------------------------------------------------------- 14 | 15 | // FileReader is a process that reads files, based on file names it receives on the 16 | // FileReader.InFileName port / channel, and writes out the output line by line 17 | // as strings on the FileReader.OutLine port / channel. 18 | type FileReader struct { 19 | InFileName chan string 20 | OutLine chan string 21 | fs afero.Fs 22 | } 23 | 24 | // NewOsFileReader returns an initialized FileReader, initialized with an OS 25 | // (normal) file system 26 | func NewOsFileReader() *FileReader { 27 | return NewFileReader(afero.NewOsFs()) 28 | } 29 | 30 | // NewFileReader returns an initialized FileReader, initialized with the afero 31 | // file system provided as an argument 32 | func NewFileReader(fileSystem afero.Fs) *FileReader { 33 | return &FileReader{ 34 | InFileName: make(chan string, BUFSIZE), 35 | OutLine: make(chan string, BUFSIZE), 36 | fs: fileSystem, 37 | } 38 | } 39 | 40 | // Run runs the FileReader process. It does not spawn a separate go-routine, so 41 | // you have to prepend the go keyword when calling it, in order to have it run 42 | // in a separate go-routine. 43 | func (p *FileReader) Run() { 44 | defer close(p.OutLine) 45 | 46 | flowbase.Debug.Println("Starting loop") 47 | for fileName := range p.InFileName { 48 | flowbase.Debug.Printf("Starting processing file %s\n", fileName) 49 | fh, err := p.fs.Open(fileName) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | defer fh.Close() 54 | 55 | sc := bufio.NewScanner(fh) 56 | for sc.Scan() { 57 | if err := sc.Err(); err != nil { 58 | log.Fatal(err) 59 | } 60 | p.OutLine <- sc.Text() 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /components/filereader_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "github.com/spf13/afero" 6 | "testing" 7 | ) 8 | 9 | // TestNewOSFileReader tests NewOSFileReader 10 | func TestNewOSFileReader(t *testing.T) { 11 | flowbase.InitLogWarning() 12 | 13 | fr := NewOsFileReader() 14 | if fr.InFileName == nil { 15 | t.Error("In-port InFileName not initialized in New FileReader") 16 | } 17 | if fr.OutLine == nil { 18 | t.Error("In-port InFileName not initialized in New FileReader") 19 | } 20 | 21 | go func() { 22 | fr.InFileName <- "teststring" 23 | }() 24 | teststr1 := <-fr.InFileName 25 | if teststr1 != "teststring" { 26 | t.Error("In-port InFileName is not a string channel") 27 | fr.InFileName <- "teststring" 28 | } 29 | 30 | go func() { 31 | fr.OutLine <- "teststring" 32 | }() 33 | teststr2 := <-fr.OutLine 34 | if teststr2 != "teststring" { 35 | t.Error("Out-port OutLine is not a string channel") 36 | } 37 | } 38 | 39 | // Tests the main behavior of the FileReader process 40 | func TestFileReader(t *testing.T) { 41 | flowbase.InitLogWarning() 42 | 43 | testFileName := "testfile.txt" 44 | line1 := "line one" 45 | line2 := "line two" 46 | testContent := line1 + "\n" + line2 47 | 48 | fs := afero.NewMemMapFs() 49 | 50 | f, err := fs.Create(testFileName) 51 | if err != nil { 52 | t.Errorf("Could not create file %s in memory file system", testFileName) 53 | } 54 | f.WriteString(testContent) 55 | f.Close() 56 | 57 | tmp := []byte{} 58 | f.Read(tmp) 59 | 60 | println(string(tmp)) 61 | 62 | fr := NewFileReader(fs) 63 | go func() { 64 | defer close(fr.InFileName) 65 | fr.InFileName <- testFileName 66 | }() 67 | 68 | go fr.Run() 69 | 70 | outStr1 := <-fr.OutLine 71 | outStr2 := <-fr.OutLine 72 | 73 | if outStr1 != line1 { 74 | t.Error("First output from file reader does not match first line in file") 75 | } 76 | if outStr2 != line2 { 77 | t.Error("Second output from file reader does not match second line in file") 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /components/iptypes.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | str "strings" 5 | 6 | "github.com/knakk/rdf" 7 | ) 8 | 9 | // -------------------------------------------------------------------------------- 10 | // TripleAggregate 11 | // -------------------------------------------------------------------------------- 12 | 13 | type TripleAggregate struct { 14 | Subject rdf.Subject 15 | SubjectStr string 16 | Triples []rdf.Triple 17 | } 18 | 19 | func NewTripleAggregate(subj rdf.Subject, triples []rdf.Triple) *TripleAggregate { 20 | return &TripleAggregate{ 21 | Subject: subj, 22 | SubjectStr: subj.String(), 23 | Triples: triples, 24 | } 25 | } 26 | 27 | // -------------------------------------------------------------------------------- 28 | // WikiPage 29 | // -------------------------------------------------------------------------------- 30 | 31 | type WikiPage struct { 32 | Title string 33 | Type int 34 | Facts []*Fact 35 | Categories []*Category 36 | SpecificCategory *Category 37 | } 38 | 39 | func NewWikiPage(title string, facts []*Fact, categories []*Category, specificCategory *Category, pageType int) *WikiPage { 40 | return &WikiPage{ 41 | Title: title, 42 | Facts: facts, 43 | Categories: categories, 44 | SpecificCategory: specificCategory, 45 | Type: pageType, 46 | } 47 | } 48 | 49 | func (p *WikiPage) AddFact(fact *Fact) { 50 | p.Facts = append(p.Facts, fact) 51 | } 52 | 53 | func (p *WikiPage) AddFactUnique(fact *Fact) { 54 | factExists := false 55 | for _, existingFact := range p.Facts { 56 | if fact.Property == existingFact.Property && fact.Value == existingFact.Value { 57 | factExists = true 58 | break 59 | } 60 | } 61 | if !factExists { 62 | p.AddFact(fact) 63 | } 64 | } 65 | 66 | func (p *WikiPage) AddCategory(category *Category) { 67 | p.Categories = append(p.Categories, category) 68 | } 69 | 70 | func (p *WikiPage) AddCategoryUnique(category *Category) { 71 | catExists := false 72 | for _, existingCat := range p.Categories { 73 | if category.Name == existingCat.Name { 74 | catExists = true 75 | break 76 | } 77 | } 78 | if !catExists { 79 | p.AddCategory(category) 80 | } 81 | } 82 | 83 | // ------------------------------------------------------------ 84 | // Helper type: Fact 85 | // ------------------------------------------------------------ 86 | 87 | type Fact struct { 88 | Property string 89 | Value string 90 | } 91 | 92 | func NewFact(property string, value string) *Fact { 93 | return &Fact{ 94 | Property: property, 95 | Value: value, 96 | } 97 | } 98 | 99 | func (f *Fact) asWikiFact() string { 100 | return "[[" + f.Property + "::" + f.escapeWikiChars(f.Value) + "]]\n" 101 | } 102 | 103 | func (f *Fact) escapeWikiChars(inStr string) string { 104 | outStr := str.Replace(inStr, "[", "(", -1) 105 | outStr = str.Replace(outStr, "]", ")", -1) 106 | outStr = str.Replace(outStr, "|", ",", -1) 107 | outStr = str.Replace(outStr, "=", "-", -1) 108 | outStr = str.Replace(outStr, "<", "<", -1) 109 | outStr = str.Replace(outStr, ">", ">", -1) 110 | return outStr 111 | } 112 | 113 | // ------------------------------------------------------------ 114 | // Helper type: Category 115 | // ------------------------------------------------------------ 116 | 117 | type Category struct { 118 | Name string 119 | } 120 | 121 | func NewCategory(name string) *Category { 122 | return &Category{ 123 | Name: name, 124 | } 125 | } 126 | 127 | func (c *Category) asWikiString() string { 128 | return "[[Category:" + c.Name + "]]\n" 129 | } 130 | -------------------------------------------------------------------------------- /components/mwxmlcreator.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "fmt" 5 | str "strings" 6 | "time" 7 | ) 8 | 9 | type MWXMLCreator struct { 10 | InWikiPage chan *WikiPage 11 | OutTemplates chan string 12 | OutProperties chan string 13 | OutPages chan string 14 | UseTemplates bool 15 | } 16 | 17 | func NewMWXMLCreator(useTemplates bool) *MWXMLCreator { 18 | return &MWXMLCreator{ 19 | InWikiPage: make(chan *WikiPage, BUFSIZE), 20 | OutTemplates: make(chan string, BUFSIZE), 21 | OutProperties: make(chan string, BUFSIZE), 22 | OutPages: make(chan string, BUFSIZE), 23 | UseTemplates: useTemplates, 24 | } 25 | } 26 | 27 | const wikiXmlTpl = ` 28 | 29 | %s 30 | %d 31 | 32 | %s 33 | 34 | 127.0.0.1 35 | 36 | Page created by RDF2SMW commandline tool 37 | wikitext 38 | text/x-wiki 39 | 40 | %s 41 | 42 | 43 | ` 44 | 45 | var pageTypeToMWNamespace = map[int]int{ 46 | URITypeClass: 14, 47 | URITypeTemplate: 10, 48 | URITypePredicate: 102, 49 | URITypeUndefined: 0, 50 | } 51 | 52 | func (p *MWXMLCreator) Run() { 53 | tplPropertyIdx := make(map[string]map[string]int) 54 | 55 | defer close(p.OutTemplates) 56 | defer close(p.OutProperties) 57 | defer close(p.OutPages) 58 | 59 | p.OutPages <- "\n" 60 | p.OutProperties <- "\n" 61 | 62 | for page := range p.InWikiPage { 63 | 64 | wikiText := "" 65 | 66 | if p.UseTemplates && len(page.Categories) > 0 { // We need at least one category, as to name the (to-be) template 67 | 68 | var templateName string 69 | if page.SpecificCategory.Name != "" { 70 | templateName = page.SpecificCategory.Name 71 | } else { 72 | // Pick last item (biggest chance to be pretty specific?) 73 | templateName = page.Categories[len(page.Categories)-1].Name 74 | //println("Page ", page.Title, " | Didn't have a specific catogory, so selected ", templateName) 75 | } 76 | templateTitle := "Template:" + templateName 77 | 78 | // Make sure template page exists 79 | if tplPropertyIdx[templateTitle] == nil { 80 | tplPropertyIdx[templateTitle] = make(map[string]int) 81 | } 82 | 83 | wikiText += "{{" + templateName + "\n" // TODO: What to do when we have multipel categories? 84 | 85 | // Add facts as parameters to the template call 86 | var lastProperty string 87 | for _, fact := range page.Facts { 88 | // Write facts to template call on current page 89 | 90 | val := escapeWikiChars(fact.Value) 91 | if fact.Property == lastProperty { 92 | wikiText += "," + val + "\n" 93 | } else { 94 | wikiText += "|" + spacesToUnderscores(fact.Property) + "=" + val + "\n" 95 | } 96 | 97 | lastProperty = fact.Property 98 | 99 | // Add fact to the relevant template page 100 | tplPropertyIdx[templateTitle][fact.Property] = 1 101 | } 102 | 103 | // Add categories as multi-valued call to the "categories" value of the template 104 | wikiText += "|Categories=" 105 | for i, cat := range page.Categories { 106 | if i == 0 { 107 | wikiText += cat.Name 108 | } else { 109 | wikiText += "," + cat.Name 110 | } 111 | } 112 | 113 | wikiText += "\n}}" 114 | } else { 115 | 116 | // Add fact statements 117 | for _, fact := range page.Facts { 118 | wikiText += fact.asWikiFact() 119 | } 120 | 121 | // Add category statements 122 | for _, cat := range page.Categories { 123 | wikiText += cat.asWikiString() 124 | } 125 | 126 | } 127 | 128 | xmlData := fmt.Sprintf(wikiXmlTpl, page.Title, pageTypeToMWNamespace[page.Type], time.Now().Format("2006-01-02T15:04:05Z"), wikiText) 129 | 130 | // Print out the generated XML one line at a time 131 | if page.Type == URITypePredicate { 132 | p.OutProperties <- xmlData 133 | } else { 134 | p.OutPages <- xmlData 135 | } 136 | } 137 | p.OutPages <- "\n" 138 | p.OutProperties <- "\n" 139 | 140 | p.OutTemplates <- "\n" 141 | // Create template pages 142 | for tplName, tplProperties := range tplPropertyIdx { 143 | tplText := `{|class="wikitable smwtable" 144 | !colspan="2"| ` + str.Replace(tplName, "Template:", "", -1) + `: {{PAGENAMEE}} 145 | ` 146 | for property := range tplProperties { 147 | argName := spacesToUnderscores(property) 148 | tplText += fmt.Sprintf("|-\n!%s\n|{{#arraymap:{{{%s|}}}|,|x|[[%s::x]]|,}}\n", property, argName, property) 149 | } 150 | tplText += "|}\n\n" 151 | // Add categories 152 | tplText += "{{#arraymap:{{{Categories}}}|,|x|[[Category:x]]|}}\n" 153 | 154 | xmlData := fmt.Sprintf(wikiXmlTpl, tplName, pageTypeToMWNamespace[URITypeTemplate], time.Now().Format("2006-01-02T15:04:05Z"), tplText) 155 | p.OutTemplates <- xmlData 156 | } 157 | p.OutTemplates <- "\n" 158 | } 159 | 160 | func spacesToUnderscores(inStr string) string { 161 | return str.Replace(inStr, " ", "_", -1) 162 | } 163 | 164 | // TODO: Probably move out to separate component! 165 | func escapeWikiChars(inStr string) string { 166 | outStr := str.Replace(inStr, "[", "(", -1) 167 | outStr = str.Replace(outStr, "]", ")", -1) 168 | outStr = str.Replace(outStr, "|", ",", -1) 169 | outStr = str.Replace(outStr, "=", "-", -1) 170 | outStr = str.Replace(outStr, "<", "<", -1) 171 | outStr = str.Replace(outStr, ">", ">", -1) 172 | return outStr 173 | } 174 | -------------------------------------------------------------------------------- /components/mwxmlcreator_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "testing" 6 | ) 7 | 8 | // TestNewMWXMLCreator tests NewMWXMLCreator 9 | func TestNewMWXMLCreator(t *testing.T) { 10 | flowbase.InitLogDebug() 11 | 12 | mxc := NewMWXMLCreator(true) 13 | 14 | if mxc.InWikiPage == nil { 15 | t.Error("InWikiPage is not initialized") 16 | } 17 | if mxc.OutTemplates == nil { 18 | t.Error("OutTemplates is not initialized") 19 | } 20 | if mxc.OutProperties == nil { 21 | t.Error("OutProperties is not initialized") 22 | } 23 | if mxc.OutPages == nil { 24 | t.Error("OutPages is not initialized") 25 | } 26 | if mxc.UseTemplates != true { 27 | t.Error("UseTemplates field is initialized wrongly") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /components/residxcreator.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | type ResourceIndexCreator struct { 4 | In chan *TripleAggregate 5 | Out chan *map[string]*TripleAggregate 6 | } 7 | 8 | func NewResourceIndexCreator() *ResourceIndexCreator { 9 | return &ResourceIndexCreator{ 10 | In: make(chan *TripleAggregate, BUFSIZE), 11 | Out: make(chan *map[string]*TripleAggregate), 12 | } 13 | } 14 | 15 | func (p *ResourceIndexCreator) Run() { 16 | defer close(p.Out) 17 | 18 | idx := make(map[string]*TripleAggregate) 19 | for aggr := range p.In { 20 | idx[aggr.SubjectStr] = aggr 21 | } 22 | 23 | p.Out <- &idx 24 | } 25 | -------------------------------------------------------------------------------- /components/residxcreator_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "fmt" 5 | "github.com/flowbase/flowbase" 6 | "github.com/knakk/rdf" 7 | "testing" 8 | ) 9 | 10 | // TestNewResouceIndexCreator tests NewResourceIndexCreator 11 | func TestNewResourceIndexCreator(t *testing.T) { 12 | flowbase.InitLogWarning() 13 | 14 | ric := NewResourceIndexCreator() 15 | 16 | if ric.In == nil { 17 | t.Error("In-port not initialized") 18 | } 19 | if ric.Out == nil { 20 | t.Error("Out-port not initialized") 21 | } 22 | } 23 | 24 | func TestResourceIndexCreator(t *testing.T) { 25 | flowbase.InitLogWarning() 26 | 27 | ric := NewResourceIndexCreator() 28 | 29 | var triples = []rdf.Triple{} 30 | 31 | go func() { 32 | defer close(ric.In) 33 | 34 | for i := 1; i <= 2; i++ { 35 | 36 | triples = []rdf.Triple{} 37 | s, serr := rdf.NewIRI(fmt.Sprintf("http://example.org/s%d", i)) 38 | if serr != nil { 39 | t.Error("Could not create Subject IRI") 40 | } 41 | for j := 1; j <= 3; j++ { 42 | p, perr := rdf.NewIRI(fmt.Sprintf("http://example.org/p%d", j)) 43 | if perr != nil { 44 | t.Error("Could not create Predicate IRI") 45 | } 46 | o, oerr := rdf.NewLiteral(fmt.Sprintf("o%d", j)) 47 | if oerr != nil { 48 | t.Error("Could not create Object Literal") 49 | } 50 | tr := rdf.Triple{ 51 | Subj: s, 52 | Pred: p, 53 | Obj: o, 54 | } 55 | triples = append(triples, tr) 56 | } 57 | 58 | aggr := NewTripleAggregate(s, triples) 59 | ric.In <- aggr 60 | } 61 | }() 62 | 63 | go ric.Run() 64 | 65 | resIdx := <-ric.Out 66 | 67 | if (*resIdx)["http://example.org/s1"] == nil { 68 | t.Error("Resource index does not contain first subject") 69 | } 70 | 71 | if (*resIdx)["http://example.org/s1"].Subject.String() != "http://example.org/s1" { 72 | t.Error("Subject string in first subject is wrong") 73 | } 74 | 75 | if len((*resIdx)["http://example.org/s1"].Triples) != 3 { 76 | t.Error("Wrong number of triples for first subject") 77 | } 78 | 79 | if (*resIdx)["http://example.org/s2"] == nil { 80 | t.Error("Resource index does not contain second subject") 81 | } 82 | 83 | if (*resIdx)["http://example.org/s2"].Subject.String() != "http://example.org/s2" { 84 | t.Error("Subject string in second subject is wrong") 85 | } 86 | 87 | if len((*resIdx)["http://example.org/s2"].Triples) != 3 { 88 | t.Error("Wrong number of triples for second subject") 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /components/residxfanout.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | type ResourceIndexFanOut struct { 4 | In chan *map[string]*TripleAggregate 5 | Out map[string]chan *map[string]*TripleAggregate 6 | } 7 | 8 | func NewResourceIndexFanOut() *ResourceIndexFanOut { 9 | return &ResourceIndexFanOut{ 10 | In: make(chan *map[string]*TripleAggregate), 11 | Out: make(map[string]chan *map[string]*TripleAggregate), 12 | } 13 | } 14 | 15 | func (p *ResourceIndexFanOut) Run() { 16 | for _, outPort := range p.Out { 17 | defer close(outPort) 18 | } 19 | 20 | for idx := range p.In { 21 | for _, outPort := range p.Out { 22 | outPort <- idx 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /components/residxfanout_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "testing" 6 | ) 7 | 8 | // TestNewResourceIndexFanOut tests NewResourceIndexFanOut 9 | func TestNewResourceIndexFanOut(t *testing.T) { 10 | flowbase.InitLogDebug() 11 | 12 | rif := NewResourceIndexFanOut() 13 | 14 | if rif.In == nil { 15 | t.Error("In-port not initialized with channel") 16 | } 17 | if rif.Out == nil { 18 | t.Error("Out-port not initialized with map of channels") 19 | } 20 | } 21 | 22 | func TestResourceIndexFanOut(t *testing.T) { 23 | flowbase.InitLogDebug() 24 | 25 | rif := NewResourceIndexFanOut() 26 | rif.Out["out1"] = make(chan *map[string]*TripleAggregate) 27 | rif.Out["out2"] = make(chan *map[string]*TripleAggregate) 28 | 29 | resIdxInner := make(map[string]*TripleAggregate) 30 | resIdx := &resIdxInner 31 | 32 | go func() { 33 | defer close(rif.In) 34 | rif.In <- resIdx 35 | }() 36 | go rif.Run() 37 | 38 | resIdx1 := <-rif.Out["out1"] 39 | if resIdx1 == nil { 40 | t.Error("Got nil as output from out1") 41 | } 42 | resIdx2 := <-rif.Out["out2"] 43 | if resIdx2 == nil { 44 | t.Error("Got nil as output from out2") 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /components/residxtoresaggrconv.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | type ResourceIndexToTripleAggregates struct { 4 | In chan *map[string]*TripleAggregate 5 | Out chan *TripleAggregate 6 | } 7 | 8 | func NewResourceIndexToTripleAggregates() *ResourceIndexToTripleAggregates { 9 | return &ResourceIndexToTripleAggregates{ 10 | In: make(chan *map[string]*TripleAggregate, BUFSIZE), 11 | Out: make(chan *TripleAggregate, BUFSIZE), 12 | } 13 | } 14 | 15 | func (p *ResourceIndexToTripleAggregates) Run() { 16 | defer close(p.Out) 17 | 18 | for idx := range p.In { 19 | for _, aggr := range *idx { 20 | p.Out <- aggr 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /components/residxtoresaggrconv_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "github.com/knakk/rdf" 6 | "testing" 7 | ) 8 | 9 | // TestNewResourceIndexToTripleAggregates tests NewResourceIndexToTripleAggregates 10 | func TestNewResourceIndexToTripleAggregates(t *testing.T) { 11 | flowbase.InitLogDebug() 12 | 13 | rita := NewResourceIndexToTripleAggregates() 14 | 15 | if rita.In == nil { 16 | t.Error("In-port not initialized with map of channels") 17 | } 18 | if rita.Out == nil { 19 | t.Error("Out-port not initialized with channel") 20 | } 21 | } 22 | 23 | // TestResourceIndexToTripleAggregates tests ResourceIndexToTripleAggregates 24 | func TestResourceIndexToTripleAggregates(t *testing.T) { 25 | flowbase.InitLogDebug() 26 | rita := NewResourceIndexToTripleAggregates() 27 | 28 | resIdxInner := make(map[string]*TripleAggregate) 29 | s, err := rdf.NewIRI("http://example.org/s") 30 | if err != nil { 31 | t.Error("Could not create subject IRI") 32 | } 33 | resIdxInner["aggr1"] = NewTripleAggregate(s, nil) 34 | resIdx := &resIdxInner 35 | 36 | go func() { 37 | defer close(rita.In) 38 | rita.In <- resIdx 39 | }() 40 | go rita.Run() 41 | aggr := <-rita.Out 42 | if aggr == nil { 43 | t.Error("Output from ResourceIndexToTripleAggregates was nil") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /components/smwtplformatter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import "fmt" 4 | 5 | type SMWTemplateCallFormatter struct { 6 | InWikiPage chan *WikiPage 7 | OutWikiPageXML chan string 8 | } 9 | 10 | func NewSMWTemplateCallFormatter() *SMWTemplateCallFormatter { 11 | return &SMWTemplateCallFormatter{ 12 | InWikiPage: make(chan *WikiPage, BUFSIZE), 13 | OutWikiPageXML: make(chan string, BUFSIZE), 14 | } 15 | } 16 | 17 | func (p *SMWTemplateCallFormatter) Run() { 18 | fmt.Println("Running SMWTemplateCallFormatter ...") 19 | } 20 | -------------------------------------------------------------------------------- /components/strfilewriter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/flowbase/flowbase" 7 | ) 8 | 9 | type StringFileWriter struct { 10 | In chan string 11 | OutDone chan interface{} 12 | fileName string 13 | } 14 | 15 | func NewStringFileWriter(fileName string) *StringFileWriter { 16 | return &StringFileWriter{ 17 | In: make(chan string, BUFSIZE), 18 | OutDone: make(chan interface{}, BUFSIZE), 19 | fileName: fileName, 20 | } 21 | } 22 | 23 | func (p *StringFileWriter) Run() { 24 | defer close(p.OutDone) 25 | 26 | fh, err := os.Create(p.fileName) 27 | if err != nil { 28 | panic("Could not create output file: " + err.Error()) 29 | } 30 | defer fh.Close() 31 | for s := range p.In { 32 | fh.WriteString(s) 33 | } 34 | 35 | flowbase.Debug.Printf("Sending done signal on chan %v now in StringFileWriter ...\n", p.OutDone) 36 | p.OutDone <- &DoneSignal{} 37 | } 38 | 39 | type DoneSignal struct{} 40 | -------------------------------------------------------------------------------- /components/stringprinter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import "fmt" 4 | 5 | type StringPrinter struct { 6 | In chan string 7 | } 8 | 9 | func NewStringPrinter() *StringPrinter { 10 | return &StringPrinter{ 11 | In: make(chan string, BUFSIZE), 12 | } 13 | } 14 | 15 | func (p *StringPrinter) Run() { 16 | for s := range p.In { 17 | fmt.Print(s) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /components/tripleaggregator.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import "github.com/knakk/rdf" 4 | 5 | // TripleAggregator aggregates triples by subject into a TripleAggregate object 6 | // per subject, containing all the triples for that subject. 7 | type TripleAggregator struct { 8 | In chan rdf.Triple 9 | Out chan *TripleAggregate 10 | } 11 | 12 | // NewTripleAggregator returns an initialized TripleAggregator process. 13 | func NewTripleAggregator() *TripleAggregator { 14 | return &TripleAggregator{ 15 | In: make(chan rdf.Triple, BUFSIZE), 16 | Out: make(chan *TripleAggregate, BUFSIZE), 17 | } 18 | } 19 | 20 | // Run runs the TripleAggregator process. 21 | func (p *TripleAggregator) Run() { 22 | defer close(p.Out) 23 | resourceIndex := make(map[rdf.Subject][]rdf.Triple) 24 | for triple := range p.In { 25 | resourceIndex[triple.Subj] = append(resourceIndex[triple.Subj], triple) 26 | } 27 | for subj, triples := range resourceIndex { 28 | tripleAggregate := NewTripleAggregate(subj, triples) 29 | p.Out <- tripleAggregate 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /components/tripleaggregator_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "fmt" 5 | "github.com/flowbase/flowbase" 6 | "github.com/knakk/rdf" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | // TestNewNewAggregateTriplesPerSubject tests NewAggregateTriplesPerSubject 12 | func TestNewTripleAggregator(t *testing.T) { 13 | flowbase.InitLogWarning() 14 | 15 | aggr := NewTripleAggregator() 16 | 17 | if aggr.In == nil { 18 | t.Error("In-port not initialized") 19 | } 20 | if aggr.Out == nil { 21 | t.Error("Out-port not initialized") 22 | } 23 | } 24 | 25 | func TestTripleAggregator(t *testing.T) { 26 | flowbase.InitLogWarning() 27 | 28 | tripleAggregatorTestIndata := ` 29 | "o1" . 30 | "o2" . 31 | "o3" . 32 | "o4" . 33 | "o5" . 34 | "o6" . 35 | ` 36 | 37 | strReader := strings.NewReader(tripleAggregatorTestIndata) 38 | dec := rdf.NewTripleDecoder(strReader, rdf.NTriples) 39 | triples, err := dec.DecodeAll() 40 | if err != nil { 41 | t.Error("Could not decode n-triples test data") 42 | } 43 | 44 | aggregator := NewTripleAggregator() 45 | go func() { 46 | defer close(aggregator.In) 47 | for _, tr := range triples { 48 | aggregator.In <- tr 49 | } 50 | }() 51 | go aggregator.Run() 52 | 53 | aggr1 := <-aggregator.Out 54 | aggr2 := <-aggregator.Out 55 | 56 | if aggr1.Subject.String() == "http://example.org/s2" { 57 | // Swap order of variables 58 | aggr1, aggr2 = aggr2, aggr1 59 | } 60 | 61 | for i, tr := range aggr1.Triples { 62 | j := i + 1 63 | 64 | // subject, predicate, object 65 | s := tr.Subj.String() 66 | p := tr.Pred.String() 67 | o := tr.Obj.String() 68 | 69 | // expected ditto 70 | es := "http://example.org/s1" 71 | ep := fmt.Sprintf("http://example.org/p%d", j) 72 | eo := fmt.Sprintf("o%d", j) 73 | 74 | if s != es { 75 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, es, s) 76 | } 77 | if p != ep { 78 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, ep, p) 79 | } 80 | if o != eo { 81 | t.Errorf("Subject in triple %d of first aggregate is wrong (Expected %s, got %s)", j, eo, o) 82 | } 83 | } 84 | 85 | if aggr2.Subject.String() != "http://example.org/s2" { 86 | t.Error("Subject of second aggregate is wrong") 87 | } 88 | for i, tr := range aggr2.Triples { 89 | j := i + 4 90 | 91 | // subject, predicate, object 92 | s := tr.Subj.String() 93 | p := tr.Pred.String() 94 | o := tr.Obj.String() 95 | 96 | // expected ditto 97 | es := "http://example.org/s1" 98 | ep := fmt.Sprintf("http://example.org/p%d", j) 99 | eo := fmt.Sprintf("o%d", j) 100 | 101 | if tr.Subj.String() != "http://example.org/s2" { 102 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, es, s) 103 | } 104 | if tr.Pred.String() != fmt.Sprintf("http://example.org/p%d", j) { 105 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, ep, p) 106 | } 107 | if tr.Obj.String() != fmt.Sprintf("o%d", j) { 108 | t.Errorf("Subject in triple %d of second aggregate is wrong (Expected %s, got %s)", j, eo, o) 109 | } 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /components/tripleaggrfanout.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | type FanOutTripleAggregate struct { 4 | In chan *TripleAggregate 5 | Out map[string](chan *TripleAggregate) 6 | } 7 | 8 | // NewFanOut creates a new FanOut process 9 | func NewFanOutTripleAggregate() *FanOutTripleAggregate { 10 | return &FanOutTripleAggregate{ 11 | In: make(chan *TripleAggregate, BUFSIZE), 12 | Out: make(map[string](chan *TripleAggregate)), 13 | } 14 | } 15 | 16 | // Run runs the FanOut process 17 | func (proc *FanOutTripleAggregate) Run() { 18 | for _, outPort := range proc.Out { 19 | defer close(outPort) 20 | } 21 | 22 | for ft := range proc.In { 23 | for _, outPort := range proc.Out { 24 | outPort <- ft 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /components/tripleaggrprinter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import "fmt" 4 | 5 | type TripleAggregatePrinter struct { 6 | In chan *TripleAggregate 7 | } 8 | 9 | func NewTripleAggregatePrinter() *TripleAggregatePrinter { 10 | return &TripleAggregatePrinter{ 11 | In: make(chan *TripleAggregate, BUFSIZE), 12 | } 13 | } 14 | 15 | func (p *TripleAggregatePrinter) Run() { 16 | for trAggr := range p.In { 17 | fmt.Printf("Subject: %s\nTriples:\n", trAggr.Subject) 18 | for _, tr := range trAggr.Triples { 19 | fmt.Printf("\t<%s> <%s> <%s>\n", tr.Subj.String(), tr.Pred.String(), tr.Obj.String()) 20 | } 21 | fmt.Println() 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /components/tripleparser.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "io" 5 | "log" 6 | str "strings" 7 | 8 | "github.com/knakk/rdf" 9 | ) 10 | 11 | type TripleParser struct { 12 | In chan string 13 | Out chan rdf.Triple 14 | } 15 | 16 | func NewTripleParser() *TripleParser { 17 | return &TripleParser{ 18 | In: make(chan string, BUFSIZE), 19 | Out: make(chan rdf.Triple, BUFSIZE), 20 | } 21 | } 22 | 23 | func (p *TripleParser) Run() { 24 | defer close(p.Out) 25 | for line := range p.In { 26 | lineReader := str.NewReader(line) 27 | dec := rdf.NewTripleDecoder(lineReader, rdf.Turtle) 28 | for triple, err := dec.Decode(); err != io.EOF; triple, err = dec.Decode() { 29 | if err != nil { 30 | log.Fatal("Could not encode to triple: ", err.Error()) 31 | } else if triple.Subj != nil && triple.Pred != nil && triple.Obj != nil { 32 | p.Out <- triple 33 | } else { 34 | log.Fatal("Something was encoded as nil in the triple:", triple) 35 | } 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /components/tripleprinter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/knakk/rdf" 7 | ) 8 | 9 | type TriplePrinter struct { 10 | In chan rdf.Triple 11 | } 12 | 13 | func NewTriplePrinter() *TriplePrinter { 14 | return &TriplePrinter{ 15 | In: make(chan rdf.Triple, BUFSIZE), 16 | } 17 | } 18 | 19 | func (p *TriplePrinter) Run() { 20 | for tr := range p.In { 21 | fmt.Printf("S: %s\nP: %s\nO: %s\n\n", tr.Subj.String(), tr.Pred.String(), tr.Obj.String()) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /components/triplestowikipageconv.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "regexp" 5 | str "strings" 6 | 7 | "github.com/knakk/rdf" 8 | ) 9 | 10 | // Constants etc --------------------------------------------------------------- 11 | 12 | var titleProperties = []string{ 13 | "http://semantic-mediawiki.org/swivt/1.0#page", 14 | "http://www.w3.org/2000/01/rdf-schema#label", 15 | "http://purl.org/dc/elements/1.1/title", 16 | "http://purl.org/dc/terms/title", 17 | "http://www.w3.org/2004/02/skos/core#preferredLabel", 18 | "http://xmlns.com/foaf/0.1/name", 19 | } 20 | 21 | var namespaceAbbreviations = map[string]string{ 22 | "http://www.opentox.org/api/1.1#": "opentox", 23 | } 24 | 25 | var propertyTypes = []string{ 26 | "http://www.w3.org/2002/07/owl#AnnotationProperty", 27 | "http://www.w3.org/2002/07/owl#DatatypeProperty", 28 | "http://www.w3.org/2002/07/owl#ObjectProperty", 29 | } 30 | 31 | var categoryTypes = []string{ 32 | "http://www.w3.org/2002/07/owl#Class", 33 | } 34 | 35 | const ( 36 | typePropertyURI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" 37 | subClassPropertyURI = "http://www.w3.org/2000/01/rdf-schema#subClassOf" 38 | ) 39 | 40 | const ( 41 | dataTypeURIString = "http://www.w3.org/2001/XMLSchema#string" 42 | dataTypeURILangString = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" 43 | dataTypeURIInteger = "http://www.w3.org/2001/XMLSchema#integer" 44 | dataTypeURIFloat = "http://www.w3.org/2001/XMLSchema#float" 45 | ) 46 | 47 | const ( 48 | _ = iota 49 | URITypeUndefined 50 | URITypePredicate 51 | URITypeClass 52 | URITypeTemplate 53 | ) 54 | 55 | // Code ----------------------------------------------------------------------- 56 | 57 | // TripleAggregateToWikiPageConverter takes *TripleAggregate's and converts 58 | // them into a *WikiPage which can be used to generate wiki text content. 59 | type TripleAggregateToWikiPageConverter struct { 60 | InAggregate chan *TripleAggregate 61 | InIndex chan *map[string]*TripleAggregate 62 | OutPage chan *WikiPage 63 | cleanUpRegexes []*regexp.Regexp 64 | } 65 | 66 | func NewTripleAggregateToWikiPageConverter() *TripleAggregateToWikiPageConverter { 67 | return &TripleAggregateToWikiPageConverter{ 68 | InAggregate: make(chan *TripleAggregate, BUFSIZE), 69 | InIndex: make(chan *map[string]*TripleAggregate, BUFSIZE), 70 | OutPage: make(chan *WikiPage, BUFSIZE), 71 | cleanUpRegexes: []*regexp.Regexp{ 72 | regexp.MustCompile(" [(][^)]*:[^)]*[)]"), 73 | regexp.MustCompile(" [[][^]]*:[^]]*[]]"), 74 | }, 75 | } 76 | } 77 | 78 | func (p *TripleAggregateToWikiPageConverter) Run() { 79 | defer close(p.OutPage) 80 | 81 | predPageIndex := make(map[string]*WikiPage) 82 | 83 | resourceIndex := <-p.InIndex 84 | 85 | for aggr := range p.InAggregate { 86 | pageType := p.determineType(aggr) 87 | 88 | pageTitle, _ := p.convertUriToWikiTitle(aggr.SubjectStr, pageType, resourceIndex) 89 | 90 | page := NewWikiPage(pageTitle, []*Fact{}, []*Category{}, nil, pageType) 91 | 92 | topSuperCatsCnt := 0 93 | for _, tr := range aggr.Triples { 94 | 95 | predTitle, propertyStr := p.convertUriToWikiTitle(tr.Pred.String(), URITypePredicate, resourceIndex) // Here we know it is a predicate, simply because its location in a triple 96 | 97 | // Make sure property page exists 98 | if predPageIndex[predTitle] == nil { 99 | predPageIndex[predTitle] = NewWikiPage(predTitle, []*Fact{}, []*Category{}, nil, URITypePredicate) 100 | } 101 | 102 | var valueStr string 103 | 104 | if tr.Obj.Type() == rdf.TermIRI { 105 | 106 | valueAggr := (*resourceIndex)[tr.Obj.String()] 107 | valueUriType := p.determineType(valueAggr) 108 | _, valueStr = p.convertUriToWikiTitle(tr.Obj.String(), valueUriType, resourceIndex) 109 | 110 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Page")) 111 | 112 | } else if tr.Obj.Type() == rdf.TermLiteral { 113 | 114 | valueStr = tr.Obj.String() 115 | 116 | for _, r := range p.cleanUpRegexes { 117 | valueStr = r.ReplaceAllString(valueStr, "") 118 | } 119 | 120 | dataTypeStr := tr.Obj.(rdf.Literal).DataType.String() 121 | 122 | // Add type info on the current property's page 123 | switch dataTypeStr { 124 | case dataTypeURIString: 125 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text")) 126 | case dataTypeURILangString: 127 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Text")) 128 | case dataTypeURIInteger: 129 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number")) 130 | case dataTypeURIFloat: 131 | predPageIndex[predTitle].AddFactUnique(NewFact("Has type", "Number")) 132 | } 133 | } 134 | 135 | if tr.Pred.String() == typePropertyURI || tr.Pred.String() == subClassPropertyURI { 136 | page.AddCategoryUnique(NewCategory(valueStr)) 137 | superCatsCnt := p.countSuperCategories(tr, resourceIndex) 138 | if superCatsCnt > topSuperCatsCnt { 139 | topSuperCatsCnt = superCatsCnt 140 | page.SpecificCategory = NewCategory(valueStr) 141 | //println("Page:", page.Title, " | Adding cat", valueStr, "since has", superCatsCnt, "super categories.") 142 | } 143 | } else { 144 | page.AddFactUnique(NewFact(propertyStr, valueStr)) 145 | } 146 | } 147 | 148 | // Add Equivalent URI fact 149 | equivURIFact := NewFact("Equivalent URI", aggr.Subject.String()) 150 | page.AddFactUnique(equivURIFact) 151 | 152 | // Don't send predicates just yet (we want to gather facts about them, 153 | // and send at the end) ... 154 | if pageType == URITypePredicate { 155 | if predPageIndex[page.Title] != nil { 156 | // Add facts and categories to existing page 157 | for _, fact := range page.Facts { 158 | predPageIndex[page.Title].AddFactUnique(fact) 159 | } 160 | for _, cat := range page.Categories { 161 | predPageIndex[page.Title].AddCategoryUnique(cat) 162 | } 163 | } else { 164 | // If page does not exist, use the newly created one 165 | predPageIndex[page.Title] = page 166 | } 167 | } else { 168 | p.OutPage <- page 169 | } 170 | } 171 | 172 | for _, predPage := range predPageIndex { 173 | p.OutPage <- predPage 174 | } 175 | } 176 | 177 | func (p *TripleAggregateToWikiPageConverter) determineType(uriAggr *TripleAggregate) int { 178 | if uriAggr != nil { 179 | if uriAggr.Triples != nil { 180 | for _, tr := range uriAggr.Triples { 181 | for _, propType := range propertyTypes { 182 | if tr.Pred.String() == typePropertyURI && tr.Obj.String() == propType { 183 | return URITypePredicate 184 | } 185 | } 186 | for _, catType := range categoryTypes { 187 | if tr.Pred.String() == typePropertyURI && tr.Obj.String() == catType { 188 | return URITypeClass 189 | } 190 | } 191 | } 192 | } 193 | } 194 | return URITypeUndefined 195 | } 196 | 197 | // For properties, the factTitle and pageTitle will be different (The page 198 | // title including the "Property:" prefix), while for normal pages, they will 199 | // be the same. 200 | func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, uriType int, resourceIndex *map[string]*TripleAggregate) (pageTitle string, factTitle string) { 201 | 202 | aggr := (*resourceIndex)[uri] 203 | 204 | // Conversion strategies: 205 | // 1. Existing wiki title (in wiki, or cache) 206 | // 2. Use configured title-deciding properties 207 | if aggr != nil { 208 | factTitle = p.findTitleInTriples(aggr.Triples) 209 | } 210 | 211 | // 3. Shorten URI namespace to alias (e.g. http://purl.org/dc -> dc:) 212 | // (Does this apply for properties only?) 213 | 214 | // 4. Remove namespace, keep only local part of URL (Split on '/' or '#') 215 | if factTitle == "" { 216 | bits := str.Split(uri, "#") 217 | lastBit := bits[len(bits)-1] 218 | bits = str.Split(lastBit, "/") 219 | lastBit = bits[len(bits)-1] 220 | factTitle = lastBit 221 | } 222 | 223 | // Clean up strange characters 224 | factTitle = str.Replace(factTitle, "[", "(", -1) 225 | factTitle = str.Replace(factTitle, "]", ")", -1) 226 | factTitle = str.Replace(factTitle, "{", "(", -1) 227 | factTitle = str.Replace(factTitle, "}", ")", -1) 228 | factTitle = str.Replace(factTitle, "|", " ", -1) 229 | factTitle = str.Replace(factTitle, "#", " ", -1) 230 | factTitle = str.Replace(factTitle, "<", "less than", -1) 231 | factTitle = str.Replace(factTitle, ">", "greater than", -1) 232 | factTitle = str.Replace(factTitle, "?", " ", -1) 233 | factTitle = str.Replace(factTitle, "&", " ", -1) 234 | factTitle = str.Replace(factTitle, ",", " ", -1) // Can't allow comma's as we use it as a separator in template variables 235 | factTitle = str.Replace(factTitle, ".", " ", -1) 236 | factTitle = str.Replace(factTitle, "=", "-", -1) 237 | 238 | // Clean up according to regexes 239 | for _, r := range p.cleanUpRegexes { 240 | factTitle = r.ReplaceAllString(factTitle, "") 241 | } 242 | 243 | // Limit to max 255 chars (due to MediaWiki limitation) 244 | titleIsShortened := false 245 | for len(factTitle) >= 250 { 246 | factTitle = removeLastWord(factTitle) 247 | titleIsShortened = true 248 | } 249 | 250 | if titleIsShortened { 251 | factTitle += " ..." 252 | } 253 | 254 | factTitle = p.upperCaseFirst(factTitle) 255 | 256 | if uriType == URITypePredicate { 257 | pageTitle = "Property:" + factTitle 258 | } else if uriType == URITypeClass { 259 | pageTitle = "Category:" + factTitle 260 | } else { 261 | pageTitle = factTitle 262 | } 263 | 264 | return pageTitle, factTitle 265 | } 266 | 267 | func (p *TripleAggregateToWikiPageConverter) findTitleInTriples(triples []rdf.Triple) string { 268 | for _, titleProp := range titleProperties { 269 | for _, tr := range triples { 270 | if tr.Pred.String() == titleProp { 271 | return tr.Obj.String() 272 | } 273 | } 274 | } 275 | return "" 276 | } 277 | 278 | func (p *TripleAggregateToWikiPageConverter) countSuperCategories(tr rdf.Triple, ri *map[string]*TripleAggregate) int { 279 | catPage := (*ri)[tr.Obj.String()] 280 | topSuperCatsCnt := 0 281 | if catPage != nil { 282 | for _, subTr := range catPage.Triples { 283 | if subTr.Pred.String() == typePropertyURI || subTr.Pred.String() == subClassPropertyURI { 284 | superCatsCnt := p.countSuperCategories(subTr, ri) + 1 285 | if superCatsCnt > topSuperCatsCnt { 286 | topSuperCatsCnt = superCatsCnt 287 | } 288 | } 289 | } 290 | } 291 | return topSuperCatsCnt 292 | } 293 | 294 | func (p *TripleAggregateToWikiPageConverter) upperCaseFirst(inStr string) string { 295 | var outStr string 296 | if inStr != "" { 297 | outStr = str.ToUpper(inStr[0:1]) + inStr[1:] 298 | } 299 | return outStr 300 | } 301 | 302 | func removeLastWord(inStr string) string { 303 | bits := str.Split(inStr, " ") 304 | outStr := str.Join(append(bits[:len(bits)-1]), " ") 305 | return outStr 306 | } 307 | -------------------------------------------------------------------------------- /components/triplestowikipageconv_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "testing" 6 | ) 7 | 8 | // TestNewTripleAggregateToWikiPageConverter tests NewTripleAggregateToWikiPageConverter() 9 | func TestNewTripleAggregateToWikiPageConverter(t *testing.T) { 10 | flowbase.InitLogDebug() 11 | 12 | mxc := NewTripleAggregateToWikiPageConverter() 13 | 14 | if mxc.InAggregate == nil { 15 | t.Error("InAggregate is not initialized") 16 | } 17 | if mxc.InIndex == nil { 18 | t.Error("InIndex is not initialized") 19 | } 20 | if mxc.OutPage == nil { 21 | t.Error("OutPage is not initialized") 22 | } 23 | if mxc.cleanUpRegexes == nil { 24 | t.Error("cleanUpRegexes is not initialized") 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /components/ttlfilereader.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "io" 5 | "log" 6 | 7 | "github.com/flowbase/flowbase" 8 | "github.com/knakk/rdf" 9 | "github.com/spf13/afero" 10 | ) 11 | 12 | // TurtleFileReader is a process that reads turtle files (Files in the turtle 13 | // RDF format), based on file names it receives on the FileReader.InFileName 14 | // port / channel, and writes out the output line by line as strings on the 15 | // FileReader.OutLine port / channel. 16 | type TurtleFileReader struct { 17 | InFileName chan string 18 | OutTriple chan rdf.Triple 19 | fs afero.Fs 20 | } 21 | 22 | // NewOsTurtleFileReader returns an initialized TurtleFileReader, with an OS 23 | // (normal) file system 24 | func NewOsTurtleFileReader() *TurtleFileReader { 25 | return NewTurtleFileReader(afero.NewOsFs()) 26 | } 27 | 28 | // NewTurtleFileReader returns an initialized TurtleFileReader, initialized 29 | // with the afero file system provided provided as an argument 30 | func NewTurtleFileReader(fileSystem afero.Fs) *TurtleFileReader { 31 | return &TurtleFileReader{ 32 | InFileName: make(chan string, BUFSIZE), 33 | OutTriple: make(chan rdf.Triple, BUFSIZE), 34 | fs: fileSystem, 35 | } 36 | } 37 | 38 | // Run runs the TurtleFileReader process. It does not spawn a separate 39 | // go-routine, so you have to prepend the go keyword when calling it, in order 40 | // to have it run in a separate go-routine. 41 | func (p *TurtleFileReader) Run() { 42 | defer close(p.OutTriple) 43 | 44 | flowbase.Debug.Println("Starting loop") 45 | for fileName := range p.InFileName { 46 | flowbase.Debug.Printf("Starting processing file %s\n", fileName) 47 | fh, err := p.fs.Open(fileName) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | defer fh.Close() 52 | 53 | dec := rdf.NewTripleDecoder(fh, rdf.Turtle) 54 | for triple, err := dec.Decode(); err != io.EOF; triple, err = dec.Decode() { 55 | if err != nil { 56 | log.Fatal("Could not encode to triple: ", err.Error()) 57 | } else if triple.Subj != nil && triple.Pred != nil && triple.Obj != nil { 58 | p.OutTriple <- triple 59 | } else { 60 | log.Fatal("Something was encoded as nil in the triple:", triple) 61 | } 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /components/ttlfilereader_test.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "github.com/flowbase/flowbase" 5 | "github.com/spf13/afero" 6 | "testing" 7 | ) 8 | 9 | // TestNewOSFileReader tests NewOSTurtleFileReader 10 | func TestNewTurtleFileReader(t *testing.T) { 11 | flowbase.InitLogWarning() 12 | 13 | fr := NewOsTurtleFileReader() 14 | if fr.InFileName == nil { 15 | t.Error("In-port InFileName not initialized in New FileReader") 16 | } 17 | if fr.OutTriple == nil { 18 | t.Error("In-port InFileName not initialized in New FileReader") 19 | } 20 | 21 | go func() { 22 | fr.InFileName <- "teststring" 23 | }() 24 | teststr1 := <-fr.InFileName 25 | if teststr1 != "teststring" { 26 | t.Error("In-port InFileName is not a string channel") 27 | fr.InFileName <- "teststring" 28 | } 29 | } 30 | 31 | // Tests the main behavior of the TurtleFileReader process 32 | func TestTurtleFileReader(t *testing.T) { 33 | flowbase.InitLogWarning() 34 | 35 | s1 := "http://example.org/s1" 36 | p1 := "http://example.org/p1" 37 | o1 := "string1" 38 | s2 := "http://example.org/p2" 39 | p2 := "http://example.org/p2" 40 | o2 := "string2" 41 | triple1 := "<" + s1 + "> <" + p1 + "> \"" + o1 + "\" ." 42 | triple2 := "<" + s2 + "> <" + p2 + "> \"" + o2 + "\" ." 43 | testContent := triple1 + "\n" + triple2 44 | 45 | fs := afero.NewMemMapFs() 46 | 47 | testFileName := "testfile.ttl" 48 | f, err := fs.Create(testFileName) 49 | if err != nil { 50 | t.Errorf("Could not create file %s in memory file system", testFileName) 51 | } 52 | f.WriteString(testContent) 53 | f.Close() 54 | 55 | fr := NewTurtleFileReader(fs) 56 | go func() { 57 | defer close(fr.InFileName) 58 | fr.InFileName <- testFileName 59 | }() 60 | 61 | go fr.Run() 62 | 63 | outTriple1 := <-fr.OutTriple 64 | outTriple2 := <-fr.OutTriple 65 | 66 | if outTriple1.Subj.String() != s1 { 67 | t.Error("Subject of first triple is wrong") 68 | } 69 | if outTriple1.Pred.String() != p1 { 70 | t.Error("Predicate of first triple is wrong") 71 | } 72 | if outTriple1.Obj.String() != o1 { 73 | t.Error("Object of first triple is wrong") 74 | } 75 | if outTriple2.Subj.String() != s2 { 76 | t.Error("Subject of second triple is wrong") 77 | } 78 | if outTriple2.Pred.String() != p2 { 79 | t.Error("Predicate of second triple is wrong") 80 | } 81 | if outTriple2.Obj.String() != o2 { 82 | t.Error("Object of second triple is wrong") 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /components/wikipageprinter.go: -------------------------------------------------------------------------------- 1 | package components 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/flowbase/flowbase" 7 | ) 8 | 9 | type WikiPagePrinter struct { 10 | In chan *WikiPage 11 | } 12 | 13 | func NewWikiPagePrinter() *WikiPagePrinter { 14 | return &WikiPagePrinter{ 15 | In: make(chan *WikiPage, flowbase.BUFSIZE), 16 | } 17 | } 18 | 19 | func (p *WikiPagePrinter) Run() { 20 | for page := range p.In { 21 | fmt.Println("Title:", page.Title) 22 | for _, fact := range page.Facts { 23 | fmt.Print(fact.asWikiFact()) 24 | } 25 | for _, cat := range page.Categories { 26 | fmt.Print(cat.asWikiString()) 27 | } 28 | fmt.Println("") // Print an empty line 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/rdfio/rdf2smw 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.24.0 6 | 7 | require ( 8 | github.com/flowbase/flowbase v0.1.0 9 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042 10 | github.com/spf13/afero v1.14.0 11 | ) 12 | 13 | require ( 14 | github.com/stretchr/testify v1.10.0 // indirect 15 | golang.org/x/text v0.23.0 // indirect 16 | ) 17 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/flowbase/flowbase v0.1.0 h1:/bdKbxZc1P+Kdh+ro03uky+2bqlO3KWkghMDJoVkT/Q= 4 | github.com/flowbase/flowbase v0.1.0/go.mod h1:Yq3H0kx4JWEumGeyYXjEzDiCxyh0RO00DSDXUupOzo4= 5 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042 h1:Vzdm5hdlLdpJOKK+hKtkV5u7xGZmNW6aUBjGcTfwx84= 6 | github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042/go.mod h1:fYE0718xXI13XMYLc6iHtvXudfyCGMsZ9hxSM1Ommpg= 7 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 9 | github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA= 10 | github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo= 11 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 12 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 13 | golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= 14 | golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= 15 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 16 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 17 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | rdf2smw is a commandline tool to convert from RDF data to MediaWiki XML Dump 3 | files, for import using MediaWiki's built in importDump.php script. 4 | 5 | Usage 6 | 7 | ./rdf2smw -in -out 8 | 9 | Flags 10 | 11 | -in Input file in RDF N-triples format 12 | -out Output file in (MediaWiki) XML format 13 | 14 | Example usage 15 | 16 | ./rdf2smw -in mydata.nt -out mydata.xml 17 | 18 | For importing the generated XML Dumps into MediaWiki, see this page: 19 | https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps 20 | */ 21 | package main 22 | 23 | import ( 24 | "flag" 25 | "fmt" 26 | "github.com/rdfio/rdf2smw/components" 27 | "os" 28 | 29 | str "strings" 30 | 31 | "github.com/flowbase/flowbase" 32 | ) 33 | 34 | const ( 35 | BUFSIZE = 16 36 | ) 37 | 38 | func main() { 39 | //flowbase.InitLogDebug() 40 | 41 | inFileName := flag.String("in", "", "The input file name") 42 | outFileName := flag.String("out", "", "The output file name") 43 | flag.Parse() 44 | 45 | doExit := false 46 | if *inFileName == "" { 47 | fmt.Println("No filename specified to --in") 48 | doExit = true 49 | } else if *outFileName == "" { 50 | fmt.Println("No filename specified to --out") 51 | doExit = true 52 | } 53 | 54 | if doExit { 55 | os.Exit(1) 56 | } 57 | 58 | // ------------------------------------------ 59 | // Initialize processes 60 | // ------------------------------------------ 61 | 62 | // Create a pipeline runner 63 | net := flowbase.NewNet() 64 | 65 | // Read in-file 66 | ttlFileRead := components.NewOsTurtleFileReader() 67 | net.AddProcess(ttlFileRead) 68 | 69 | // TripleAggregator 70 | aggregator := components.NewTripleAggregator() 71 | net.AddProcess(aggregator) 72 | 73 | // Create an subject-indexed "index" of all triples 74 | indexCreator := components.NewResourceIndexCreator() 75 | net.AddProcess(indexCreator) 76 | 77 | // Fan-out the triple index to the converter and serializer 78 | indexFanOut := components.NewResourceIndexFanOut() 79 | net.AddProcess(indexFanOut) 80 | 81 | // Serialize the index back to individual subject-tripleaggregates 82 | indexToAggr := components.NewResourceIndexToTripleAggregates() 83 | net.AddProcess(indexToAggr) 84 | 85 | // Convert TripleAggregate to WikiPage 86 | triplesToWikiConverter := components.NewTripleAggregateToWikiPageConverter() 87 | net.AddProcess(triplesToWikiConverter) 88 | 89 | //categoryFilterer := components.NewCategoryFilterer([]string{"DataEntry"}) 90 | //net.AddProcess(categoryFilterer) 91 | 92 | // Pretty-print wiki page data 93 | //wikiPagePrinter := components.NewWikiPagePrinter() 94 | //net.AddProcess(wikiPagePrinter) 95 | 96 | useTemplates := true 97 | xmlCreator := components.NewMWXMLCreator(useTemplates) 98 | net.AddProcess(xmlCreator) 99 | 100 | //printer := components.NewStringPrinter() 101 | //net.AddProcess(printer) 102 | templateWriter := components.NewStringFileWriter(str.Replace(*outFileName, ".xml", "_templates.xml", 1)) 103 | net.AddProcess(templateWriter) 104 | 105 | propertyWriter := components.NewStringFileWriter(str.Replace(*outFileName, ".xml", "_properties.xml", 1)) 106 | net.AddProcess(propertyWriter) 107 | 108 | pageWriter := components.NewStringFileWriter(*outFileName) 109 | net.AddProcess(pageWriter) 110 | 111 | snk := flowbase.NewSink() 112 | net.AddProcess(snk) 113 | 114 | // ------------------------------------------ 115 | // Connect network 116 | // ------------------------------------------ 117 | 118 | ttlFileRead.OutTriple = aggregator.In 119 | 120 | aggregator.Out = indexCreator.In 121 | 122 | indexCreator.Out = indexFanOut.In 123 | indexFanOut.Out["serialize"] = indexToAggr.In 124 | indexFanOut.Out["conv"] = triplesToWikiConverter.InIndex 125 | 126 | indexToAggr.Out = triplesToWikiConverter.InAggregate 127 | 128 | //triplesToWikiConverter.OutPage = categoryFilterer.In 129 | //categoryFilterer.Out = xmlCreator.InWikiPage 130 | 131 | triplesToWikiConverter.OutPage = xmlCreator.InWikiPage 132 | 133 | xmlCreator.OutTemplates = templateWriter.In 134 | xmlCreator.OutProperties = propertyWriter.In 135 | xmlCreator.OutPages = pageWriter.In 136 | 137 | snk.Connect(templateWriter.OutDone) 138 | snk.Connect(propertyWriter.OutDone) 139 | snk.Connect(pageWriter.OutDone) 140 | 141 | // ------------------------------------------ 142 | // Send in-data and run 143 | // ------------------------------------------ 144 | 145 | go func() { 146 | defer close(ttlFileRead.InFileName) 147 | ttlFileRead.InFileName <- *inFileName 148 | }() 149 | 150 | net.Run() 151 | } 152 | -------------------------------------------------------------------------------- /testcov.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | for pkg in "github.com/rdfio/rdf2smw" "github.com/rdfio/rdf2smw/components"; do 4 | touch profile_tmp.cov 5 | go test -v -covermode=count -coverprofile=profile_tmp.cov $pkg || ERROR="Error testing $pkg" 6 | tail -n +2 profile_tmp.cov >> cover.out || exit "Unable to append coverage for $pkg" 7 | done 8 | --------------------------------------------------------------------------------