├── README.md └── rdf2rdf.go /README.md: -------------------------------------------------------------------------------- 1 | ## rdf2rdf 2 | CLI tool to convert between different RDF serialization formats. 3 | 4 | Primarily made to test and showcase the capabilities of the [rdf package](https://github.com/knakk/rdf). 5 | 6 | ## Status 7 | 8 | Currently supported input formats: RDF/XML, N-Triples, N-Quads, Turtle. 9 | 10 | Currently supported output formats: N-Triples, Turtle. 11 | 12 | More formats are coming soon. 13 | 14 | ## Installation 15 | Install as you would any other Go package: 16 | 17 | go get -u github.com/knakk/rdf2rdf 18 | 19 | Provided that `GOPATH/bin` is on your `PATH`, you're good to go. 20 | 21 | When the tool has proven stable and complete, I can provide binaries for the most common operating systems for download. 22 | 23 | ## Usage 24 |
25 | rdf2rdf 26 | ------- 27 | Convert between different RDF serialization formats. 28 | 29 | Usage: 30 | rdf2rdf -in=input.xml -out=output.ttl 31 | 32 | Options: 33 | -h --help Show this message. 34 | -in Input file. 35 | -out Output file. 36 | -stream=true Streaming mode. 37 | -v=false Verbose mode (shows progress indicator) 38 | 39 | By default the converter is streaming both input and output, emitting 40 | converted triples/quads as soon as they are available. This ensures you can 41 | convert huge files with minimum memory footprint. However, if you have 42 | small datasets you can choose to load all data into memory before conversion. 43 | This makes it possible to sort the data, remove duplicate triples, and 44 | potentially generate more compact Turtle serializations, maximizing predicate 45 | and object lists. Do this by setting the flag stream=false. 46 | 47 | Conversion from a quad-format to a triple-format will disregard the triple's 48 | context (graph). Conversion from a triple-format to a quad-format is not 49 | supported. 50 | 51 | Input and ouput formats are determined by file extensions, according to 52 | the following table: 53 | 54 | Format | File extension 55 | ----------|------------------- 56 | N-Triples | .nt 57 | N-Quads | .nq 58 | RDF/XML | .rdf .rdfxml .xml 59 | Turtle | .ttl 60 | 61 |-------------------------------------------------------------------------------- /rdf2rdf.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "time" 10 | "unicode/utf8" 11 | 12 | "github.com/knakk/rdf" 13 | "github.com/mitchellh/ioprogress" 14 | ) 15 | 16 | var usage = `rdf2rdf 17 | ------- 18 | Convert between different RDF serialization formats. 19 | 20 | Usage: 21 | rdf2rdf -in=input.xml -out=output.ttl 22 | 23 | Options: 24 | -h --help Show this message. 25 | -in Input file. 26 | -out Output file. 27 | -stream=true Streaming mode. 
28 | -v=false Verbose mode (shows progress indicator) 29 | 30 | By default the converter is streaming both input and output, emitting 31 | converted triples/quads as soon as they are available. This ensures you can 32 | convert huge files with minimum memory footprint. However, if you have 33 | small datasets you can choose to load all data into memory before conversion. 34 | This makes it possible to sort the data, remove duplicate triples, and 35 | potentially generate more compact Turtle serializations, maximizing predicate 36 | and object lists. Do this by setting the flag stream=false. 37 | 38 | Conversion from a quad-format to a triple-format will disregard the triple's 39 | context (graph). Conversion from a triple-format to a quad-format is not 40 | supported. 41 | 42 | Input and ouput formats are determined by file extensions, according to 43 | the following table: 44 | 45 | Format | File extension 46 | ----------|------------------- 47 | N-Triples | .nt 48 | N-Quads | .nq 49 | RDF/XML | .rdf .rdfxml .xml 50 | Turtle | .ttl 51 | 52 | ` 53 | 54 | func main() { 55 | log.SetFlags(0) 56 | log.SetPrefix("ERROR: ") 57 | flag.Usage = func() { 58 | fmt.Fprintf(os.Stderr, usage) 59 | } 60 | input := flag.String("in", "", "Input file") 61 | output := flag.String("out", "", "Output file") 62 | verbose := flag.Bool("v", false, "Verbose mode") 63 | stream := flag.Bool("stream", true, "Streaming mode") 64 | flag.Parse() 65 | 66 | if *input == "" || *output == "" { 67 | fmt.Println("Usage:") 68 | flag.PrintDefaults() 69 | os.Exit(1) 70 | } 71 | 72 | inFile, err := os.Open(*input) 73 | if err != nil { 74 | log.Fatal(err) 75 | } 76 | defer inFile.Close() 77 | 78 | stat, err := inFile.Stat() 79 | if err != nil { 80 | log.Fatal(err) 81 | } 82 | 83 | var inFileRdr io.Reader 84 | if *verbose { 85 | inFileRdr = &ioprogress.Reader{ 86 | Reader: inFile, 87 | Size: stat.Size(), 88 | DrawInterval: time.Microsecond, 89 | DrawFunc: ioprogress.DrawTerminalf(os.Stdout, func(p, t int64) 
string { 90 | return ioprogress.DrawTextFormatBytes(p, t) 91 | }), 92 | } 93 | } else { 94 | inFileRdr = inFile 95 | } 96 | 97 | outFile, err := os.Create(*output) 98 | if err != nil { 99 | log.Fatal(err) 100 | } 101 | defer outFile.Close() 102 | 103 | inExt := fileExtension(*input) 104 | outExt := fileExtension(*output) 105 | 106 | if inExt == outExt { 107 | log.Fatal("No conversion necessary. Input and output formats are identical.") 108 | } 109 | 110 | var inFormat, outFormat rdf.Format 111 | 112 | switch inExt { 113 | case "nt": 114 | inFormat = rdf.NTriples 115 | case "nq": 116 | inFormat = rdf.NQuads 117 | case "ttl": 118 | inFormat = rdf.Turtle 119 | case "xml", "rdf", "rdfxml": 120 | inFormat = rdf.RDFXML 121 | case "": 122 | log.Fatal("Unknown file format. No file extension on input file.") 123 | default: 124 | log.Fatalf("Unsupported file exension on input file: %s", inFile.Name()) 125 | } 126 | 127 | switch outExt { 128 | case "nt": 129 | outFormat = rdf.NTriples 130 | case "nq": 131 | // No other quad-formats supported ATM 132 | log.Fatal("Serializing to N-Quads currently not supported.") 133 | case "ttl": 134 | outFormat = rdf.Turtle 135 | case "": 136 | log.Fatal("Unknown file format. No file extension on output file.") 137 | default: 138 | log.Fatalf("Unsupported file exension on output file: %s", outFile.Name()) 139 | } 140 | 141 | t0 := time.Now() 142 | n := tripleToTriple(inFileRdr, outFile, inFormat, outFormat, *stream) 143 | if *verbose { 144 | fmt.Printf("Done. Converted %d triples in %v.\n", n, time.Now().Sub(t0)) 145 | } 146 | } 147 | 148 | func tripleToTriple(inFile io.Reader, outFile io.Writer, inFormat, outFormat rdf.Format, stream bool) int { 149 | dec := rdf.NewTripleDecoder(inFile, inFormat) 150 | // TODO set base to file name? 
151 | enc := rdf.NewTripleEncoder(outFile, outFormat) 152 | 153 | i := 0 154 | if stream { 155 | for t, err := dec.Decode(); err != io.EOF; t, err = dec.Decode() { 156 | if err != nil { 157 | log.Fatal(err) 158 | } 159 | err = enc.Encode(t) 160 | if err != nil { 161 | log.Fatal(err) 162 | } 163 | i++ 164 | } 165 | } else { 166 | tr, err := dec.DecodeAll() 167 | if err != nil { 168 | log.Fatal(err) 169 | } 170 | err = enc.EncodeAll(tr) 171 | if err != nil { 172 | log.Fatal(err) 173 | } 174 | i = len(tr) 175 | } 176 | err := enc.Close() 177 | if err != nil { 178 | log.Fatal(err) 179 | } 180 | return i 181 | } 182 | 183 | func fileExtension(s string) string { 184 | i := len(s) 185 | for i > 0 { 186 | r, w := utf8.DecodeLastRuneInString(s[0:i]) 187 | if r == '.' { 188 | return s[i:len(s)] 189 | } 190 | i -= w 191 | } 192 | return "not found" 193 | } 194 | --------------------------------------------------------------------------------