├── .gitignore ├── LICENSE ├── MUSICBRAINZ_README.md ├── README.old.md ├── example-export.py ├── examples ├── cars │ └── dumps │ │ ├── cars.csv │ │ └── manufacturers.csv └── musicbrainz │ ├── musicbrains_gist.adoc │ ├── musicbrainz2neo4j.sql │ └── queries.cyp ├── musicbrainz2neo4j-export.py ├── musicbrainz_schema.py ├── run_batchimport.py ├── sql2graph.conf.default └── sql2graph ├── __init__.py ├── export.py ├── export2.py ├── graph.py ├── schema.py └── schema2.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Paul Tremberth, Newlynn Labs 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /MUSICBRAINZ_README.md: -------------------------------------------------------------------------------- 1 | # Exporting the Musicbrainz database into Neo4j 2 | 3 | What you'll need: 4 | 5 | * Python (and psycopg2 (https://pypi.python.org/pypi/psycopg2)) 6 | * Postgresl 9.x 7 | * Lukáš Lalinský's `mbslave` project (https://bitbucket.org/lalinsky/mbslave) 8 | * Neo4j 2.0.x (Milestone 2.0.0-M06 Community edition is ok): http://www.neo4j.org/download 9 | * Michael Hunger's `batch-import` (https://github.com/jexp/batch-import) 10 | 11 | ## Create a mirror of MusicBrainz Postgresl database 12 | 13 | I recommend Lukáš Lalinský's mbslave (https://bitbucket.org/lalinsky/mbslave) 14 | 15 | ### mbslave setup 16 | 17 | ```shell 18 | $ git clone git@bitbucket.org:lalinsky/mbslave.git 19 | $ cd mbslave/ 20 | ``` 21 | 22 | Follow mbslave's `README.md`. 23 | 24 | (For step 2, I usually create a new cluster just for MusicBrainz: 25 | 26 | As root, 27 | 28 | ```shell 29 | # mkdir musicbrainz 30 | # cd musicbrainz/ 31 | # mkdir dbdata 32 | # chown postgres:postgres dbdata 33 | # su postgres 34 | ``` 35 | 36 | As 'postgres' user, 37 | 38 | ```shell 39 | postgres@host$ pg_createcluster -d $PWD/dbdata --locale C -e UTF8 9.1 musicbrainz 40 | postgres@host$ pg_ctlcluster 9.1 musicbrainz start 41 | ``` 42 | 43 | Create a superuser, e.g. "mbadmin" and create an UTF8 database called "musicbrainz" 44 | 45 | ``` 46 | postgres@host$ createuser -p 5433 -P 47 | postgres@host$ createdb -p 5433 -l C -E UTF-8 -T template0 -O mbadmin musicbrainz 48 | ``` 49 | and `cp mbslabe.conf.default mbslave.conf` and adapt to your settings (database name, host and port number, password)) 50 | 51 | Continue following mbslave's instructions (steps 3, 4 and 5): prepare the schema, download dumps and import data 52 | 53 | I usually use only `mbdump.tar.bz2` (it's 1.5GB) 54 | 55 | After step 5, it should be sufficient to only run `./mbslave-remap-schema.py musicbrainz2neo4j.sql 63 | ``` 64 | 65 | And feed this script to `psql` on the musicbrainz database ; you can of course use mbslave for that too: 66 | ``` 67 | $ cd /path/to/mbslave 68 | $ cat /path/to/musicbrainz2neo4j.sql | ./mbslave-psql.py 69 | ``` 70 | 71 | By default, the generated TSV/CSV files are in /tmp: 72 | 73 | ``` 74 | user@host$ ls -la /tmp/musicbrainz__* 75 | -rw-r--r-- 1 postgres postgres 355724444 oct. 28 10:23 /tmp/musicbrainz__nodes__full.csv 76 | -rw-r--r-- 1 postgres postgres 138827549 oct. 28 10:23 /tmp/musicbrainz__rels__full.csv 77 | user@host$ 78 | ``` 79 | 80 | Size may vary based on what entities you choose to export: 81 | you can limit the entities you export using something like `python musicbrainz2neo4j-export.py --limit label,artist,url` 82 | 83 | By default, all core entities in MusicBrainz are exported 84 | 85 | ### Import into Neo4j 86 | 87 | ```shell 88 | $ cd /path/to/sql2graph/ 89 | $ python musicbrainz2neo4j-export.py > musicbrainz2neo4j.sql 90 | 91 | //export just 1000 rows per table 92 | $ python musicbrainz2neo4j-export.py --limit 1000 > musicbrainz2neo4j.sql 93 | $ cd /path/to/mbslave 94 | $ cat /path/to/musicbrainz2neo4j.sql | ./mbslave-psql.py 95 | ``` 96 | 97 | By default, the generated TSV/CSV files are in `/tmp/musicbrainz__nodes__full.csv` and `/tmp/musicbrainz__rels__full.csv`. 98 | 99 | Now, 100 | * use the batch-import project to import the csv files into neo4j, 101 | * make sure you swith to the "20" branch of batch-import (for labels support) 102 | * using a `mb_fulltext` and `mb_exact` index in a custom `./batch.properties` file, 103 | * putting the database under `./musicbrainz.db`. 104 | * (this will erase your current neo4j datastore) 105 | 106 | ```shell 107 | $ cd /path/to/jexp/batch-import 108 | $ git checkout -b neo4j-2.0 origin/20 109 | $ # build batch-import... 110 | $ # prepare a batch.properties file: 111 | $ echo -e "batch_import.node_index.mb_exact=exact\nbatch_import.node_index.mb_fulltext=fulltext" > batch.properties 112 | $ MAVEN_OPTS="-server -Xmx10G -Dfile.encoding=UTF-8" mvn exec:java -Dfile.encoding=UTF-8 -Dexec.mainClass="org.neo4j.batchimport.Importer" -Dexec.args="batch.properties musicbrainz.db /tmp/musicbrainz__nodes__full.csv /tmp/musicbrainz__rels__full.csv" 113 | ``` 114 | 115 | 116 | Finally, restart your Neo4j instance (you had stopped it before running the batch-import, right?) 117 | and play around with MusicBrainz data with the sample queries in examples/musicbrainz/queries.cyp 118 | -------------------------------------------------------------------------------- /README.old.md: -------------------------------------------------------------------------------- 1 | sql2graph 2 | ========= 3 | 4 | sql2graph is a python helper module to export CSV dumps from a relational database 5 | to a format that can be imported into a graph database. 6 | 7 | Currently, only Neo4J batch-import format is supported. 8 | (see https://github.com/jexp/batch-import) 9 | 10 | Instructions for Neo4J batch-import 11 | =================================== 12 | 13 | References: 14 | * https://github.com/jexp/batch-import 15 | * http://maxdemarzi.com/2012/02/28/batch-importer-part-1/ 16 | and http://maxdemarzi.com/2012/02/28/batch-importer-part-2/ 17 | 18 | Prerequisites: 19 | - Neo4J installed (http://www.neo4j.org/download) 20 | - batch-import installed and built (https://github.com/jexp/batch-import) 21 | 22 | 1) Export your SQL tables data as TAB-delimited CSV with fieldnames as first row. 23 | For example, using Postgresql, running something like this should work: 24 | 25 | COPY (SELECT * FROM mytable) TO stdout CSV HEADER DELIMITER E'\t'; 26 | 27 | You can then redirect the output to a file from the psql command line, 28 | or you can replace "stdout" above by a file path directly 29 | 30 | 2) Copy and rename sql2graph.conf.default to sql2graph.conf (or something else) 31 | and edit it. 32 | Set the dump filenames for all entities you want to import 33 | Optionally, define some index files. 34 | 35 | 3) Copy and rename example-export.py script 36 | and define the schema for the Nodes and their relations. 37 | If you want to index some fields for your entities, don't forget to add 38 | "index=True" on them 39 | See examples folder. 40 | 41 | Then run: 42 | 43 | python xyz-export.py --config sql2graph.conf -N nodes.csv -R relations.csv 44 | 45 | 4) You should now have: 46 | - a CSV file representing all nodes for all entities 47 | (nodes.csv if you ran the command above) 48 | - a CSV file representing the relations 49 | (relations.csv if you ran the command above) 50 | - depending on the field indexes you defined, a number of 51 | node index files for Neo4J (relation indexes not supported) 52 | 53 | (See https://github.com/jexp/batch-import for the various files and format details.) 54 | 55 | **Stop neo4j before running batch-import.** 56 | 57 | You have 2 options: 58 | - either run Neo4J batch-import manually supplying your files on the command line 59 | (again, see https://github.com/jexp/batch-import on how to do that) 60 | 61 | - or, run run_batchimport.py which uses the config file to run the command for you 62 | (check your sql2graph.conf BATCHIMPORT_SETTINGS section) 63 | 64 | python run_batchimport.py --config sql2graph.conf 65 | 66 | 5) (Re)start neo4j 67 | 68 | -------------------------------------------------------------------------------- /example-export.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Copyright 2013 Paul Tremberth, Newlynn Labs 4 | # See LICENSE for details. 5 | 6 | import optparse 7 | import ConfigParser 8 | import os 9 | import sys 10 | import traceback 11 | import csv 12 | import sql2graph.export 13 | from sql2graph.schema import Field, IntField, Relation, Reference, Column, Entity, Property 14 | 15 | # ---------------------------------------------------------------------- 16 | 17 | SCHEMA = ( 18 | Entity('car', 19 | fields = [ 20 | IntField('pk', Column('id'), primary_key=True, index='cars'), 21 | Field('name', Column('name'), index='cars'), 22 | Field('begin', Column('begin'), index='cars'), 23 | Field('end', Column('end'), index='cars'), 24 | ], 25 | relations = [ 26 | Relation(Reference('car', 'id'), 27 | Reference('manufacturer', 'manufacturer'), 28 | [Property('rel_type', 'makes/made')]) 29 | ] 30 | ), 31 | Entity('manufacturer', 32 | fields = [ 33 | IntField('pk', Column('id'), primary_key=True, index='manufacturers'), 34 | Field('name', Column('name'), index='manufacturers'), 35 | ], 36 | ) 37 | ) 38 | 39 | DEFAULT_NODES_FILE = 'nodes.csv' 40 | DEFAULT_RELATIONS_FILE = 'relations.csv' 41 | 42 | def main(): 43 | 44 | option_parser = optparse.OptionParser() 45 | option_parser.add_option("-c", "--config", dest="configfile", help="configuration file", default=None) 46 | option_parser.add_option("-p", "--pretend", action="store_true", dest="pretend", help="just pretend; do not write anthing", default=False) 47 | option_parser.add_option("-N", "--nodes", dest="nodes_file", help="Nodes file", default=None) 48 | option_parser.add_option("-R", "--relations", dest="relations_file", help="Relations file", default=None) 49 | (options, args) = option_parser.parse_args() 50 | 51 | config_parser = ConfigParser.RawConfigParser() 52 | if options.configfile: 53 | config_parser.read(options.configfile) 54 | else: 55 | print "you must provide a config file" 56 | sys.exit(0) 57 | 58 | if config_parser.has_section('TABLE_DUMPS'): 59 | dump_tables = dict(( 60 | (entity, dump_file) 61 | for entity, dump_file in config_parser.items('TABLE_DUMPS') 62 | )) 63 | else: 64 | print "no TABLE_DUMPS section" 65 | raise SystemExit 66 | 67 | 68 | if config_parser.has_option('IMPORT_ORDER', 'order'): 69 | entity_order = [entity.strip() 70 | for entity in config_parser.get('IMPORT_ORDER', 'order').split(',')] 71 | else: 72 | print "no IMPORT_ORDER/order" 73 | raise SystemExit 74 | 75 | if config_parser.has_section('INDEX_FILES'): 76 | index_files = dict(( 77 | (index_name, index_file) 78 | for index_name, index_file in config_parser.items('INDEX_FILES') 79 | )) 80 | else: 81 | print "no INDEX_FILES section" 82 | index_files = None 83 | 84 | 85 | # check if all dump files exist 86 | for entity in entity_order: 87 | if dump_tables.get(entity): 88 | if not os.path.isfile(dump_tables.get(entity)): 89 | print "file %s does not exist" % dump_tables.get(entity) 90 | raise RuntimeError 91 | 92 | # For TAB-delimited CSV files, use dialect=csv.excel_tab 93 | # In this cars example, the CSV files use commas 94 | exporter = sql2graph.export.GraphExporter( 95 | schema=SCHEMA, format='neo4j', dialect=csv.excel) 96 | 97 | exporter.set_output_nodes_file( 98 | entity=sql2graph.export.MERGED, 99 | filename=options.nodes_file or DEFAULT_NODES_FILE) 100 | 101 | exporter.set_output_relations_file( 102 | entity=sql2graph.export.MERGED, 103 | filename=options.relations_file or DEFAULT_RELATIONS_FILE) 104 | 105 | for index_name, index_file in index_files.iteritems(): 106 | exporter.set_output_indexes_file(entity=index_name, filename=index_file) 107 | 108 | for entity_name in entity_order: 109 | if dump_tables.get(entity): 110 | exporter.feed_dumpfile(entity=entity_name, filename=dump_tables.get(entity_name)) 111 | 112 | exporter.run() 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | 118 | -------------------------------------------------------------------------------- /examples/cars/dumps/cars.csv: -------------------------------------------------------------------------------- 1 | id,name,manufacturer,begin,end 2 | 1,Aston Martin 2-Litre Sports (DB1),3,1948,1950 3 | 2,Aston Martin DB2,3,1950,1953 4 | 3,Aston Martin DB2/4,3,1953,1957 5 | 4,Aston Martin DB Mark III,3,1957,1959 6 | 5,Aston Martin DB4,3,1958,1963 7 | 6,365 Daytona,2,1968,1973 8 | 7,550 Maranello,2,1996,2001 9 | 8,575M Maranello,2,2002,2006 10 | 9,XK120,1,1948,1954 11 | 10,XK140,1,1954,1957 12 | 11,XK150,1,1957,1961 13 | 12,E-Type,1,1961,1974 14 | -------------------------------------------------------------------------------- /examples/cars/dumps/manufacturers.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Jaguar 3 | 2,Ferrari 4 | 3,Aston Martin 5 | -------------------------------------------------------------------------------- /examples/musicbrainz/musicbrains_gist.adoc: -------------------------------------------------------------------------------- 1 | = A musicbrainz import into Neo4j 2 | 3 | image::http://musicbrainz.org/static/images/layout/sprites.png[] 4 | 5 | == The schema 6 | 7 | This import is done according to https://github.com/redapple/sql2graph/tree/master/examples/musicbrainz[The sql2graph muzicbrainz example] taking the This http://wiki.musicbrainz.org/-/images/thumb/5/52/ngs.png/700px-ngs.png[Musicbrainz database schema] and transforming it into something like the schema below. 8 | 9 | [source,cypher] 10 | ---- 11 | CREATE 12 | (usa:Country{name:'USA'}), 13 | (uk:Country{name:'UK'}), 14 | //madonna 15 | (madonna:Artist{name:'Madonna',mbid:'79239441-bfd5-4981-a70c-55c3f15c1287'}), 16 | (evita:Recording{name:"Eva and Magaldi / Eva Beware of the City"}), 17 | (e_and_m:Work{name:"Evita: Eva and Magaldi",mbid:"d7190526-69b9-4d9d-a786-983833c18c1a"}), 18 | (ac_mad:ArtistCredit{name:'Madonna'}), 19 | (madonna)-[:CREDITED_AS]->(ac_mad), 20 | (ac_mad)-[:CREDITED_ON]->evita-[:PERFORMANCE]->(e_and_m) 21 | 22 | ---- 23 | 24 | //graph 25 | 26 | //console 27 | -------------------------------------------------------------------------------- /examples/musicbrainz/musicbrainz2neo4j.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Change TABs to spaces in "name" column for "track" and "work" tables 3 | -- somehow these TABs can make batch-import CSV parsing choked 4 | UPDATE track SET name=translate(name, E'\t', ' ') WHERE name LIKE E'%\t%'; 5 | UPDATE work SET name=translate(name, E'\t', ' ') WHERE name LIKE E'%\t%'; 6 | 7 | 8 | -- Create the mapping table 9 | -- between (entity, pk) tuples and incrementing node IDs 10 | 11 | 12 | DROP TABLE IF EXISTS entity_mapping; 13 | 14 | CREATE TEMPORARY TABLE entity_mapping AS 15 | ( 16 | 17 | SELECT 18 | kind AS entity, 19 | pk, 20 | row_number() OVER (ORDER BY kind, pk) as node_id 21 | FROM 22 | ( 23 | (SELECT 24 | 'area' as kind, 25 | area.id AS "pk" 26 | FROM 27 | area 28 | ORDER BY pk) 29 | 30 | UNION 31 | 32 | (SELECT 33 | 'area_alias' as kind, 34 | area_alias.id AS "pk" 35 | FROM 36 | area_alias 37 | ORDER BY pk) 38 | 39 | UNION 40 | 41 | (SELECT 42 | 'area_type' as kind, 43 | area_type.id AS "pk" 44 | FROM 45 | area_type 46 | ORDER BY pk) 47 | 48 | UNION 49 | 50 | (SELECT 51 | 'place' as kind, 52 | place.id AS "pk" 53 | FROM 54 | place 55 | ORDER BY pk) 56 | 57 | UNION 58 | 59 | (SELECT 60 | 'place_alias' as kind, 61 | place_alias.id AS "pk" 62 | FROM 63 | place_alias 64 | ORDER BY pk) 65 | 66 | UNION 67 | 68 | (SELECT 69 | 'artist' as kind, 70 | artist.id AS "pk" 71 | FROM 72 | artist 73 | ORDER BY pk) 74 | 75 | UNION 76 | 77 | (SELECT 78 | 'artist_alias' as kind, 79 | artist_alias.id AS "pk" 80 | FROM 81 | artist_alias 82 | ORDER BY pk) 83 | 84 | UNION 85 | 86 | (SELECT 87 | 'artist_type' as kind, 88 | artist_type.id AS "pk" 89 | FROM 90 | artist_type 91 | ORDER BY pk) 92 | 93 | UNION 94 | 95 | (SELECT 96 | 'artist_credit' as kind, 97 | artist_credit.id AS "pk" 98 | FROM 99 | artist_credit 100 | ORDER BY pk) 101 | 102 | UNION 103 | 104 | (SELECT 105 | 'gender' as kind, 106 | gender.id AS "pk" 107 | FROM 108 | gender 109 | ORDER BY pk) 110 | 111 | UNION 112 | 113 | (SELECT 114 | 'label' as kind, 115 | label.id AS "pk" 116 | FROM 117 | label 118 | ORDER BY pk) 119 | 120 | UNION 121 | 122 | (SELECT 123 | 'label_type' as kind, 124 | label_type.id AS "pk" 125 | FROM 126 | label_type 127 | ORDER BY pk) 128 | 129 | UNION 130 | 131 | (SELECT 132 | 'url' as kind, 133 | url.id AS "pk" 134 | FROM 135 | url 136 | ORDER BY pk) 137 | 138 | UNION 139 | 140 | (SELECT 141 | 'release_group' as kind, 142 | release_group.id AS "pk" 143 | FROM 144 | release_group 145 | ORDER BY pk) 146 | 147 | UNION 148 | 149 | (SELECT 150 | 'release_group_primary_type' as kind, 151 | release_group_primary_type.id AS "pk" 152 | FROM 153 | release_group_primary_type 154 | ORDER BY pk) 155 | 156 | UNION 157 | 158 | (SELECT 159 | 'release' as kind, 160 | release.id AS "pk" 161 | FROM 162 | release 163 | ORDER BY pk) 164 | 165 | UNION 166 | 167 | (SELECT 168 | 'release_packaging' as kind, 169 | release_packaging.id AS "pk" 170 | FROM 171 | release_packaging 172 | ORDER BY pk) 173 | 174 | UNION 175 | 176 | (SELECT 177 | 'release_status' as kind, 178 | release_status.id AS "pk" 179 | FROM 180 | release_status 181 | ORDER BY pk) 182 | 183 | UNION 184 | 185 | (SELECT 186 | 'recording' as kind, 187 | recording.id AS "pk" 188 | FROM 189 | recording 190 | ORDER BY pk) 191 | 192 | UNION 193 | 194 | (SELECT 195 | 'track' as kind, 196 | track.id AS "pk" 197 | FROM 198 | track 199 | ORDER BY pk) 200 | 201 | UNION 202 | 203 | (SELECT 204 | 'medium' as kind, 205 | medium.id AS "pk" 206 | FROM 207 | medium 208 | ORDER BY pk) 209 | 210 | UNION 211 | 212 | (SELECT 213 | 'medium_format' as kind, 214 | medium_format.id AS "pk" 215 | FROM 216 | medium_format 217 | ORDER BY pk) 218 | 219 | UNION 220 | 221 | (SELECT 222 | 'work' as kind, 223 | work.id AS "pk" 224 | FROM 225 | work 226 | ORDER BY pk) 227 | 228 | UNION 229 | 230 | (SELECT 231 | 'work_type' as kind, 232 | work_type.id AS "pk" 233 | FROM 234 | work_type 235 | ORDER BY pk) 236 | ) 237 | AS entity_union 238 | 239 | ); 240 | 241 | -- create index to speedup lookups 242 | CREATE INDEX ON entity_mapping (entity, pk); 243 | 244 | ANALYZE entity_mapping; 245 | 246 | 247 | 248 | COPY( 249 | 250 | SELECT 251 | wrapped.disambiguation AS disambiguation, 252 | wrapped.position AS position, 253 | translate( 254 | array_to_string( 255 | ARRAY[ 256 | initcap(wrapped.kind::text), 257 | initcap(wrapped.type::text), 258 | initcap(wrapped.format::text), 259 | initcap(wrapped.gender::text), 260 | initcap(wrapped.status::text), 261 | initcap(wrapped.packaging::text)], 262 | ','), 263 | ' _"/-', '') AS "l:label", 264 | wrapped.name AS "name:string:mb_fulltext", 265 | wrapped.length AS length, 266 | wrapped.mbid AS "mbid:string:mb_exact" 267 | FROM ( 268 | 269 | (SELECT 270 | 'area' as kind, 271 | area__type__area_type.name AS "type", 272 | 0 AS "position", 273 | area.name AS "name", 274 | area.id AS "pk", 275 | NULL AS "status", 276 | NULL AS "format", 277 | 0 AS "longitude", 278 | 0 AS "latitude", 279 | 0 AS "length", 280 | NULL AS "locale", 281 | 0 AS "code", 282 | area.gid AS "mbid", 283 | NULL AS "gender", 284 | NULL AS "disambiguation", 285 | NULL AS "packaging" 286 | FROM 287 | area 288 | LEFT JOIN area_type AS area__type__area_type ON area__type__area_type.id = area.type 289 | ORDER BY pk) 290 | 291 | UNION 292 | 293 | (SELECT 294 | 'area_alias' as kind, 295 | area_alias__type__area_alias_type.name AS "type", 296 | 0 AS "position", 297 | area_alias.name AS "name", 298 | area_alias.id AS "pk", 299 | NULL AS "status", 300 | NULL AS "format", 301 | 0 AS "longitude", 302 | 0 AS "latitude", 303 | 0 AS "length", 304 | area_alias.locale AS "locale", 305 | 0 AS "code", 306 | NULL AS "mbid", 307 | NULL AS "gender", 308 | NULL AS "disambiguation", 309 | NULL AS "packaging" 310 | FROM 311 | area_alias 312 | LEFT JOIN area_alias_type AS area_alias__type__area_alias_type ON area_alias__type__area_alias_type.id = area_alias.type 313 | ORDER BY pk) 314 | 315 | UNION 316 | 317 | (SELECT 318 | 'area_type' as kind, 319 | NULL AS "type", 320 | 0 AS "position", 321 | area_type.name AS "name", 322 | area_type.id AS "pk", 323 | NULL AS "status", 324 | NULL AS "format", 325 | 0 AS "longitude", 326 | 0 AS "latitude", 327 | 0 AS "length", 328 | NULL AS "locale", 329 | 0 AS "code", 330 | NULL AS "mbid", 331 | NULL AS "gender", 332 | NULL AS "disambiguation", 333 | NULL AS "packaging" 334 | FROM 335 | area_type 336 | ORDER BY pk) 337 | 338 | UNION 339 | 340 | (SELECT 341 | 'place' as kind, 342 | place__type__place_type.name AS "type", 343 | 0 AS "position", 344 | place.name AS "name", 345 | place.id AS "pk", 346 | NULL AS "status", 347 | NULL AS "format", 348 | place.coordinates[1] AS "longitude", 349 | place.coordinates[0] AS "latitude", 350 | 0 AS "length", 351 | NULL AS "locale", 352 | 0 AS "code", 353 | place.gid AS "mbid", 354 | NULL AS "gender", 355 | NULL AS "disambiguation", 356 | NULL AS "packaging" 357 | FROM 358 | place 359 | LEFT JOIN place_type AS place__type__place_type ON place__type__place_type.id = place.type 360 | ORDER BY pk) 361 | 362 | UNION 363 | 364 | (SELECT 365 | 'place_alias' as kind, 366 | place_alias__type__place_alias_type.name AS "type", 367 | 0 AS "position", 368 | place_alias.name AS "name", 369 | place_alias.id AS "pk", 370 | NULL AS "status", 371 | NULL AS "format", 372 | 0 AS "longitude", 373 | 0 AS "latitude", 374 | 0 AS "length", 375 | place_alias.locale AS "locale", 376 | 0 AS "code", 377 | NULL AS "mbid", 378 | NULL AS "gender", 379 | NULL AS "disambiguation", 380 | NULL AS "packaging" 381 | FROM 382 | place_alias 383 | LEFT JOIN place_alias_type AS place_alias__type__place_alias_type ON place_alias__type__place_alias_type.id = place_alias.type 384 | ORDER BY pk) 385 | 386 | UNION 387 | 388 | (SELECT 389 | 'artist' as kind, 390 | artist__type__artist_type.name AS "type", 391 | 0 AS "position", 392 | artist.name AS "name", 393 | artist.id AS "pk", 394 | NULL AS "status", 395 | NULL AS "format", 396 | 0 AS "longitude", 397 | 0 AS "latitude", 398 | 0 AS "length", 399 | NULL AS "locale", 400 | 0 AS "code", 401 | artist.gid AS "mbid", 402 | artist__gender__gender.name AS "gender", 403 | artist.comment AS "disambiguation", 404 | NULL AS "packaging" 405 | FROM 406 | artist 407 | LEFT JOIN artist_type AS artist__type__artist_type ON artist__type__artist_type.id = artist.type 408 | LEFT JOIN gender AS artist__gender__gender ON artist__gender__gender.id = artist.gender 409 | ORDER BY pk) 410 | 411 | UNION 412 | 413 | (SELECT 414 | 'artist_alias' as kind, 415 | artist_alias__type__artist_alias_type.name AS "type", 416 | 0 AS "position", 417 | artist_alias.name AS "name", 418 | artist_alias.id AS "pk", 419 | NULL AS "status", 420 | NULL AS "format", 421 | 0 AS "longitude", 422 | 0 AS "latitude", 423 | 0 AS "length", 424 | NULL AS "locale", 425 | 0 AS "code", 426 | NULL AS "mbid", 427 | NULL AS "gender", 428 | NULL AS "disambiguation", 429 | NULL AS "packaging" 430 | FROM 431 | artist_alias 432 | LEFT JOIN artist_alias_type AS artist_alias__type__artist_alias_type ON artist_alias__type__artist_alias_type.id = artist_alias.type 433 | ORDER BY pk) 434 | 435 | UNION 436 | 437 | (SELECT 438 | 'artist_type' as kind, 439 | NULL AS "type", 440 | 0 AS "position", 441 | artist_type.name AS "name", 442 | artist_type.id AS "pk", 443 | NULL AS "status", 444 | NULL AS "format", 445 | 0 AS "longitude", 446 | 0 AS "latitude", 447 | 0 AS "length", 448 | NULL AS "locale", 449 | 0 AS "code", 450 | NULL AS "mbid", 451 | NULL AS "gender", 452 | NULL AS "disambiguation", 453 | NULL AS "packaging" 454 | FROM 455 | artist_type 456 | ORDER BY pk) 457 | 458 | UNION 459 | 460 | (SELECT 461 | 'artist_credit' as kind, 462 | NULL AS "type", 463 | 0 AS "position", 464 | artist_credit.name AS "name", 465 | artist_credit.id AS "pk", 466 | NULL AS "status", 467 | NULL AS "format", 468 | 0 AS "longitude", 469 | 0 AS "latitude", 470 | 0 AS "length", 471 | NULL AS "locale", 472 | 0 AS "code", 473 | NULL AS "mbid", 474 | NULL AS "gender", 475 | NULL AS "disambiguation", 476 | NULL AS "packaging" 477 | FROM 478 | artist_credit 479 | ORDER BY pk) 480 | 481 | UNION 482 | 483 | (SELECT 484 | 'gender' as kind, 485 | NULL AS "type", 486 | 0 AS "position", 487 | gender.name AS "name", 488 | gender.id AS "pk", 489 | NULL AS "status", 490 | NULL AS "format", 491 | 0 AS "longitude", 492 | 0 AS "latitude", 493 | 0 AS "length", 494 | NULL AS "locale", 495 | 0 AS "code", 496 | NULL AS "mbid", 497 | NULL AS "gender", 498 | NULL AS "disambiguation", 499 | NULL AS "packaging" 500 | FROM 501 | gender 502 | ORDER BY pk) 503 | 504 | UNION 505 | 506 | (SELECT 507 | 'label' as kind, 508 | label__type__label_type.name AS "type", 509 | 0 AS "position", 510 | label.name AS "name", 511 | label.id AS "pk", 512 | NULL AS "status", 513 | NULL AS "format", 514 | 0 AS "longitude", 515 | 0 AS "latitude", 516 | 0 AS "length", 517 | NULL AS "locale", 518 | label.label_code AS "code", 519 | label.gid AS "mbid", 520 | NULL AS "gender", 521 | label.comment AS "disambiguation", 522 | NULL AS "packaging" 523 | FROM 524 | label 525 | LEFT JOIN label_type AS label__type__label_type ON label__type__label_type.id = label.type 526 | ORDER BY pk) 527 | 528 | UNION 529 | 530 | (SELECT 531 | 'label_type' as kind, 532 | NULL AS "type", 533 | 0 AS "position", 534 | label_type.name AS "name", 535 | label_type.id AS "pk", 536 | NULL AS "status", 537 | NULL AS "format", 538 | 0 AS "longitude", 539 | 0 AS "latitude", 540 | 0 AS "length", 541 | NULL AS "locale", 542 | 0 AS "code", 543 | NULL AS "mbid", 544 | NULL AS "gender", 545 | NULL AS "disambiguation", 546 | NULL AS "packaging" 547 | FROM 548 | label_type 549 | ORDER BY pk) 550 | 551 | UNION 552 | 553 | (SELECT 554 | 'url' as kind, 555 | NULL AS "type", 556 | 0 AS "position", 557 | url.url AS "name", 558 | url.id AS "pk", 559 | NULL AS "status", 560 | NULL AS "format", 561 | 0 AS "longitude", 562 | 0 AS "latitude", 563 | 0 AS "length", 564 | NULL AS "locale", 565 | 0 AS "code", 566 | url.gid AS "mbid", 567 | NULL AS "gender", 568 | NULL AS "disambiguation", 569 | NULL AS "packaging" 570 | FROM 571 | url 572 | ORDER BY pk) 573 | 574 | UNION 575 | 576 | (SELECT 577 | 'release_group' as kind, 578 | release_group__type__release_group_primary_type.name AS "type", 579 | 0 AS "position", 580 | release_group.name AS "name", 581 | release_group.id AS "pk", 582 | NULL AS "status", 583 | NULL AS "format", 584 | 0 AS "longitude", 585 | 0 AS "latitude", 586 | 0 AS "length", 587 | NULL AS "locale", 588 | 0 AS "code", 589 | release_group.gid AS "mbid", 590 | NULL AS "gender", 591 | release_group.comment AS "disambiguation", 592 | NULL AS "packaging" 593 | FROM 594 | release_group 595 | LEFT JOIN release_group_primary_type AS release_group__type__release_group_primary_type ON release_group__type__release_group_primary_type.id = release_group.type 596 | ORDER BY pk) 597 | 598 | UNION 599 | 600 | (SELECT 601 | 'release_group_primary_type' as kind, 602 | NULL AS "type", 603 | 0 AS "position", 604 | release_group_primary_type.name AS "name", 605 | release_group_primary_type.id AS "pk", 606 | NULL AS "status", 607 | NULL AS "format", 608 | 0 AS "longitude", 609 | 0 AS "latitude", 610 | 0 AS "length", 611 | NULL AS "locale", 612 | 0 AS "code", 613 | NULL AS "mbid", 614 | NULL AS "gender", 615 | NULL AS "disambiguation", 616 | NULL AS "packaging" 617 | FROM 618 | release_group_primary_type 619 | ORDER BY pk) 620 | 621 | UNION 622 | 623 | (SELECT 624 | 'release' as kind, 625 | NULL AS "type", 626 | 0 AS "position", 627 | release.name AS "name", 628 | release.id AS "pk", 629 | release__status__release_status.name AS "status", 630 | NULL AS "format", 631 | 0 AS "longitude", 632 | 0 AS "latitude", 633 | 0 AS "length", 634 | NULL AS "locale", 635 | 0 AS "code", 636 | release.gid AS "mbid", 637 | NULL AS "gender", 638 | release.comment AS "disambiguation", 639 | release__packaging__release_packaging.name AS "packaging" 640 | FROM 641 | release 642 | LEFT JOIN release_status AS release__status__release_status ON release__status__release_status.id = release.status 643 | LEFT JOIN release_packaging AS release__packaging__release_packaging ON release__packaging__release_packaging.id = release.packaging 644 | ORDER BY pk) 645 | 646 | UNION 647 | 648 | (SELECT 649 | 'release_packaging' as kind, 650 | NULL AS "type", 651 | 0 AS "position", 652 | release_packaging.name AS "name", 653 | release_packaging.id AS "pk", 654 | NULL AS "status", 655 | NULL AS "format", 656 | 0 AS "longitude", 657 | 0 AS "latitude", 658 | 0 AS "length", 659 | NULL AS "locale", 660 | 0 AS "code", 661 | NULL AS "mbid", 662 | NULL AS "gender", 663 | NULL AS "disambiguation", 664 | NULL AS "packaging" 665 | FROM 666 | release_packaging 667 | ORDER BY pk) 668 | 669 | UNION 670 | 671 | (SELECT 672 | 'release_status' as kind, 673 | NULL AS "type", 674 | 0 AS "position", 675 | release_status.name AS "name", 676 | release_status.id AS "pk", 677 | NULL AS "status", 678 | NULL AS "format", 679 | 0 AS "longitude", 680 | 0 AS "latitude", 681 | 0 AS "length", 682 | NULL AS "locale", 683 | 0 AS "code", 684 | NULL AS "mbid", 685 | NULL AS "gender", 686 | NULL AS "disambiguation", 687 | NULL AS "packaging" 688 | FROM 689 | release_status 690 | ORDER BY pk) 691 | 692 | UNION 693 | 694 | (SELECT 695 | 'recording' as kind, 696 | NULL AS "type", 697 | 0 AS "position", 698 | recording.name AS "name", 699 | recording.id AS "pk", 700 | NULL AS "status", 701 | NULL AS "format", 702 | 0 AS "longitude", 703 | 0 AS "latitude", 704 | 0 AS "length", 705 | NULL AS "locale", 706 | 0 AS "code", 707 | recording.gid AS "mbid", 708 | NULL AS "gender", 709 | recording.comment AS "disambiguation", 710 | NULL AS "packaging" 711 | FROM 712 | recording 713 | ORDER BY pk) 714 | 715 | UNION 716 | 717 | (SELECT 718 | 'track' as kind, 719 | NULL AS "type", 720 | track.position AS "position", 721 | track.name AS "name", 722 | track.id AS "pk", 723 | NULL AS "status", 724 | NULL AS "format", 725 | 0 AS "longitude", 726 | 0 AS "latitude", 727 | track.length AS "length", 728 | NULL AS "locale", 729 | 0 AS "code", 730 | track.gid AS "mbid", 731 | NULL AS "gender", 732 | NULL AS "disambiguation", 733 | NULL AS "packaging" 734 | FROM 735 | track 736 | ORDER BY pk) 737 | 738 | UNION 739 | 740 | (SELECT 741 | 'medium' as kind, 742 | NULL AS "type", 743 | 0 AS "position", 744 | medium.name AS "name", 745 | medium.id AS "pk", 746 | NULL AS "status", 747 | medium__format__medium_format.name AS "format", 748 | 0 AS "longitude", 749 | 0 AS "latitude", 750 | 0 AS "length", 751 | NULL AS "locale", 752 | 0 AS "code", 753 | NULL AS "mbid", 754 | NULL AS "gender", 755 | NULL AS "disambiguation", 756 | NULL AS "packaging" 757 | FROM 758 | medium 759 | LEFT JOIN medium_format AS medium__format__medium_format ON medium__format__medium_format.id = medium.format 760 | ORDER BY pk) 761 | 762 | UNION 763 | 764 | (SELECT 765 | 'medium_format' as kind, 766 | NULL AS "type", 767 | 0 AS "position", 768 | medium_format.name AS "name", 769 | medium_format.id AS "pk", 770 | NULL AS "status", 771 | NULL AS "format", 772 | 0 AS "longitude", 773 | 0 AS "latitude", 774 | 0 AS "length", 775 | NULL AS "locale", 776 | 0 AS "code", 777 | NULL AS "mbid", 778 | NULL AS "gender", 779 | NULL AS "disambiguation", 780 | NULL AS "packaging" 781 | FROM 782 | medium_format 783 | ORDER BY pk) 784 | 785 | UNION 786 | 787 | (SELECT 788 | 'work' as kind, 789 | work__type__work_type.name AS "type", 790 | 0 AS "position", 791 | work.name AS "name", 792 | work.id AS "pk", 793 | NULL AS "status", 794 | NULL AS "format", 795 | 0 AS "longitude", 796 | 0 AS "latitude", 797 | 0 AS "length", 798 | NULL AS "locale", 799 | 0 AS "code", 800 | work.gid AS "mbid", 801 | NULL AS "gender", 802 | work.comment AS "disambiguation", 803 | NULL AS "packaging" 804 | FROM 805 | work 806 | LEFT JOIN work_type AS work__type__work_type ON work__type__work_type.id = work.type 807 | ORDER BY pk) 808 | 809 | UNION 810 | 811 | (SELECT 812 | 'work_type' as kind, 813 | NULL AS "type", 814 | 0 AS "position", 815 | work_type.name AS "name", 816 | work_type.id AS "pk", 817 | NULL AS "status", 818 | NULL AS "format", 819 | 0 AS "longitude", 820 | 0 AS "latitude", 821 | 0 AS "length", 822 | NULL AS "locale", 823 | 0 AS "code", 824 | NULL AS "mbid", 825 | NULL AS "gender", 826 | NULL AS "disambiguation", 827 | NULL AS "packaging" 828 | FROM 829 | work_type 830 | ORDER BY pk) 831 | ORDER BY kind, pk 832 | 833 | ) 834 | AS wrapped 835 | 836 | ) 837 | TO '/tmp/musicbrainz__nodes__full.csv' CSV HEADER 838 | DELIMITER E'\t' 839 | ENCODING 'UTF8'; 840 | 841 | 842 | COPY( 843 | (SELECT 844 | start_entity.node_id AS start, 845 | end_entity.node_id AS end, 846 | 'HAS_ALIAS' AS rel_type, 847 | 0 AS "day", 848 | 0 AS "month", 849 | NULL AS "catalog_number", 850 | NULL AS "join", 851 | 0 AS "year", 852 | 0 AS "position" 853 | FROM 854 | area_alias 855 | 856 | JOIN entity_mapping start_entity 857 | ON ( 858 | start_entity.pk = area_alias.area 859 | AND 860 | start_entity.entity = 'area' 861 | ) 862 | 863 | JOIN entity_mapping end_entity 864 | ON ( 865 | end_entity.pk = area_alias.id 866 | AND 867 | end_entity.entity = 'area_alias' 868 | )) 869 | 870 | UNION 871 | 872 | (SELECT 873 | start_entity.node_id AS start, 874 | end_entity.node_id AS end, 875 | 'HAS_ALIAS' AS rel_type, 876 | 0 AS "day", 877 | 0 AS "month", 878 | NULL AS "catalog_number", 879 | NULL AS "join", 880 | 0 AS "year", 881 | 0 AS "position" 882 | FROM 883 | place_alias 884 | 885 | JOIN entity_mapping start_entity 886 | ON ( 887 | start_entity.pk = place_alias.place 888 | AND 889 | start_entity.entity = 'place' 890 | ) 891 | 892 | JOIN entity_mapping end_entity 893 | ON ( 894 | end_entity.pk = place_alias.id 895 | AND 896 | end_entity.entity = 'place_alias' 897 | )) 898 | 899 | UNION 900 | 901 | (SELECT 902 | start_entity.node_id AS start, 903 | end_entity.node_id AS end, 904 | 'FROM_AREA' AS rel_type, 905 | 0 AS "day", 906 | 0 AS "month", 907 | NULL AS "catalog_number", 908 | NULL AS "join", 909 | 0 AS "year", 910 | 0 AS "position" 911 | FROM 912 | artist 913 | 914 | JOIN entity_mapping start_entity 915 | ON ( 916 | start_entity.pk = artist.id 917 | AND 918 | start_entity.entity = 'artist' 919 | ) 920 | 921 | JOIN entity_mapping end_entity 922 | ON ( 923 | end_entity.pk = artist.area 924 | AND 925 | end_entity.entity = 'area' 926 | )) 927 | 928 | UNION 929 | 930 | (SELECT 931 | start_entity.node_id AS start, 932 | end_entity.node_id AS end, 933 | 'BEGAN_IN_AREA' AS rel_type, 934 | 0 AS "day", 935 | 0 AS "month", 936 | NULL AS "catalog_number", 937 | NULL AS "join", 938 | 0 AS "year", 939 | 0 AS "position" 940 | FROM 941 | artist 942 | 943 | JOIN entity_mapping start_entity 944 | ON ( 945 | start_entity.pk = artist.id 946 | AND 947 | start_entity.entity = 'artist' 948 | ) 949 | 950 | JOIN entity_mapping end_entity 951 | ON ( 952 | end_entity.pk = artist.begin_area 953 | AND 954 | end_entity.entity = 'area' 955 | )) 956 | 957 | UNION 958 | 959 | (SELECT 960 | start_entity.node_id AS start, 961 | end_entity.node_id AS end, 962 | 'ENDED_IN_AREA' AS rel_type, 963 | 0 AS "day", 964 | 0 AS "month", 965 | NULL AS "catalog_number", 966 | NULL AS "join", 967 | 0 AS "year", 968 | 0 AS "position" 969 | FROM 970 | artist 971 | 972 | JOIN entity_mapping start_entity 973 | ON ( 974 | start_entity.pk = artist.id 975 | AND 976 | start_entity.entity = 'artist' 977 | ) 978 | 979 | JOIN entity_mapping end_entity 980 | ON ( 981 | end_entity.pk = artist.end_area 982 | AND 983 | end_entity.entity = 'area' 984 | )) 985 | 986 | UNION 987 | 988 | (SELECT 989 | start_entity.node_id AS start, 990 | end_entity.node_id AS end, 991 | 'HAS_ALIAS' AS rel_type, 992 | 0 AS "day", 993 | 0 AS "month", 994 | NULL AS "catalog_number", 995 | NULL AS "join", 996 | 0 AS "year", 997 | 0 AS "position" 998 | FROM 999 | artist_alias 1000 | 1001 | JOIN entity_mapping start_entity 1002 | ON ( 1003 | start_entity.pk = artist_alias.artist 1004 | AND 1005 | start_entity.entity = 'artist' 1006 | ) 1007 | 1008 | JOIN entity_mapping end_entity 1009 | ON ( 1010 | end_entity.pk = artist_alias.id 1011 | AND 1012 | end_entity.entity = 'artist_alias' 1013 | )) 1014 | 1015 | UNION 1016 | 1017 | (SELECT 1018 | start_entity.node_id AS start, 1019 | end_entity.node_id AS end, 1020 | 'CREDITED_AS' AS rel_type, 1021 | 0 AS "day", 1022 | 0 AS "month", 1023 | NULL AS "catalog_number", 1024 | artist_credit_name.join_phrase AS "join", 1025 | 0 AS "year", 1026 | artist_credit_name.position AS "position" 1027 | FROM 1028 | artist_credit_name 1029 | 1030 | JOIN entity_mapping start_entity 1031 | ON ( 1032 | start_entity.pk = artist_credit_name.artist 1033 | AND 1034 | start_entity.entity = 'artist' 1035 | ) 1036 | 1037 | JOIN entity_mapping end_entity 1038 | ON ( 1039 | end_entity.pk = artist_credit_name.artist_credit 1040 | AND 1041 | end_entity.entity = 'artist_credit' 1042 | )) 1043 | 1044 | UNION 1045 | 1046 | (SELECT 1047 | start_entity.node_id AS start, 1048 | end_entity.node_id AS end, 1049 | 'FROM_AREA' AS rel_type, 1050 | 0 AS "day", 1051 | 0 AS "month", 1052 | NULL AS "catalog_number", 1053 | NULL AS "join", 1054 | 0 AS "year", 1055 | 0 AS "position" 1056 | FROM 1057 | label 1058 | 1059 | JOIN entity_mapping start_entity 1060 | ON ( 1061 | start_entity.pk = label.id 1062 | AND 1063 | start_entity.entity = 'label' 1064 | ) 1065 | 1066 | JOIN entity_mapping end_entity 1067 | ON ( 1068 | end_entity.pk = label.area 1069 | AND 1070 | end_entity.entity = 'area' 1071 | )) 1072 | 1073 | UNION 1074 | 1075 | (SELECT 1076 | start_entity.node_id AS start, 1077 | end_entity.node_id AS end, 1078 | 'CREDITED_ON' AS rel_type, 1079 | 0 AS "day", 1080 | 0 AS "month", 1081 | NULL AS "catalog_number", 1082 | NULL AS "join", 1083 | 0 AS "year", 1084 | 0 AS "position" 1085 | FROM 1086 | release_group 1087 | 1088 | JOIN entity_mapping start_entity 1089 | ON ( 1090 | start_entity.pk = release_group.artist_credit 1091 | AND 1092 | start_entity.entity = 'artist_credit' 1093 | ) 1094 | 1095 | JOIN entity_mapping end_entity 1096 | ON ( 1097 | end_entity.pk = release_group.id 1098 | AND 1099 | end_entity.entity = 'release_group' 1100 | )) 1101 | 1102 | UNION 1103 | 1104 | (SELECT 1105 | start_entity.node_id AS start, 1106 | end_entity.node_id AS end, 1107 | 'CREDITED_ON' AS rel_type, 1108 | 0 AS "day", 1109 | 0 AS "month", 1110 | NULL AS "catalog_number", 1111 | NULL AS "join", 1112 | 0 AS "year", 1113 | 0 AS "position" 1114 | FROM 1115 | release 1116 | 1117 | JOIN entity_mapping start_entity 1118 | ON ( 1119 | start_entity.pk = release.artist_credit 1120 | AND 1121 | start_entity.entity = 'artist_credit' 1122 | ) 1123 | 1124 | JOIN entity_mapping end_entity 1125 | ON ( 1126 | end_entity.pk = release.id 1127 | AND 1128 | end_entity.entity = 'release' 1129 | )) 1130 | 1131 | UNION 1132 | 1133 | (SELECT 1134 | start_entity.node_id AS start, 1135 | end_entity.node_id AS end, 1136 | 'PART_OF' AS rel_type, 1137 | 0 AS "day", 1138 | 0 AS "month", 1139 | NULL AS "catalog_number", 1140 | NULL AS "join", 1141 | 0 AS "year", 1142 | 0 AS "position" 1143 | FROM 1144 | release 1145 | 1146 | JOIN entity_mapping start_entity 1147 | ON ( 1148 | start_entity.pk = release.id 1149 | AND 1150 | start_entity.entity = 'release' 1151 | ) 1152 | 1153 | JOIN entity_mapping end_entity 1154 | ON ( 1155 | end_entity.pk = release.release_group 1156 | AND 1157 | end_entity.entity = 'release_group' 1158 | )) 1159 | 1160 | UNION 1161 | 1162 | (SELECT 1163 | start_entity.node_id AS start, 1164 | end_entity.node_id AS end, 1165 | 'RELEASED_IN' AS rel_type, 1166 | release_country.date_day AS "day", 1167 | release_country.date_month AS "month", 1168 | NULL AS "catalog_number", 1169 | NULL AS "join", 1170 | release_country.date_year AS "year", 1171 | 0 AS "position" 1172 | FROM 1173 | release_country 1174 | 1175 | JOIN entity_mapping start_entity 1176 | ON ( 1177 | start_entity.pk = release_country.release 1178 | AND 1179 | start_entity.entity = 'release' 1180 | ) 1181 | 1182 | JOIN entity_mapping end_entity 1183 | ON ( 1184 | end_entity.pk = release_country.country 1185 | AND 1186 | end_entity.entity = 'area' 1187 | )) 1188 | 1189 | UNION 1190 | 1191 | (SELECT 1192 | start_entity.node_id AS start, 1193 | end_entity.node_id AS end, 1194 | 'RELEASED_ON' AS rel_type, 1195 | 0 AS "day", 1196 | 0 AS "month", 1197 | release_label.catalog_number AS "catalog_number", 1198 | NULL AS "join", 1199 | 0 AS "year", 1200 | 0 AS "position" 1201 | FROM 1202 | release_label 1203 | 1204 | JOIN entity_mapping start_entity 1205 | ON ( 1206 | start_entity.pk = release_label.release 1207 | AND 1208 | start_entity.entity = 'release' 1209 | ) 1210 | 1211 | JOIN entity_mapping end_entity 1212 | ON ( 1213 | end_entity.pk = release_label.label 1214 | AND 1215 | end_entity.entity = 'label' 1216 | )) 1217 | 1218 | UNION 1219 | 1220 | (SELECT 1221 | start_entity.node_id AS start, 1222 | end_entity.node_id AS end, 1223 | 'CREDITED_ON' AS rel_type, 1224 | 0 AS "day", 1225 | 0 AS "month", 1226 | NULL AS "catalog_number", 1227 | NULL AS "join", 1228 | 0 AS "year", 1229 | 0 AS "position" 1230 | FROM 1231 | recording 1232 | 1233 | JOIN entity_mapping start_entity 1234 | ON ( 1235 | start_entity.pk = recording.artist_credit 1236 | AND 1237 | start_entity.entity = 'artist_credit' 1238 | ) 1239 | 1240 | JOIN entity_mapping end_entity 1241 | ON ( 1242 | end_entity.pk = recording.id 1243 | AND 1244 | end_entity.entity = 'recording' 1245 | )) 1246 | 1247 | UNION 1248 | 1249 | (SELECT 1250 | start_entity.node_id AS start, 1251 | end_entity.node_id AS end, 1252 | 'IS_RECORDING' AS rel_type, 1253 | 0 AS "day", 1254 | 0 AS "month", 1255 | NULL AS "catalog_number", 1256 | NULL AS "join", 1257 | 0 AS "year", 1258 | 0 AS "position" 1259 | FROM 1260 | track 1261 | 1262 | JOIN entity_mapping start_entity 1263 | ON ( 1264 | start_entity.pk = track.id 1265 | AND 1266 | start_entity.entity = 'track' 1267 | ) 1268 | 1269 | JOIN entity_mapping end_entity 1270 | ON ( 1271 | end_entity.pk = track.recording 1272 | AND 1273 | end_entity.entity = 'recording' 1274 | )) 1275 | 1276 | UNION 1277 | 1278 | (SELECT 1279 | start_entity.node_id AS start, 1280 | end_entity.node_id AS end, 1281 | 'APPEARS_ON' AS rel_type, 1282 | 0 AS "day", 1283 | 0 AS "month", 1284 | NULL AS "catalog_number", 1285 | NULL AS "join", 1286 | 0 AS "year", 1287 | 0 AS "position" 1288 | FROM 1289 | track 1290 | 1291 | JOIN entity_mapping start_entity 1292 | ON ( 1293 | start_entity.pk = track.id 1294 | AND 1295 | start_entity.entity = 'track' 1296 | ) 1297 | 1298 | JOIN entity_mapping end_entity 1299 | ON ( 1300 | end_entity.pk = track.medium 1301 | AND 1302 | end_entity.entity = 'medium' 1303 | )) 1304 | 1305 | UNION 1306 | 1307 | (SELECT 1308 | start_entity.node_id AS start, 1309 | end_entity.node_id AS end, 1310 | 'CREDITED_ON' AS rel_type, 1311 | 0 AS "day", 1312 | 0 AS "month", 1313 | NULL AS "catalog_number", 1314 | NULL AS "join", 1315 | 0 AS "year", 1316 | 0 AS "position" 1317 | FROM 1318 | track 1319 | 1320 | JOIN entity_mapping start_entity 1321 | ON ( 1322 | start_entity.pk = track.artist_credit 1323 | AND 1324 | start_entity.entity = 'artist_credit' 1325 | ) 1326 | 1327 | JOIN entity_mapping end_entity 1328 | ON ( 1329 | end_entity.pk = track.id 1330 | AND 1331 | end_entity.entity = 'track' 1332 | )) 1333 | 1334 | UNION 1335 | 1336 | (SELECT 1337 | start_entity.node_id AS start, 1338 | end_entity.node_id AS end, 1339 | 'RELEASED_ON_MEDIUM' AS rel_type, 1340 | 0 AS "day", 1341 | 0 AS "month", 1342 | NULL AS "catalog_number", 1343 | NULL AS "join", 1344 | 0 AS "year", 1345 | 0 AS "position" 1346 | FROM 1347 | medium 1348 | 1349 | JOIN entity_mapping start_entity 1350 | ON ( 1351 | start_entity.pk = medium.release 1352 | AND 1353 | start_entity.entity = 'release' 1354 | ) 1355 | 1356 | JOIN entity_mapping end_entity 1357 | ON ( 1358 | end_entity.pk = medium.id 1359 | AND 1360 | end_entity.entity = 'medium' 1361 | )) 1362 | 1363 | UNION 1364 | 1365 | (SELECT 1366 | start_entity.node_id AS start, 1367 | end_entity.node_id AS end, 1368 | 'PARENT_FORMAT' AS rel_type, 1369 | 0 AS "day", 1370 | 0 AS "month", 1371 | NULL AS "catalog_number", 1372 | NULL AS "join", 1373 | 0 AS "year", 1374 | 0 AS "position" 1375 | FROM 1376 | medium_format 1377 | 1378 | JOIN entity_mapping start_entity 1379 | ON ( 1380 | start_entity.pk = medium_format.id 1381 | AND 1382 | start_entity.entity = 'medium_format' 1383 | ) 1384 | 1385 | JOIN entity_mapping end_entity 1386 | ON ( 1387 | end_entity.pk = medium_format.parent 1388 | AND 1389 | end_entity.entity = 'medium_format' 1390 | )) 1391 | 1392 | UNION 1393 | 1394 | (SELECT 1395 | start_entity.node_id AS start, 1396 | end_entity.node_id AS end, 1397 | translate(upper(l_area_area__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1398 | 0 AS "day", 1399 | 0 AS "month", 1400 | NULL AS "catalog_number", 1401 | NULL AS "join", 1402 | 0 AS "year", 1403 | 0 AS "position" 1404 | FROM 1405 | l_area_area 1406 | JOIN link AS l_area_area__link__link ON l_area_area__link__link.id = l_area_area.link 1407 | JOIN link_type AS l_area_area__link__link__link_type__link_type ON l_area_area__link__link__link_type__link_type.id = l_area_area__link__link.link_type 1408 | 1409 | JOIN entity_mapping start_entity 1410 | ON ( 1411 | start_entity.pk = l_area_area.entity0 1412 | AND 1413 | start_entity.entity = 'area' 1414 | ) 1415 | 1416 | JOIN entity_mapping end_entity 1417 | ON ( 1418 | end_entity.pk = l_area_area.entity1 1419 | AND 1420 | end_entity.entity = 'area' 1421 | )) 1422 | 1423 | UNION 1424 | 1425 | (SELECT 1426 | start_entity.node_id AS start, 1427 | end_entity.node_id AS end, 1428 | translate(upper(l_area_artist__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1429 | 0 AS "day", 1430 | 0 AS "month", 1431 | NULL AS "catalog_number", 1432 | NULL AS "join", 1433 | 0 AS "year", 1434 | 0 AS "position" 1435 | FROM 1436 | l_area_artist 1437 | JOIN link AS l_area_artist__link__link ON l_area_artist__link__link.id = l_area_artist.link 1438 | JOIN link_type AS l_area_artist__link__link__link_type__link_type ON l_area_artist__link__link__link_type__link_type.id = l_area_artist__link__link.link_type 1439 | 1440 | JOIN entity_mapping start_entity 1441 | ON ( 1442 | start_entity.pk = l_area_artist.entity0 1443 | AND 1444 | start_entity.entity = 'area' 1445 | ) 1446 | 1447 | JOIN entity_mapping end_entity 1448 | ON ( 1449 | end_entity.pk = l_area_artist.entity1 1450 | AND 1451 | end_entity.entity = 'artist' 1452 | )) 1453 | 1454 | UNION 1455 | 1456 | (SELECT 1457 | start_entity.node_id AS start, 1458 | end_entity.node_id AS end, 1459 | translate(upper(l_area_label__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1460 | 0 AS "day", 1461 | 0 AS "month", 1462 | NULL AS "catalog_number", 1463 | NULL AS "join", 1464 | 0 AS "year", 1465 | 0 AS "position" 1466 | FROM 1467 | l_area_label 1468 | JOIN link AS l_area_label__link__link ON l_area_label__link__link.id = l_area_label.link 1469 | JOIN link_type AS l_area_label__link__link__link_type__link_type ON l_area_label__link__link__link_type__link_type.id = l_area_label__link__link.link_type 1470 | 1471 | JOIN entity_mapping start_entity 1472 | ON ( 1473 | start_entity.pk = l_area_label.entity0 1474 | AND 1475 | start_entity.entity = 'area' 1476 | ) 1477 | 1478 | JOIN entity_mapping end_entity 1479 | ON ( 1480 | end_entity.pk = l_area_label.entity1 1481 | AND 1482 | end_entity.entity = 'label' 1483 | )) 1484 | 1485 | UNION 1486 | 1487 | (SELECT 1488 | start_entity.node_id AS start, 1489 | end_entity.node_id AS end, 1490 | translate(upper(l_area_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1491 | 0 AS "day", 1492 | 0 AS "month", 1493 | NULL AS "catalog_number", 1494 | NULL AS "join", 1495 | 0 AS "year", 1496 | 0 AS "position" 1497 | FROM 1498 | l_area_work 1499 | JOIN link AS l_area_work__link__link ON l_area_work__link__link.id = l_area_work.link 1500 | JOIN link_type AS l_area_work__link__link__link_type__link_type ON l_area_work__link__link__link_type__link_type.id = l_area_work__link__link.link_type 1501 | 1502 | JOIN entity_mapping start_entity 1503 | ON ( 1504 | start_entity.pk = l_area_work.entity0 1505 | AND 1506 | start_entity.entity = 'area' 1507 | ) 1508 | 1509 | JOIN entity_mapping end_entity 1510 | ON ( 1511 | end_entity.pk = l_area_work.entity1 1512 | AND 1513 | end_entity.entity = 'work' 1514 | )) 1515 | 1516 | UNION 1517 | 1518 | (SELECT 1519 | start_entity.node_id AS start, 1520 | end_entity.node_id AS end, 1521 | translate(upper(l_area_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1522 | 0 AS "day", 1523 | 0 AS "month", 1524 | NULL AS "catalog_number", 1525 | NULL AS "join", 1526 | 0 AS "year", 1527 | 0 AS "position" 1528 | FROM 1529 | l_area_url 1530 | JOIN link AS l_area_url__link__link ON l_area_url__link__link.id = l_area_url.link 1531 | JOIN link_type AS l_area_url__link__link__link_type__link_type ON l_area_url__link__link__link_type__link_type.id = l_area_url__link__link.link_type 1532 | 1533 | JOIN entity_mapping start_entity 1534 | ON ( 1535 | start_entity.pk = l_area_url.entity0 1536 | AND 1537 | start_entity.entity = 'area' 1538 | ) 1539 | 1540 | JOIN entity_mapping end_entity 1541 | ON ( 1542 | end_entity.pk = l_area_url.entity1 1543 | AND 1544 | end_entity.entity = 'url' 1545 | )) 1546 | 1547 | UNION 1548 | 1549 | (SELECT 1550 | start_entity.node_id AS start, 1551 | end_entity.node_id AS end, 1552 | translate(upper(l_area_recording__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1553 | 0 AS "day", 1554 | 0 AS "month", 1555 | NULL AS "catalog_number", 1556 | NULL AS "join", 1557 | 0 AS "year", 1558 | 0 AS "position" 1559 | FROM 1560 | l_area_recording 1561 | JOIN link AS l_area_recording__link__link ON l_area_recording__link__link.id = l_area_recording.link 1562 | JOIN link_type AS l_area_recording__link__link__link_type__link_type ON l_area_recording__link__link__link_type__link_type.id = l_area_recording__link__link.link_type 1563 | 1564 | JOIN entity_mapping start_entity 1565 | ON ( 1566 | start_entity.pk = l_area_recording.entity0 1567 | AND 1568 | start_entity.entity = 'area' 1569 | ) 1570 | 1571 | JOIN entity_mapping end_entity 1572 | ON ( 1573 | end_entity.pk = l_area_recording.entity1 1574 | AND 1575 | end_entity.entity = 'recording' 1576 | )) 1577 | 1578 | UNION 1579 | 1580 | (SELECT 1581 | start_entity.node_id AS start, 1582 | end_entity.node_id AS end, 1583 | translate(upper(l_area_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1584 | 0 AS "day", 1585 | 0 AS "month", 1586 | NULL AS "catalog_number", 1587 | NULL AS "join", 1588 | 0 AS "year", 1589 | 0 AS "position" 1590 | FROM 1591 | l_area_release 1592 | JOIN link AS l_area_release__link__link ON l_area_release__link__link.id = l_area_release.link 1593 | JOIN link_type AS l_area_release__link__link__link_type__link_type ON l_area_release__link__link__link_type__link_type.id = l_area_release__link__link.link_type 1594 | 1595 | JOIN entity_mapping start_entity 1596 | ON ( 1597 | start_entity.pk = l_area_release.entity0 1598 | AND 1599 | start_entity.entity = 'area' 1600 | ) 1601 | 1602 | JOIN entity_mapping end_entity 1603 | ON ( 1604 | end_entity.pk = l_area_release.entity1 1605 | AND 1606 | end_entity.entity = 'release' 1607 | )) 1608 | 1609 | UNION 1610 | 1611 | (SELECT 1612 | start_entity.node_id AS start, 1613 | end_entity.node_id AS end, 1614 | translate(upper(l_area_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1615 | 0 AS "day", 1616 | 0 AS "month", 1617 | NULL AS "catalog_number", 1618 | NULL AS "join", 1619 | 0 AS "year", 1620 | 0 AS "position" 1621 | FROM 1622 | l_area_release_group 1623 | JOIN link AS l_area_release_group__link__link ON l_area_release_group__link__link.id = l_area_release_group.link 1624 | JOIN link_type AS l_area_release_group__link__link__link_type__link_type ON l_area_release_group__link__link__link_type__link_type.id = l_area_release_group__link__link.link_type 1625 | 1626 | JOIN entity_mapping start_entity 1627 | ON ( 1628 | start_entity.pk = l_area_release_group.entity0 1629 | AND 1630 | start_entity.entity = 'area' 1631 | ) 1632 | 1633 | JOIN entity_mapping end_entity 1634 | ON ( 1635 | end_entity.pk = l_area_release_group.entity1 1636 | AND 1637 | end_entity.entity = 'release_group' 1638 | )) 1639 | 1640 | UNION 1641 | 1642 | (SELECT 1643 | start_entity.node_id AS start, 1644 | end_entity.node_id AS end, 1645 | translate(upper(l_area_place__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1646 | 0 AS "day", 1647 | 0 AS "month", 1648 | NULL AS "catalog_number", 1649 | NULL AS "join", 1650 | 0 AS "year", 1651 | 0 AS "position" 1652 | FROM 1653 | l_area_place 1654 | JOIN link AS l_area_place__link__link ON l_area_place__link__link.id = l_area_place.link 1655 | JOIN link_type AS l_area_place__link__link__link_type__link_type ON l_area_place__link__link__link_type__link_type.id = l_area_place__link__link.link_type 1656 | 1657 | JOIN entity_mapping start_entity 1658 | ON ( 1659 | start_entity.pk = l_area_place.entity0 1660 | AND 1661 | start_entity.entity = 'area' 1662 | ) 1663 | 1664 | JOIN entity_mapping end_entity 1665 | ON ( 1666 | end_entity.pk = l_area_place.entity1 1667 | AND 1668 | end_entity.entity = 'place' 1669 | )) 1670 | 1671 | UNION 1672 | 1673 | (SELECT 1674 | start_entity.node_id AS start, 1675 | end_entity.node_id AS end, 1676 | translate(upper(l_artist_artist__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1677 | 0 AS "day", 1678 | 0 AS "month", 1679 | NULL AS "catalog_number", 1680 | NULL AS "join", 1681 | 0 AS "year", 1682 | 0 AS "position" 1683 | FROM 1684 | l_artist_artist 1685 | JOIN link AS l_artist_artist__link__link ON l_artist_artist__link__link.id = l_artist_artist.link 1686 | JOIN link_type AS l_artist_artist__link__link__link_type__link_type ON l_artist_artist__link__link__link_type__link_type.id = l_artist_artist__link__link.link_type 1687 | 1688 | JOIN entity_mapping start_entity 1689 | ON ( 1690 | start_entity.pk = l_artist_artist.entity0 1691 | AND 1692 | start_entity.entity = 'artist' 1693 | ) 1694 | 1695 | JOIN entity_mapping end_entity 1696 | ON ( 1697 | end_entity.pk = l_artist_artist.entity1 1698 | AND 1699 | end_entity.entity = 'artist' 1700 | )) 1701 | 1702 | UNION 1703 | 1704 | (SELECT 1705 | start_entity.node_id AS start, 1706 | end_entity.node_id AS end, 1707 | translate(upper(l_artist_label__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1708 | 0 AS "day", 1709 | 0 AS "month", 1710 | NULL AS "catalog_number", 1711 | NULL AS "join", 1712 | 0 AS "year", 1713 | 0 AS "position" 1714 | FROM 1715 | l_artist_label 1716 | JOIN link AS l_artist_label__link__link ON l_artist_label__link__link.id = l_artist_label.link 1717 | JOIN link_type AS l_artist_label__link__link__link_type__link_type ON l_artist_label__link__link__link_type__link_type.id = l_artist_label__link__link.link_type 1718 | 1719 | JOIN entity_mapping start_entity 1720 | ON ( 1721 | start_entity.pk = l_artist_label.entity0 1722 | AND 1723 | start_entity.entity = 'artist' 1724 | ) 1725 | 1726 | JOIN entity_mapping end_entity 1727 | ON ( 1728 | end_entity.pk = l_artist_label.entity1 1729 | AND 1730 | end_entity.entity = 'label' 1731 | )) 1732 | 1733 | UNION 1734 | 1735 | (SELECT 1736 | start_entity.node_id AS start, 1737 | end_entity.node_id AS end, 1738 | translate(upper(l_artist_recording__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1739 | 0 AS "day", 1740 | 0 AS "month", 1741 | NULL AS "catalog_number", 1742 | NULL AS "join", 1743 | 0 AS "year", 1744 | 0 AS "position" 1745 | FROM 1746 | l_artist_recording 1747 | JOIN link AS l_artist_recording__link__link ON l_artist_recording__link__link.id = l_artist_recording.link 1748 | JOIN link_type AS l_artist_recording__link__link__link_type__link_type ON l_artist_recording__link__link__link_type__link_type.id = l_artist_recording__link__link.link_type 1749 | 1750 | JOIN entity_mapping start_entity 1751 | ON ( 1752 | start_entity.pk = l_artist_recording.entity0 1753 | AND 1754 | start_entity.entity = 'artist' 1755 | ) 1756 | 1757 | JOIN entity_mapping end_entity 1758 | ON ( 1759 | end_entity.pk = l_artist_recording.entity1 1760 | AND 1761 | end_entity.entity = 'recording' 1762 | )) 1763 | 1764 | UNION 1765 | 1766 | (SELECT 1767 | start_entity.node_id AS start, 1768 | end_entity.node_id AS end, 1769 | translate(upper(l_artist_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1770 | 0 AS "day", 1771 | 0 AS "month", 1772 | NULL AS "catalog_number", 1773 | NULL AS "join", 1774 | 0 AS "year", 1775 | 0 AS "position" 1776 | FROM 1777 | l_artist_release 1778 | JOIN link AS l_artist_release__link__link ON l_artist_release__link__link.id = l_artist_release.link 1779 | JOIN link_type AS l_artist_release__link__link__link_type__link_type ON l_artist_release__link__link__link_type__link_type.id = l_artist_release__link__link.link_type 1780 | 1781 | JOIN entity_mapping start_entity 1782 | ON ( 1783 | start_entity.pk = l_artist_release.entity0 1784 | AND 1785 | start_entity.entity = 'artist' 1786 | ) 1787 | 1788 | JOIN entity_mapping end_entity 1789 | ON ( 1790 | end_entity.pk = l_artist_release.entity1 1791 | AND 1792 | end_entity.entity = 'release' 1793 | )) 1794 | 1795 | UNION 1796 | 1797 | (SELECT 1798 | start_entity.node_id AS start, 1799 | end_entity.node_id AS end, 1800 | translate(upper(l_artist_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1801 | 0 AS "day", 1802 | 0 AS "month", 1803 | NULL AS "catalog_number", 1804 | NULL AS "join", 1805 | 0 AS "year", 1806 | 0 AS "position" 1807 | FROM 1808 | l_artist_release_group 1809 | JOIN link AS l_artist_release_group__link__link ON l_artist_release_group__link__link.id = l_artist_release_group.link 1810 | JOIN link_type AS l_artist_release_group__link__link__link_type__link_type ON l_artist_release_group__link__link__link_type__link_type.id = l_artist_release_group__link__link.link_type 1811 | 1812 | JOIN entity_mapping start_entity 1813 | ON ( 1814 | start_entity.pk = l_artist_release_group.entity0 1815 | AND 1816 | start_entity.entity = 'artist' 1817 | ) 1818 | 1819 | JOIN entity_mapping end_entity 1820 | ON ( 1821 | end_entity.pk = l_artist_release_group.entity1 1822 | AND 1823 | end_entity.entity = 'release_group' 1824 | )) 1825 | 1826 | UNION 1827 | 1828 | (SELECT 1829 | start_entity.node_id AS start, 1830 | end_entity.node_id AS end, 1831 | translate(upper(l_artist_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1832 | 0 AS "day", 1833 | 0 AS "month", 1834 | NULL AS "catalog_number", 1835 | NULL AS "join", 1836 | 0 AS "year", 1837 | 0 AS "position" 1838 | FROM 1839 | l_artist_url 1840 | JOIN link AS l_artist_url__link__link ON l_artist_url__link__link.id = l_artist_url.link 1841 | JOIN link_type AS l_artist_url__link__link__link_type__link_type ON l_artist_url__link__link__link_type__link_type.id = l_artist_url__link__link.link_type 1842 | 1843 | JOIN entity_mapping start_entity 1844 | ON ( 1845 | start_entity.pk = l_artist_url.entity0 1846 | AND 1847 | start_entity.entity = 'artist' 1848 | ) 1849 | 1850 | JOIN entity_mapping end_entity 1851 | ON ( 1852 | end_entity.pk = l_artist_url.entity1 1853 | AND 1854 | end_entity.entity = 'url' 1855 | )) 1856 | 1857 | UNION 1858 | 1859 | (SELECT 1860 | start_entity.node_id AS start, 1861 | end_entity.node_id AS end, 1862 | translate(upper(l_artist_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1863 | 0 AS "day", 1864 | 0 AS "month", 1865 | NULL AS "catalog_number", 1866 | NULL AS "join", 1867 | 0 AS "year", 1868 | 0 AS "position" 1869 | FROM 1870 | l_artist_work 1871 | JOIN link AS l_artist_work__link__link ON l_artist_work__link__link.id = l_artist_work.link 1872 | JOIN link_type AS l_artist_work__link__link__link_type__link_type ON l_artist_work__link__link__link_type__link_type.id = l_artist_work__link__link.link_type 1873 | 1874 | JOIN entity_mapping start_entity 1875 | ON ( 1876 | start_entity.pk = l_artist_work.entity0 1877 | AND 1878 | start_entity.entity = 'artist' 1879 | ) 1880 | 1881 | JOIN entity_mapping end_entity 1882 | ON ( 1883 | end_entity.pk = l_artist_work.entity1 1884 | AND 1885 | end_entity.entity = 'work' 1886 | )) 1887 | 1888 | UNION 1889 | 1890 | (SELECT 1891 | start_entity.node_id AS start, 1892 | end_entity.node_id AS end, 1893 | translate(upper(l_artist_place__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1894 | 0 AS "day", 1895 | 0 AS "month", 1896 | NULL AS "catalog_number", 1897 | NULL AS "join", 1898 | 0 AS "year", 1899 | 0 AS "position" 1900 | FROM 1901 | l_artist_place 1902 | JOIN link AS l_artist_place__link__link ON l_artist_place__link__link.id = l_artist_place.link 1903 | JOIN link_type AS l_artist_place__link__link__link_type__link_type ON l_artist_place__link__link__link_type__link_type.id = l_artist_place__link__link.link_type 1904 | 1905 | JOIN entity_mapping start_entity 1906 | ON ( 1907 | start_entity.pk = l_artist_place.entity0 1908 | AND 1909 | start_entity.entity = 'artist' 1910 | ) 1911 | 1912 | JOIN entity_mapping end_entity 1913 | ON ( 1914 | end_entity.pk = l_artist_place.entity1 1915 | AND 1916 | end_entity.entity = 'place' 1917 | )) 1918 | 1919 | UNION 1920 | 1921 | (SELECT 1922 | start_entity.node_id AS start, 1923 | end_entity.node_id AS end, 1924 | translate(upper(l_label_label__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1925 | 0 AS "day", 1926 | 0 AS "month", 1927 | NULL AS "catalog_number", 1928 | NULL AS "join", 1929 | 0 AS "year", 1930 | 0 AS "position" 1931 | FROM 1932 | l_label_label 1933 | JOIN link AS l_label_label__link__link ON l_label_label__link__link.id = l_label_label.link 1934 | JOIN link_type AS l_label_label__link__link__link_type__link_type ON l_label_label__link__link__link_type__link_type.id = l_label_label__link__link.link_type 1935 | 1936 | JOIN entity_mapping start_entity 1937 | ON ( 1938 | start_entity.pk = l_label_label.entity0 1939 | AND 1940 | start_entity.entity = 'label' 1941 | ) 1942 | 1943 | JOIN entity_mapping end_entity 1944 | ON ( 1945 | end_entity.pk = l_label_label.entity1 1946 | AND 1947 | end_entity.entity = 'label' 1948 | )) 1949 | 1950 | UNION 1951 | 1952 | (SELECT 1953 | start_entity.node_id AS start, 1954 | end_entity.node_id AS end, 1955 | translate(upper(l_label_recording__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1956 | 0 AS "day", 1957 | 0 AS "month", 1958 | NULL AS "catalog_number", 1959 | NULL AS "join", 1960 | 0 AS "year", 1961 | 0 AS "position" 1962 | FROM 1963 | l_label_recording 1964 | JOIN link AS l_label_recording__link__link ON l_label_recording__link__link.id = l_label_recording.link 1965 | JOIN link_type AS l_label_recording__link__link__link_type__link_type ON l_label_recording__link__link__link_type__link_type.id = l_label_recording__link__link.link_type 1966 | 1967 | JOIN entity_mapping start_entity 1968 | ON ( 1969 | start_entity.pk = l_label_recording.entity0 1970 | AND 1971 | start_entity.entity = 'label' 1972 | ) 1973 | 1974 | JOIN entity_mapping end_entity 1975 | ON ( 1976 | end_entity.pk = l_label_recording.entity1 1977 | AND 1978 | end_entity.entity = 'recording' 1979 | )) 1980 | 1981 | UNION 1982 | 1983 | (SELECT 1984 | start_entity.node_id AS start, 1985 | end_entity.node_id AS end, 1986 | translate(upper(l_label_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 1987 | 0 AS "day", 1988 | 0 AS "month", 1989 | NULL AS "catalog_number", 1990 | NULL AS "join", 1991 | 0 AS "year", 1992 | 0 AS "position" 1993 | FROM 1994 | l_label_release 1995 | JOIN link AS l_label_release__link__link ON l_label_release__link__link.id = l_label_release.link 1996 | JOIN link_type AS l_label_release__link__link__link_type__link_type ON l_label_release__link__link__link_type__link_type.id = l_label_release__link__link.link_type 1997 | 1998 | JOIN entity_mapping start_entity 1999 | ON ( 2000 | start_entity.pk = l_label_release.entity0 2001 | AND 2002 | start_entity.entity = 'label' 2003 | ) 2004 | 2005 | JOIN entity_mapping end_entity 2006 | ON ( 2007 | end_entity.pk = l_label_release.entity1 2008 | AND 2009 | end_entity.entity = 'release' 2010 | )) 2011 | 2012 | UNION 2013 | 2014 | (SELECT 2015 | start_entity.node_id AS start, 2016 | end_entity.node_id AS end, 2017 | translate(upper(l_label_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2018 | 0 AS "day", 2019 | 0 AS "month", 2020 | NULL AS "catalog_number", 2021 | NULL AS "join", 2022 | 0 AS "year", 2023 | 0 AS "position" 2024 | FROM 2025 | l_label_release_group 2026 | JOIN link AS l_label_release_group__link__link ON l_label_release_group__link__link.id = l_label_release_group.link 2027 | JOIN link_type AS l_label_release_group__link__link__link_type__link_type ON l_label_release_group__link__link__link_type__link_type.id = l_label_release_group__link__link.link_type 2028 | 2029 | JOIN entity_mapping start_entity 2030 | ON ( 2031 | start_entity.pk = l_label_release_group.entity0 2032 | AND 2033 | start_entity.entity = 'label' 2034 | ) 2035 | 2036 | JOIN entity_mapping end_entity 2037 | ON ( 2038 | end_entity.pk = l_label_release_group.entity1 2039 | AND 2040 | end_entity.entity = 'release_group' 2041 | )) 2042 | 2043 | UNION 2044 | 2045 | (SELECT 2046 | start_entity.node_id AS start, 2047 | end_entity.node_id AS end, 2048 | translate(upper(l_label_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2049 | 0 AS "day", 2050 | 0 AS "month", 2051 | NULL AS "catalog_number", 2052 | NULL AS "join", 2053 | 0 AS "year", 2054 | 0 AS "position" 2055 | FROM 2056 | l_label_url 2057 | JOIN link AS l_label_url__link__link ON l_label_url__link__link.id = l_label_url.link 2058 | JOIN link_type AS l_label_url__link__link__link_type__link_type ON l_label_url__link__link__link_type__link_type.id = l_label_url__link__link.link_type 2059 | 2060 | JOIN entity_mapping start_entity 2061 | ON ( 2062 | start_entity.pk = l_label_url.entity0 2063 | AND 2064 | start_entity.entity = 'label' 2065 | ) 2066 | 2067 | JOIN entity_mapping end_entity 2068 | ON ( 2069 | end_entity.pk = l_label_url.entity1 2070 | AND 2071 | end_entity.entity = 'url' 2072 | )) 2073 | 2074 | UNION 2075 | 2076 | (SELECT 2077 | start_entity.node_id AS start, 2078 | end_entity.node_id AS end, 2079 | translate(upper(l_label_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2080 | 0 AS "day", 2081 | 0 AS "month", 2082 | NULL AS "catalog_number", 2083 | NULL AS "join", 2084 | 0 AS "year", 2085 | 0 AS "position" 2086 | FROM 2087 | l_label_work 2088 | JOIN link AS l_label_work__link__link ON l_label_work__link__link.id = l_label_work.link 2089 | JOIN link_type AS l_label_work__link__link__link_type__link_type ON l_label_work__link__link__link_type__link_type.id = l_label_work__link__link.link_type 2090 | 2091 | JOIN entity_mapping start_entity 2092 | ON ( 2093 | start_entity.pk = l_label_work.entity0 2094 | AND 2095 | start_entity.entity = 'label' 2096 | ) 2097 | 2098 | JOIN entity_mapping end_entity 2099 | ON ( 2100 | end_entity.pk = l_label_work.entity1 2101 | AND 2102 | end_entity.entity = 'work' 2103 | )) 2104 | 2105 | UNION 2106 | 2107 | (SELECT 2108 | start_entity.node_id AS start, 2109 | end_entity.node_id AS end, 2110 | translate(upper(l_label_place__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2111 | 0 AS "day", 2112 | 0 AS "month", 2113 | NULL AS "catalog_number", 2114 | NULL AS "join", 2115 | 0 AS "year", 2116 | 0 AS "position" 2117 | FROM 2118 | l_label_place 2119 | JOIN link AS l_label_place__link__link ON l_label_place__link__link.id = l_label_place.link 2120 | JOIN link_type AS l_label_place__link__link__link_type__link_type ON l_label_place__link__link__link_type__link_type.id = l_label_place__link__link.link_type 2121 | 2122 | JOIN entity_mapping start_entity 2123 | ON ( 2124 | start_entity.pk = l_label_place.entity0 2125 | AND 2126 | start_entity.entity = 'label' 2127 | ) 2128 | 2129 | JOIN entity_mapping end_entity 2130 | ON ( 2131 | end_entity.pk = l_label_place.entity1 2132 | AND 2133 | end_entity.entity = 'place' 2134 | )) 2135 | 2136 | UNION 2137 | 2138 | (SELECT 2139 | start_entity.node_id AS start, 2140 | end_entity.node_id AS end, 2141 | translate(upper(l_place_place__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2142 | 0 AS "day", 2143 | 0 AS "month", 2144 | NULL AS "catalog_number", 2145 | NULL AS "join", 2146 | 0 AS "year", 2147 | 0 AS "position" 2148 | FROM 2149 | l_place_place 2150 | JOIN link AS l_place_place__link__link ON l_place_place__link__link.id = l_place_place.link 2151 | JOIN link_type AS l_place_place__link__link__link_type__link_type ON l_place_place__link__link__link_type__link_type.id = l_place_place__link__link.link_type 2152 | 2153 | JOIN entity_mapping start_entity 2154 | ON ( 2155 | start_entity.pk = l_place_place.entity0 2156 | AND 2157 | start_entity.entity = 'place' 2158 | ) 2159 | 2160 | JOIN entity_mapping end_entity 2161 | ON ( 2162 | end_entity.pk = l_place_place.entity1 2163 | AND 2164 | end_entity.entity = 'place' 2165 | )) 2166 | 2167 | UNION 2168 | 2169 | (SELECT 2170 | start_entity.node_id AS start, 2171 | end_entity.node_id AS end, 2172 | translate(upper(l_place_recording__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2173 | 0 AS "day", 2174 | 0 AS "month", 2175 | NULL AS "catalog_number", 2176 | NULL AS "join", 2177 | 0 AS "year", 2178 | 0 AS "position" 2179 | FROM 2180 | l_place_recording 2181 | JOIN link AS l_place_recording__link__link ON l_place_recording__link__link.id = l_place_recording.link 2182 | JOIN link_type AS l_place_recording__link__link__link_type__link_type ON l_place_recording__link__link__link_type__link_type.id = l_place_recording__link__link.link_type 2183 | 2184 | JOIN entity_mapping start_entity 2185 | ON ( 2186 | start_entity.pk = l_place_recording.entity0 2187 | AND 2188 | start_entity.entity = 'place' 2189 | ) 2190 | 2191 | JOIN entity_mapping end_entity 2192 | ON ( 2193 | end_entity.pk = l_place_recording.entity1 2194 | AND 2195 | end_entity.entity = 'recording' 2196 | )) 2197 | 2198 | UNION 2199 | 2200 | (SELECT 2201 | start_entity.node_id AS start, 2202 | end_entity.node_id AS end, 2203 | translate(upper(l_place_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2204 | 0 AS "day", 2205 | 0 AS "month", 2206 | NULL AS "catalog_number", 2207 | NULL AS "join", 2208 | 0 AS "year", 2209 | 0 AS "position" 2210 | FROM 2211 | l_place_release 2212 | JOIN link AS l_place_release__link__link ON l_place_release__link__link.id = l_place_release.link 2213 | JOIN link_type AS l_place_release__link__link__link_type__link_type ON l_place_release__link__link__link_type__link_type.id = l_place_release__link__link.link_type 2214 | 2215 | JOIN entity_mapping start_entity 2216 | ON ( 2217 | start_entity.pk = l_place_release.entity0 2218 | AND 2219 | start_entity.entity = 'place' 2220 | ) 2221 | 2222 | JOIN entity_mapping end_entity 2223 | ON ( 2224 | end_entity.pk = l_place_release.entity1 2225 | AND 2226 | end_entity.entity = 'release' 2227 | )) 2228 | 2229 | UNION 2230 | 2231 | (SELECT 2232 | start_entity.node_id AS start, 2233 | end_entity.node_id AS end, 2234 | translate(upper(l_place_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2235 | 0 AS "day", 2236 | 0 AS "month", 2237 | NULL AS "catalog_number", 2238 | NULL AS "join", 2239 | 0 AS "year", 2240 | 0 AS "position" 2241 | FROM 2242 | l_place_release_group 2243 | JOIN link AS l_place_release_group__link__link ON l_place_release_group__link__link.id = l_place_release_group.link 2244 | JOIN link_type AS l_place_release_group__link__link__link_type__link_type ON l_place_release_group__link__link__link_type__link_type.id = l_place_release_group__link__link.link_type 2245 | 2246 | JOIN entity_mapping start_entity 2247 | ON ( 2248 | start_entity.pk = l_place_release_group.entity0 2249 | AND 2250 | start_entity.entity = 'place' 2251 | ) 2252 | 2253 | JOIN entity_mapping end_entity 2254 | ON ( 2255 | end_entity.pk = l_place_release_group.entity1 2256 | AND 2257 | end_entity.entity = 'release_group' 2258 | )) 2259 | 2260 | UNION 2261 | 2262 | (SELECT 2263 | start_entity.node_id AS start, 2264 | end_entity.node_id AS end, 2265 | translate(upper(l_place_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2266 | 0 AS "day", 2267 | 0 AS "month", 2268 | NULL AS "catalog_number", 2269 | NULL AS "join", 2270 | 0 AS "year", 2271 | 0 AS "position" 2272 | FROM 2273 | l_place_url 2274 | JOIN link AS l_place_url__link__link ON l_place_url__link__link.id = l_place_url.link 2275 | JOIN link_type AS l_place_url__link__link__link_type__link_type ON l_place_url__link__link__link_type__link_type.id = l_place_url__link__link.link_type 2276 | 2277 | JOIN entity_mapping start_entity 2278 | ON ( 2279 | start_entity.pk = l_place_url.entity0 2280 | AND 2281 | start_entity.entity = 'place' 2282 | ) 2283 | 2284 | JOIN entity_mapping end_entity 2285 | ON ( 2286 | end_entity.pk = l_place_url.entity1 2287 | AND 2288 | end_entity.entity = 'url' 2289 | )) 2290 | 2291 | UNION 2292 | 2293 | (SELECT 2294 | start_entity.node_id AS start, 2295 | end_entity.node_id AS end, 2296 | translate(upper(l_place_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2297 | 0 AS "day", 2298 | 0 AS "month", 2299 | NULL AS "catalog_number", 2300 | NULL AS "join", 2301 | 0 AS "year", 2302 | 0 AS "position" 2303 | FROM 2304 | l_place_work 2305 | JOIN link AS l_place_work__link__link ON l_place_work__link__link.id = l_place_work.link 2306 | JOIN link_type AS l_place_work__link__link__link_type__link_type ON l_place_work__link__link__link_type__link_type.id = l_place_work__link__link.link_type 2307 | 2308 | JOIN entity_mapping start_entity 2309 | ON ( 2310 | start_entity.pk = l_place_work.entity0 2311 | AND 2312 | start_entity.entity = 'place' 2313 | ) 2314 | 2315 | JOIN entity_mapping end_entity 2316 | ON ( 2317 | end_entity.pk = l_place_work.entity1 2318 | AND 2319 | end_entity.entity = 'work' 2320 | )) 2321 | 2322 | UNION 2323 | 2324 | (SELECT 2325 | start_entity.node_id AS start, 2326 | end_entity.node_id AS end, 2327 | translate(upper(l_recording_recording__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2328 | 0 AS "day", 2329 | 0 AS "month", 2330 | NULL AS "catalog_number", 2331 | NULL AS "join", 2332 | 0 AS "year", 2333 | 0 AS "position" 2334 | FROM 2335 | l_recording_recording 2336 | JOIN link AS l_recording_recording__link__link ON l_recording_recording__link__link.id = l_recording_recording.link 2337 | JOIN link_type AS l_recording_recording__link__link__link_type__link_type ON l_recording_recording__link__link__link_type__link_type.id = l_recording_recording__link__link.link_type 2338 | 2339 | JOIN entity_mapping start_entity 2340 | ON ( 2341 | start_entity.pk = l_recording_recording.entity0 2342 | AND 2343 | start_entity.entity = 'recording' 2344 | ) 2345 | 2346 | JOIN entity_mapping end_entity 2347 | ON ( 2348 | end_entity.pk = l_recording_recording.entity1 2349 | AND 2350 | end_entity.entity = 'recording' 2351 | )) 2352 | 2353 | UNION 2354 | 2355 | (SELECT 2356 | start_entity.node_id AS start, 2357 | end_entity.node_id AS end, 2358 | translate(upper(l_recording_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2359 | 0 AS "day", 2360 | 0 AS "month", 2361 | NULL AS "catalog_number", 2362 | NULL AS "join", 2363 | 0 AS "year", 2364 | 0 AS "position" 2365 | FROM 2366 | l_recording_release 2367 | JOIN link AS l_recording_release__link__link ON l_recording_release__link__link.id = l_recording_release.link 2368 | JOIN link_type AS l_recording_release__link__link__link_type__link_type ON l_recording_release__link__link__link_type__link_type.id = l_recording_release__link__link.link_type 2369 | 2370 | JOIN entity_mapping start_entity 2371 | ON ( 2372 | start_entity.pk = l_recording_release.entity0 2373 | AND 2374 | start_entity.entity = 'recording' 2375 | ) 2376 | 2377 | JOIN entity_mapping end_entity 2378 | ON ( 2379 | end_entity.pk = l_recording_release.entity1 2380 | AND 2381 | end_entity.entity = 'release' 2382 | )) 2383 | 2384 | UNION 2385 | 2386 | (SELECT 2387 | start_entity.node_id AS start, 2388 | end_entity.node_id AS end, 2389 | translate(upper(l_recording_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2390 | 0 AS "day", 2391 | 0 AS "month", 2392 | NULL AS "catalog_number", 2393 | NULL AS "join", 2394 | 0 AS "year", 2395 | 0 AS "position" 2396 | FROM 2397 | l_recording_release_group 2398 | JOIN link AS l_recording_release_group__link__link ON l_recording_release_group__link__link.id = l_recording_release_group.link 2399 | JOIN link_type AS l_recording_release_group__link__link__link_type__link_type ON l_recording_release_group__link__link__link_type__link_type.id = l_recording_release_group__link__link.link_type 2400 | 2401 | JOIN entity_mapping start_entity 2402 | ON ( 2403 | start_entity.pk = l_recording_release_group.entity0 2404 | AND 2405 | start_entity.entity = 'recording' 2406 | ) 2407 | 2408 | JOIN entity_mapping end_entity 2409 | ON ( 2410 | end_entity.pk = l_recording_release_group.entity1 2411 | AND 2412 | end_entity.entity = 'release_group' 2413 | )) 2414 | 2415 | UNION 2416 | 2417 | (SELECT 2418 | start_entity.node_id AS start, 2419 | end_entity.node_id AS end, 2420 | translate(upper(l_recording_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2421 | 0 AS "day", 2422 | 0 AS "month", 2423 | NULL AS "catalog_number", 2424 | NULL AS "join", 2425 | 0 AS "year", 2426 | 0 AS "position" 2427 | FROM 2428 | l_recording_url 2429 | JOIN link AS l_recording_url__link__link ON l_recording_url__link__link.id = l_recording_url.link 2430 | JOIN link_type AS l_recording_url__link__link__link_type__link_type ON l_recording_url__link__link__link_type__link_type.id = l_recording_url__link__link.link_type 2431 | 2432 | JOIN entity_mapping start_entity 2433 | ON ( 2434 | start_entity.pk = l_recording_url.entity0 2435 | AND 2436 | start_entity.entity = 'recording' 2437 | ) 2438 | 2439 | JOIN entity_mapping end_entity 2440 | ON ( 2441 | end_entity.pk = l_recording_url.entity1 2442 | AND 2443 | end_entity.entity = 'url' 2444 | )) 2445 | 2446 | UNION 2447 | 2448 | (SELECT 2449 | start_entity.node_id AS start, 2450 | end_entity.node_id AS end, 2451 | translate(upper(l_recording_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2452 | 0 AS "day", 2453 | 0 AS "month", 2454 | NULL AS "catalog_number", 2455 | NULL AS "join", 2456 | 0 AS "year", 2457 | 0 AS "position" 2458 | FROM 2459 | l_recording_work 2460 | JOIN link AS l_recording_work__link__link ON l_recording_work__link__link.id = l_recording_work.link 2461 | JOIN link_type AS l_recording_work__link__link__link_type__link_type ON l_recording_work__link__link__link_type__link_type.id = l_recording_work__link__link.link_type 2462 | 2463 | JOIN entity_mapping start_entity 2464 | ON ( 2465 | start_entity.pk = l_recording_work.entity0 2466 | AND 2467 | start_entity.entity = 'recording' 2468 | ) 2469 | 2470 | JOIN entity_mapping end_entity 2471 | ON ( 2472 | end_entity.pk = l_recording_work.entity1 2473 | AND 2474 | end_entity.entity = 'work' 2475 | )) 2476 | 2477 | UNION 2478 | 2479 | (SELECT 2480 | start_entity.node_id AS start, 2481 | end_entity.node_id AS end, 2482 | translate(upper(l_release_release__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2483 | 0 AS "day", 2484 | 0 AS "month", 2485 | NULL AS "catalog_number", 2486 | NULL AS "join", 2487 | 0 AS "year", 2488 | 0 AS "position" 2489 | FROM 2490 | l_release_release 2491 | JOIN link AS l_release_release__link__link ON l_release_release__link__link.id = l_release_release.link 2492 | JOIN link_type AS l_release_release__link__link__link_type__link_type ON l_release_release__link__link__link_type__link_type.id = l_release_release__link__link.link_type 2493 | 2494 | JOIN entity_mapping start_entity 2495 | ON ( 2496 | start_entity.pk = l_release_release.entity0 2497 | AND 2498 | start_entity.entity = 'release' 2499 | ) 2500 | 2501 | JOIN entity_mapping end_entity 2502 | ON ( 2503 | end_entity.pk = l_release_release.entity1 2504 | AND 2505 | end_entity.entity = 'release' 2506 | )) 2507 | 2508 | UNION 2509 | 2510 | (SELECT 2511 | start_entity.node_id AS start, 2512 | end_entity.node_id AS end, 2513 | translate(upper(l_release_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2514 | 0 AS "day", 2515 | 0 AS "month", 2516 | NULL AS "catalog_number", 2517 | NULL AS "join", 2518 | 0 AS "year", 2519 | 0 AS "position" 2520 | FROM 2521 | l_release_release_group 2522 | JOIN link AS l_release_release_group__link__link ON l_release_release_group__link__link.id = l_release_release_group.link 2523 | JOIN link_type AS l_release_release_group__link__link__link_type__link_type ON l_release_release_group__link__link__link_type__link_type.id = l_release_release_group__link__link.link_type 2524 | 2525 | JOIN entity_mapping start_entity 2526 | ON ( 2527 | start_entity.pk = l_release_release_group.entity0 2528 | AND 2529 | start_entity.entity = 'release' 2530 | ) 2531 | 2532 | JOIN entity_mapping end_entity 2533 | ON ( 2534 | end_entity.pk = l_release_release_group.entity1 2535 | AND 2536 | end_entity.entity = 'release_group' 2537 | )) 2538 | 2539 | UNION 2540 | 2541 | (SELECT 2542 | start_entity.node_id AS start, 2543 | end_entity.node_id AS end, 2544 | translate(upper(l_release_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2545 | 0 AS "day", 2546 | 0 AS "month", 2547 | NULL AS "catalog_number", 2548 | NULL AS "join", 2549 | 0 AS "year", 2550 | 0 AS "position" 2551 | FROM 2552 | l_release_url 2553 | JOIN link AS l_release_url__link__link ON l_release_url__link__link.id = l_release_url.link 2554 | JOIN link_type AS l_release_url__link__link__link_type__link_type ON l_release_url__link__link__link_type__link_type.id = l_release_url__link__link.link_type 2555 | 2556 | JOIN entity_mapping start_entity 2557 | ON ( 2558 | start_entity.pk = l_release_url.entity0 2559 | AND 2560 | start_entity.entity = 'release' 2561 | ) 2562 | 2563 | JOIN entity_mapping end_entity 2564 | ON ( 2565 | end_entity.pk = l_release_url.entity1 2566 | AND 2567 | end_entity.entity = 'url' 2568 | )) 2569 | 2570 | UNION 2571 | 2572 | (SELECT 2573 | start_entity.node_id AS start, 2574 | end_entity.node_id AS end, 2575 | translate(upper(l_release_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2576 | 0 AS "day", 2577 | 0 AS "month", 2578 | NULL AS "catalog_number", 2579 | NULL AS "join", 2580 | 0 AS "year", 2581 | 0 AS "position" 2582 | FROM 2583 | l_release_work 2584 | JOIN link AS l_release_work__link__link ON l_release_work__link__link.id = l_release_work.link 2585 | JOIN link_type AS l_release_work__link__link__link_type__link_type ON l_release_work__link__link__link_type__link_type.id = l_release_work__link__link.link_type 2586 | 2587 | JOIN entity_mapping start_entity 2588 | ON ( 2589 | start_entity.pk = l_release_work.entity0 2590 | AND 2591 | start_entity.entity = 'release' 2592 | ) 2593 | 2594 | JOIN entity_mapping end_entity 2595 | ON ( 2596 | end_entity.pk = l_release_work.entity1 2597 | AND 2598 | end_entity.entity = 'work' 2599 | )) 2600 | 2601 | UNION 2602 | 2603 | (SELECT 2604 | start_entity.node_id AS start, 2605 | end_entity.node_id AS end, 2606 | translate(upper(l_release_group_release_group__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2607 | 0 AS "day", 2608 | 0 AS "month", 2609 | NULL AS "catalog_number", 2610 | NULL AS "join", 2611 | 0 AS "year", 2612 | 0 AS "position" 2613 | FROM 2614 | l_release_group_release_group 2615 | JOIN link AS l_release_group_release_group__link__link ON l_release_group_release_group__link__link.id = l_release_group_release_group.link 2616 | JOIN link_type AS l_release_group_release_group__link__link__link_type__link_type ON l_release_group_release_group__link__link__link_type__link_type.id = l_release_group_release_group__link__link.link_type 2617 | 2618 | JOIN entity_mapping start_entity 2619 | ON ( 2620 | start_entity.pk = l_release_group_release_group.entity0 2621 | AND 2622 | start_entity.entity = 'release_group' 2623 | ) 2624 | 2625 | JOIN entity_mapping end_entity 2626 | ON ( 2627 | end_entity.pk = l_release_group_release_group.entity1 2628 | AND 2629 | end_entity.entity = 'release_group' 2630 | )) 2631 | 2632 | UNION 2633 | 2634 | (SELECT 2635 | start_entity.node_id AS start, 2636 | end_entity.node_id AS end, 2637 | translate(upper(l_release_group_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2638 | 0 AS "day", 2639 | 0 AS "month", 2640 | NULL AS "catalog_number", 2641 | NULL AS "join", 2642 | 0 AS "year", 2643 | 0 AS "position" 2644 | FROM 2645 | l_release_group_url 2646 | JOIN link AS l_release_group_url__link__link ON l_release_group_url__link__link.id = l_release_group_url.link 2647 | JOIN link_type AS l_release_group_url__link__link__link_type__link_type ON l_release_group_url__link__link__link_type__link_type.id = l_release_group_url__link__link.link_type 2648 | 2649 | JOIN entity_mapping start_entity 2650 | ON ( 2651 | start_entity.pk = l_release_group_url.entity0 2652 | AND 2653 | start_entity.entity = 'release_group' 2654 | ) 2655 | 2656 | JOIN entity_mapping end_entity 2657 | ON ( 2658 | end_entity.pk = l_release_group_url.entity1 2659 | AND 2660 | end_entity.entity = 'url' 2661 | )) 2662 | 2663 | UNION 2664 | 2665 | (SELECT 2666 | start_entity.node_id AS start, 2667 | end_entity.node_id AS end, 2668 | translate(upper(l_release_group_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2669 | 0 AS "day", 2670 | 0 AS "month", 2671 | NULL AS "catalog_number", 2672 | NULL AS "join", 2673 | 0 AS "year", 2674 | 0 AS "position" 2675 | FROM 2676 | l_release_group_work 2677 | JOIN link AS l_release_group_work__link__link ON l_release_group_work__link__link.id = l_release_group_work.link 2678 | JOIN link_type AS l_release_group_work__link__link__link_type__link_type ON l_release_group_work__link__link__link_type__link_type.id = l_release_group_work__link__link.link_type 2679 | 2680 | JOIN entity_mapping start_entity 2681 | ON ( 2682 | start_entity.pk = l_release_group_work.entity0 2683 | AND 2684 | start_entity.entity = 'release_group' 2685 | ) 2686 | 2687 | JOIN entity_mapping end_entity 2688 | ON ( 2689 | end_entity.pk = l_release_group_work.entity1 2690 | AND 2691 | end_entity.entity = 'work' 2692 | )) 2693 | 2694 | UNION 2695 | 2696 | (SELECT 2697 | start_entity.node_id AS start, 2698 | end_entity.node_id AS end, 2699 | translate(upper(l_url_url__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2700 | 0 AS "day", 2701 | 0 AS "month", 2702 | NULL AS "catalog_number", 2703 | NULL AS "join", 2704 | 0 AS "year", 2705 | 0 AS "position" 2706 | FROM 2707 | l_url_url 2708 | JOIN link AS l_url_url__link__link ON l_url_url__link__link.id = l_url_url.link 2709 | JOIN link_type AS l_url_url__link__link__link_type__link_type ON l_url_url__link__link__link_type__link_type.id = l_url_url__link__link.link_type 2710 | 2711 | JOIN entity_mapping start_entity 2712 | ON ( 2713 | start_entity.pk = l_url_url.entity0 2714 | AND 2715 | start_entity.entity = 'url' 2716 | ) 2717 | 2718 | JOIN entity_mapping end_entity 2719 | ON ( 2720 | end_entity.pk = l_url_url.entity1 2721 | AND 2722 | end_entity.entity = 'url' 2723 | )) 2724 | 2725 | UNION 2726 | 2727 | (SELECT 2728 | start_entity.node_id AS start, 2729 | end_entity.node_id AS end, 2730 | translate(upper(l_url_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2731 | 0 AS "day", 2732 | 0 AS "month", 2733 | NULL AS "catalog_number", 2734 | NULL AS "join", 2735 | 0 AS "year", 2736 | 0 AS "position" 2737 | FROM 2738 | l_url_work 2739 | JOIN link AS l_url_work__link__link ON l_url_work__link__link.id = l_url_work.link 2740 | JOIN link_type AS l_url_work__link__link__link_type__link_type ON l_url_work__link__link__link_type__link_type.id = l_url_work__link__link.link_type 2741 | 2742 | JOIN entity_mapping start_entity 2743 | ON ( 2744 | start_entity.pk = l_url_work.entity0 2745 | AND 2746 | start_entity.entity = 'url' 2747 | ) 2748 | 2749 | JOIN entity_mapping end_entity 2750 | ON ( 2751 | end_entity.pk = l_url_work.entity1 2752 | AND 2753 | end_entity.entity = 'work' 2754 | )) 2755 | 2756 | UNION 2757 | 2758 | (SELECT 2759 | start_entity.node_id AS start, 2760 | end_entity.node_id AS end, 2761 | translate(upper(l_work_work__link__link__link_type__link_type.name), ' ', '_') AS rel_type, 2762 | 0 AS "day", 2763 | 0 AS "month", 2764 | NULL AS "catalog_number", 2765 | NULL AS "join", 2766 | 0 AS "year", 2767 | 0 AS "position" 2768 | FROM 2769 | l_work_work 2770 | JOIN link AS l_work_work__link__link ON l_work_work__link__link.id = l_work_work.link 2771 | JOIN link_type AS l_work_work__link__link__link_type__link_type ON l_work_work__link__link__link_type__link_type.id = l_work_work__link__link.link_type 2772 | 2773 | JOIN entity_mapping start_entity 2774 | ON ( 2775 | start_entity.pk = l_work_work.entity0 2776 | AND 2777 | start_entity.entity = 'work' 2778 | ) 2779 | 2780 | JOIN entity_mapping end_entity 2781 | ON ( 2782 | end_entity.pk = l_work_work.entity1 2783 | AND 2784 | end_entity.entity = 'work' 2785 | )) 2786 | ) 2787 | TO '/tmp/musicbrainz__rels__full.csv' CSV HEADER 2788 | DELIMITER E'\t' 2789 | ENCODING 'UTF8'; 2790 | 2791 | -------------------------------------------------------------------------------- /examples/musicbrainz/queries.cyp: -------------------------------------------------------------------------------- 1 | // Labels and artists 2 | 3 | // American artists signed on British record labels 4 | START usa=node:mb_fulltext(name="United States"), 5 | gb=node:mb_fulltext(name="United Kingdom") 6 | MATCH (usa:Country), (gb:Country), 7 | (a:Artist)-[:FROM_AREA]-(usa), 8 | (a:Artist)-[:RECORDING_CONTRACT]-(l:Label), 9 | (l)-[:FROM_AREA]-(gb) 10 | RETURN a,l,usa,gb 11 | 12 | // Countries and their anthem 13 | MATCH (c:Country) 14 | WITH c 15 | MATCH c-[:ANTHEM]-(w:Work) 16 | RETURN c.name, w.name; 17 | 18 | 19 | // Countries without an anthem (in MusicBrainz) 20 | MATCH (c:Country) 21 | WHERE NOT c-[:ANTHEM]-(:Work) 22 | RETURN c.name; 23 | 24 | 25 | // list all Album releases for an artist by name 26 | START abba=node:mb_fulltext(name='ABBA') 27 | MATCH (abba:Artist) 28 | WITH abba 29 | MATCH (abba)-[:CREDITED_AS]->()-[:CREDITED_ON]->(rg:ReleaseGroup:Album), 30 | rg-[:PART_OF]-(r:Release:Official) 31 | RETURN rg.name, count(r); 32 | 33 | //even faster is you know the cedit name 34 | START abba=node:mb_fulltext(name='ABBA') 35 | MATCH (abba:ArtistCredit) 36 | WITH abba 37 | MATCH abba-[:CREDITED_ON]->(rg:ReleaseGroup:Album), 38 | rg-[:PART_OF]-(r:Release:Official) 39 | RETURN rg.name, count(r); 40 | 41 | //oder by DESCending number of releases 42 | START ledzep=node:mb_fulltext(name='Led Zeppelin') 43 | MATCH (ledzep:ArtistCredit) 44 | WITH ledzep 45 | MATCH ledzep-[:CREDITED_ON]->(rg:ReleaseGroup:Album), 46 | rg-[:PART_OF]-(r:Release:Official) 47 | RETURN rg.name, count(r) AS sum 48 | ORDER BY sum DESC; 49 | 50 | // war of first names 51 | START ac=node:mb_fulltext("name:Bob") 52 | MATCH (ac:ArtistCredit) 53 | WITH ac 54 | MATCH ac-[:CREDITED_ON]->(rg:ReleaseGroup:Album), 55 | rg-[:PART_OF]-(r:Release:Official) 56 | RETURN ac.name, count(r) AS sum 57 | ORDER BY sum DESC LIMIT 10; 58 | +---------------------------------------------------------+ 59 | | ac.name | sum | 60 | +---------------------------------------------------------+ 61 | | "Bob Dylan" | 45 | 62 | | "Bob Seger & The Silver Bullet Band" | 10 | 63 | | "Bob Rivers" | 9 | 64 | | "Bob Mould" | 9 | 65 | | "Bob & Tom" | 8 | 66 | | "bob hund" | 6 | 67 | | "Bob Zany" | 1 | 68 | | "Bob Hansson & Institutet för Höghastighetskonst" | 1 | 69 | | "Bob James" | 1 | 70 | | "Bob Schneider" | 1 | 71 | +---------------------------------------------------------+ 72 | 73 | 74 | // WHo released the most? 75 | MATCH (ac:ArtistCredit)-[:CREDITED_ON]->(rg:ReleaseGroup:Album), 76 | rg-[:PART_OF]-(r:Release:Official) 77 | RETURN ac.name, count(r) AS sum 78 | ORDER BY sum DESC LIMIT 10; 79 | 80 | +----------------------------------+ 81 | | ac.name | sum | 82 | +----------------------------------+ 83 | | "Various Artists" | 2097 | 84 | | "King Crimson" | 95 | 85 | | "Jethro Tull" | 93 | 86 | | "David Bowie" | 84 | 87 | | "Miles Davis" | 68 | 88 | | "Uriah Heep" | 68 | 89 | | "The Legendary Pink Dots" | 64 | 90 | | "The Fall" | 56 | 91 | | "Joe Jackson" | 55 | 92 | | "Eric Clapton" | 54 | 93 | +----------------------------------+ 94 | 95 | // Christina Aguilera - b202beb7-99bd-47e7-8b72-195c8d72ebdd 96 | // Britney Spears - 45a663b5-b1cb-4a91-bff6-2bef7bbfdd76 97 | // Katy Pery - 122d63fc-8671-43e4-9752-34e846d62a9c 98 | // Lady Gaga - 650e7db6-b795-4eb5-a702-5ea2fc46c848 99 | // Madonna - 79239441-bfd5-4981-a70c-55c3f15c1287 100 | // Beyoncé - 859d0860-d480-4efd-970c-c05d5f1776b8 101 | 102 | START a=node:mb_exact(mbid="79239441-bfd5-4981-a70c-55c3f15c1287") 103 | MATCH (a)-[credit:CREDITED_AS]-(ac), 104 | (ac)-[:CREDITED_ON]-(rec:Recording)-[r2]-(w:Work), 105 | (tr:Track)-[:IS_RECORDING]-(rec) 106 | RETURN a.name, count(tr) AS nb_tracks, type(r2), w.name 107 | ORDER BY nb_tracks DESC 108 | LIMIT 10; 109 | 110 | 111 | // most prolific writers/composers for some pop artists, Madonna 112 | START a=node:mb_exact(mbid="45a663b5-b1cb-4a91-bff6-2bef7bbfdd76") 113 | MATCH (a)-[:CREDITED_AS]-(ac), 114 | (ac)-[:CREDITED_ON]-(rec:Recording), 115 | (rec)--(w:Work), 116 | (tr)-[:IS_RECORDING]-(rec) 117 | WITH a, w, count(tr) AS nb_tracks 118 | ORDER BY nb_tracks DESC 119 | MATCH p=w-[z:WRITER|COMPOSER]-(writer) 120 | WHERE writer <> a 121 | WITH writer, count(p) AS nb_part 122 | RETURN writer.name, nb_part 123 | ORDER BY nb_part DESC; 124 | 125 | // most prolific writers/composers alltogether 126 | // this could take long! 127 | MATCH (a:Artist)-[:CREDITED_AS]-(ac), 128 | (ac)-[:CREDITED_ON]-(rec:Recording), 129 | (rec)--(w:Work), 130 | (tr)-[:IS_RECORDING]-(rec) 131 | WITH a, w, count(tr) AS nb_tracks 132 | ORDER BY nb_tracks DESC 133 | MATCH p=w-[z:WRITER|COMPOSER]-(writer) 134 | WHERE writer <> a 135 | WITH writer, count(p) AS nb_part 136 | RETURN writer.name, nb_part 137 | ORDER BY nb_part DESC; 138 | -------------------------------------------------------------------------------- /musicbrainz2neo4j-export.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import optparse 6 | from sql2graph.export2 import SQL2GraphExporter 7 | from musicbrainz_schema import mbschema, mbentities, make_link_entity_list 8 | 9 | # ---------------------------------------------------------------------- 10 | 11 | DEFAULT_NODES_FILE='/tmp/musicbrainz__nodes__full.csv' 12 | DEFAULT_RELS_FILE='/tmp/musicbrainz__rels__full.csv' 13 | MULTIPLE_FILES = False 14 | 15 | option_parser = optparse.OptionParser() 16 | 17 | option_parser.add_option("--exclude", dest="excluded_entities", 18 | help="Entities to exclude", default=None) 19 | option_parser.add_option("--include", dest="included_entities", 20 | help="Entities to include", default=None) 21 | 22 | option_parser.add_option("--nodes", dest="nodes_filename", 23 | help="Nodes file", default=DEFAULT_NODES_FILE) 24 | option_parser.add_option("--relations", dest="relations_filename", 25 | help="Relationships file", default=DEFAULT_RELS_FILE) 26 | option_parser.add_option("--multiple", action="store_true", 27 | dest="multiple_files", 28 | help="whether to output multiple nodes files and relationships files", 29 | default=MULTIPLE_FILES) 30 | option_parser.add_option("--limit", type="int", dest="limit", default=None) 31 | (options, args) = option_parser.parse_args() 32 | 33 | # this is used to concatenate some columns as a comma-separated 34 | # list of Labels for batch-import (branch Neo4j 2.0) 35 | def concat_translate(*args): 36 | return """translate( 37 | array_to_string( 38 | %s, 39 | ','), 40 | ' _"/-', '')""" % ( 41 | "ARRAY[\n %s]" % ",\n ".join( 42 | ["initcap(%s)" % col 43 | for col in args]) 44 | ) 45 | 46 | class MusicBrainzExporter(SQL2GraphExporter): 47 | nodes_header_override = { 48 | "mbid": '"mbid:string:mb_exact"', 49 | "pk": '"pk:long:mb_exact"', 50 | "name": '"name:string:mb_fulltext"', 51 | "kind": None, 52 | "type": None, 53 | "format": None, 54 | "gender": None, 55 | "status": None, 56 | "packaging": None, 57 | ( 58 | "kind", 59 | "type", 60 | "format", 61 | "gender", 62 | "status", 63 | "packaging", 64 | ): (concat_translate, '"l:label"',), 65 | #"latitude": '"latitude:float"', 66 | #"longitude": '"longitude:float"', 67 | "latitude": None, 68 | "longitude": None, 69 | "code": None, 70 | "pk": None, 71 | "locale": None, 72 | } 73 | 74 | if all([options.included_entities, options.excluded_entities]): 75 | raise RuntimeError("You must choose between including or exluding entities") 76 | if options.excluded_entities is not None: 77 | options.excluded_entities = options.excluded_entities.split(',') 78 | entities = [e for e in mbentities if e not in options.excluded_entities] 79 | 80 | elif options.included_entities is not None: 81 | options.included_entities = options.included_entities.split(',') 82 | entities = options.included_entities 83 | entities += make_link_entity_list(entities) 84 | else: 85 | entities = mbentities 86 | exporter = MusicBrainzExporter(mbschema, entities, strict=False) 87 | 88 | exporter.set_nodes_filename(options.nodes_filename) 89 | exporter.set_rels_filename(options.relations_filename) 90 | 91 | if options.limit: 92 | exporter.set_entity_export_limit(options.limit) 93 | 94 | print r""" 95 | -- Change TABs to spaces in "name" column for "track" and "work" tables 96 | -- somehow these TABs can make batch-import CSV parsing choked 97 | UPDATE track SET name=translate(name, E'\t', ' ') WHERE name LIKE E'%\t%'; 98 | UPDATE work SET name=translate(name, E'\t', ' ') WHERE name LIKE E'%\t%'; 99 | """ 100 | print exporter.create_mapping_table_query(multiple=options.multiple_files) 101 | print exporter.create_nodes_query(multiple=options.multiple_files) 102 | print exporter.create_relationships_query(multiple=options.multiple_files) 103 | -------------------------------------------------------------------------------- /musicbrainz_schema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from sql2graph.schema2 import SchemaHelper, Schema, Column, ForeignColumn, \ 5 | Property, IntegerProperty, Relation, Entity, Reference 6 | 7 | # ---------------------------------------------------------------------- 8 | def text_to_rel_type(s): 9 | return "translate(upper(%s), ' ', '_')" % s 10 | 11 | def make_link_entity(start_entity, end_entity): 12 | return Entity('l_%s_%s' % (start_entity, end_entity), 13 | fields=[], 14 | relations = [ 15 | Relation( 16 | Column('link', 17 | ForeignColumn('link', 'link_type', 18 | ForeignColumn('link_type', 'name')), 19 | function = text_to_rel_type), 20 | start=Reference(start_entity, Column('entity0')), 21 | end=Reference(end_entity, Column('entity1'),), 22 | properties=[]) 23 | ] 24 | ) 25 | 26 | def make_link_entity_list(entities): 27 | entity_set = set(entities) 28 | return ["l_%s_%s" % (e0, e1) 29 | for e0, e1 in linked_entities 30 | if set([e0, e1]) & entity_set] 31 | 32 | mbentities = [ 33 | 'area', 34 | 'area_alias', 35 | 'area_type', 36 | 37 | 'place', 38 | 'place_alias', 39 | 40 | 'artist', 41 | 'artist_alias', 42 | 'artist_type', 43 | 'artist_credit', 44 | 'artist_credit_name', 45 | 'gender', 46 | 47 | 'label', 48 | 'label_type', 49 | 50 | 'url', 51 | 52 | 'release_group', 53 | 'release_group_primary_type', 54 | 55 | 'release', 56 | 'release_country', 57 | 'release_packaging', 58 | 'release_status', 59 | 'release_label', 60 | 61 | 'recording', 62 | 'track', 63 | 64 | 'medium', 65 | 'medium_format', 66 | 67 | 'work', 68 | 'work_type', 69 | ] 70 | 71 | linked_entities = ( 72 | ('area', 'area'), 73 | ('area', 'artist'), 74 | ('area', 'label'), 75 | ('area', 'work'), 76 | ('area', 'url'), 77 | ('area', 'recording'), 78 | ('area', 'release'), 79 | ('area', 'release_group'), 80 | ('area', 'place'), 81 | 82 | ('artist', 'artist'), 83 | ('artist', 'label'), 84 | ('artist', 'recording'), 85 | ('artist', 'release'), 86 | ('artist', 'release_group'), 87 | ('artist', 'url'), 88 | ('artist', 'work'), 89 | ('artist', 'place'), 90 | 91 | ('label', 'label'), 92 | ('label', 'recording'), 93 | ('label', 'release'), 94 | ('label', 'release_group'), 95 | ('label', 'url'), 96 | ('label', 'work'), 97 | ('label', 'place'), 98 | 99 | ('place', 'place'), 100 | ('place', 'recording'), 101 | ('place', 'release'), 102 | ('place', 'release_group'), 103 | ('place', 'url'), 104 | ('place', 'work'), 105 | 106 | ('recording', 'recording'), 107 | ('recording', 'release'), 108 | ('recording', 'release_group'), 109 | ('recording', 'url'), 110 | ('recording', 'work'), 111 | 112 | ('release', 'release'), 113 | ('release', 'release_group'), 114 | ('release', 'url'), 115 | ('release', 'work'), 116 | 117 | ('release_group', 'release_group'), 118 | ('release_group', 'url'), 119 | ('release_group', 'work'), 120 | 121 | ('url', 'url'), 122 | ('url', 'work'), 123 | 124 | ('work', 'work'), 125 | ) 126 | 127 | mbentities.extend(make_link_entity_list(mbentities)) 128 | 129 | mbschema = Schema([ 130 | Entity('area_type', [ 131 | IntegerProperty('pk', Column('id')), 132 | Property('name', Column('name')), 133 | ], 134 | ), 135 | Entity('area', [ 136 | IntegerProperty('pk', Column('id')), 137 | Property('mbid', Column('gid')), 138 | Property('name', Column('name')), 139 | Property('type', Column('type', ForeignColumn('area_type', 'name', null=True))), 140 | ], 141 | ), 142 | Entity('area_alias', 143 | [ 144 | IntegerProperty('pk', Column('id')), 145 | Property('name', Column('name')), 146 | Property('type', Column('type', ForeignColumn('area_alias_type', 'name', null=True))), 147 | Property('locale', Column('locale')), 148 | ], 149 | [ 150 | Relation( 151 | 'HAS_ALIAS', 152 | start=Reference('area', Column('area')), 153 | end=Reference('area_alias', Column('id')), 154 | properties=[] 155 | ), 156 | ] 157 | ), 158 | Entity('place', [ 159 | IntegerProperty('pk', Column('id')), 160 | Property('mbid', Column('gid')), 161 | Property('name', Column('name')), 162 | Property('type', Column('type', ForeignColumn('place_type', 'name', null=True))), 163 | IntegerProperty('latitude', Column('coordinates[0]')), 164 | IntegerProperty('longitude', Column('coordinates[1]')), 165 | ], 166 | ), 167 | Entity('place_alias', 168 | [ 169 | IntegerProperty('pk', Column('id')), 170 | Property('name', Column('name')), 171 | Property('type', Column('type', ForeignColumn('place_alias_type', 'name', null=True))), 172 | Property('locale', Column('locale')), 173 | ], 174 | [ 175 | Relation( 176 | 'HAS_ALIAS', 177 | start=Reference('place', Column('place')), 178 | end=Reference('place_alias', Column('id')), 179 | properties=[] 180 | ), 181 | ] 182 | ), 183 | Entity('artist', 184 | [ 185 | IntegerProperty('pk', Column('id')), 186 | Property('mbid', Column('gid')), 187 | Property('disambiguation', Column('comment')), 188 | Property('name', Column('name')), 189 | Property('type', Column('type', ForeignColumn('artist_type', 'name', null=True))), 190 | 191 | #Property('sort_name', Column('sort_name', ForeignColumn('artist_name', 'name'))), 192 | #Property('country', Column('country', ForeignColumn('country', 'name', null=True))), 193 | #Property('country', Column('country', ForeignColumn('country', 'iso_code', null=True))), 194 | Property('gender', Column('gender', ForeignColumn('gender', 'name', null=True))), 195 | ], 196 | [ 197 | Relation( 198 | 'FROM_AREA', 199 | start=Reference('artist', Column('id')), 200 | end=Reference('area', Column('area')), 201 | properties=[] 202 | ), 203 | Relation( 204 | 'BEGAN_IN_AREA', 205 | start=Reference('artist', Column('id')), 206 | end=Reference('area', Column('begin_area')), 207 | properties=[] 208 | ), 209 | Relation( 210 | 'ENDED_IN_AREA', 211 | start=Reference('artist', Column('id')), 212 | end=Reference('area', Column('end_area')), 213 | properties=[] 214 | ), 215 | ], 216 | ), 217 | Entity('artist_alias', 218 | [ 219 | IntegerProperty('pk', Column('id')), 220 | Property('name', Column('name')), 221 | Property('type', Column('type', ForeignColumn('artist_alias_type', 'name', null=True))), 222 | ], 223 | [ 224 | Relation( 225 | 'HAS_ALIAS', 226 | start=Reference('artist', Column('artist')), 227 | end=Reference('artist_alias', Column('id')), 228 | properties=[] 229 | ), 230 | ] 231 | ), 232 | Entity('artist_type', 233 | [ 234 | IntegerProperty('pk', Column('id')), 235 | Property('name', Column('name')), 236 | ], 237 | ), 238 | Entity('artist_credit', 239 | fields = [ 240 | IntegerProperty('pk', Column('id')), 241 | Property('name', Column('name')), 242 | ] 243 | ), 244 | Entity('artist_credit_name', 245 | fields=[], 246 | relations = [ 247 | Relation( 248 | 'CREDITED_AS', 249 | start=Reference('artist', Column('artist')), 250 | end=Reference('artist_credit', Column('artist_credit')), 251 | properties=[ 252 | IntegerProperty('position', Column('position')), 253 | Property('join', Column('join_phrase')), 254 | ] 255 | ), 256 | ] 257 | ), 258 | Entity('gender', [ 259 | IntegerProperty('pk', Column('id')), 260 | Property('name', Column('name')), 261 | ]), 262 | Entity('label', 263 | [ 264 | IntegerProperty('pk', Column('id')), 265 | Property('mbid', Column('gid')), 266 | Property('disambiguation', Column('comment')), 267 | IntegerProperty('code', Column('label_code')), 268 | Property('name', Column('name')), 269 | Property('type', Column('type', ForeignColumn('label_type', 'name', null=True))), 270 | 271 | #Property('sort_name', Column('sort_name', ForeignColumn('label_name', 'name'))), 272 | #Property('country', Column('country', ForeignColumn('country', 'name', null=True))), 273 | #Property('country', Column('country', ForeignColumn('country', 'iso_code', null=True))), 274 | ], 275 | [ 276 | Relation( 277 | 'FROM_AREA', 278 | start=Reference('label', Column('id')), 279 | end=Reference('area', Column('area')), 280 | properties=[] 281 | ), 282 | ], 283 | ), 284 | Entity('label_type', 285 | [ 286 | IntegerProperty('pk', Column('id')), 287 | Property('name', Column('name')), 288 | ] 289 | ), 290 | Entity('work', 291 | [ 292 | IntegerProperty('pk', Column('id')), 293 | Property('mbid', Column('gid')), 294 | Property('disambiguation', Column('comment')), 295 | Property('name', Column('name')), 296 | Property('type', Column('type', ForeignColumn('work_type', 'name', null=True))), 297 | ], 298 | # no relationships 299 | [] 300 | ), 301 | Entity('work_type', 302 | [ 303 | IntegerProperty('pk', Column('id')), 304 | Property('name', Column('name')), 305 | ] 306 | ), 307 | Entity('release_group', 308 | [ 309 | IntegerProperty('pk', Column('id')), 310 | Property('mbid', Column('gid')), 311 | Property('disambiguation', Column('comment')), 312 | Property('name', Column('name')), 313 | Property('type', Column('type', ForeignColumn('release_group_primary_type', 'name', null=True))), 314 | #Property('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))), 315 | ], 316 | [ 317 | Relation( 318 | 'CREDITED_ON', 319 | start=Reference('artist_credit', Column('artist_credit')), 320 | end=Reference('release_group', Column('id')), 321 | properties=[] 322 | ), 323 | ] 324 | ), 325 | Entity('release_group_primary_type', 326 | [ 327 | IntegerProperty('pk', Column('id')), 328 | Property('name', Column('name')), 329 | ] 330 | ), 331 | Entity('release', 332 | [ 333 | IntegerProperty('pk', Column('id')), 334 | Property('mbid', Column('gid')), 335 | Property('disambiguation', Column('comment')), 336 | Property('name', Column('name')), 337 | Property('status', Column('status', ForeignColumn('release_status', 'name', null=True))), 338 | Property('packaging', Column('packaging', ForeignColumn('release_packaging', 'name', null=True))), 339 | 340 | #Property('barcode', Column('barcode')), 341 | #Property('type', Column('release_group', ForeignColumn('release_group', 'type', ForeignColumn('release_group_primary_type', 'name', null=True)))), 342 | #Property('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))), 343 | #Property('country', Column('country', ForeignColumn('country', 'name', null=True))), 344 | #Property('country', Column('country', ForeignColumn('country', 'iso_code', null=True))), 345 | #Property('alias', Column('release_group', ForeignColumn('release_group', 'name', ForeignColumn('release_name', 'name')))), 346 | ], 347 | [ 348 | Relation( 349 | 'CREDITED_ON', 350 | start=Reference('artist_credit', Column('artist_credit')), 351 | end=Reference('release', Column('id')), 352 | properties=[] 353 | ), 354 | Relation( 355 | 'PART_OF', 356 | start=Reference('release', Column('id')), 357 | end=Reference('release_group', Column('release_group')), 358 | properties=[] 359 | ), 360 | ] 361 | ), 362 | Entity('release_status', 363 | [ 364 | IntegerProperty('pk', Column('id')), 365 | Property('name', Column('name')), 366 | ] 367 | ), 368 | Entity('release_label', 369 | [], 370 | [ 371 | Relation( 372 | 'RELEASED_ON', 373 | start=Reference('release', Column('release')), 374 | end=Reference('label', Column('label')), 375 | properties=[ 376 | Property('catalog_number', Column('catalog_number')), 377 | ] 378 | ), 379 | ] 380 | ), 381 | Entity('release_packaging', 382 | [ 383 | IntegerProperty('pk', Column('id')), 384 | Property('name', Column('name')), 385 | ] 386 | ), 387 | Entity('release_country', 388 | # do not create nodes 389 | [], 390 | [ 391 | Relation( 392 | 'RELEASED_IN', 393 | start=Reference('release', Column('release')), 394 | end=Reference('area', 395 | Column('country', 396 | ForeignColumn('country_area', 'area'))), 397 | properties=[ 398 | IntegerProperty('year', Column('date_year')), 399 | IntegerProperty('month', Column('date_month')), 400 | IntegerProperty('day', Column('date_day')), 401 | ] 402 | ), 403 | ] 404 | ), 405 | Entity('medium', 406 | [ 407 | IntegerProperty('pk', Column('id')), 408 | Property('name', Column('name')), 409 | Property('format', Column('format', ForeignColumn('medium_format', 'name', null=True))), 410 | ], 411 | [ 412 | Relation( 413 | 'RELEASED_ON_MEDIUM', 414 | start=Reference('release', Column('release')), 415 | end=Reference('medium', Column('id')), 416 | properties=[] 417 | ), 418 | ] 419 | ), 420 | Entity('medium_format', 421 | [ 422 | IntegerProperty('pk', Column('id')), 423 | Property('name', Column('name')), 424 | ], 425 | [ 426 | Relation( 427 | 'PARENT_FORMAT', 428 | start=Reference('medium_format', Column('id')), 429 | end=Reference('medium_format', Column('parent')), 430 | properties=[] 431 | ), 432 | ] 433 | ), 434 | Entity('recording', 435 | [ 436 | IntegerProperty('pk', Column('id')), 437 | Property('mbid', Column('gid')), 438 | Property('disambiguation', Column('comment')), 439 | Property('name', Column('name')), 440 | 441 | #Property('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))), 442 | ], 443 | [ 444 | Relation( 445 | 'CREDITED_ON', 446 | start=Reference('artist_credit', Column('artist_credit')), 447 | end=Reference('recording', Column('id')), 448 | properties=[] 449 | ), 450 | ] 451 | ), 452 | Entity('track', 453 | [ 454 | IntegerProperty('pk', Column('id')), 455 | Property('mbid', Column('gid')), 456 | Property('name', Column('name')), 457 | IntegerProperty('position', Column('position')), 458 | IntegerProperty('length', Column('length')), 459 | ], 460 | [ 461 | Relation( 462 | 'IS_RECORDING', 463 | start=Reference('track', Column('id')), 464 | end=Reference('recording', Column('recording')), 465 | properties=[] 466 | ), 467 | Relation( 468 | 'APPEARS_ON', 469 | start=Reference('track', Column('id')), 470 | end=Reference('medium', Column('medium')), 471 | properties=[] 472 | ), 473 | Relation( 474 | 'CREDITED_ON', 475 | start=Reference('artist_credit', Column('artist_credit')), 476 | end=Reference('track', Column('id')), 477 | properties=[] 478 | ), 479 | ], 480 | ), 481 | Entity('url', 482 | [ 483 | IntegerProperty('pk', Column('id')), 484 | Property('mbid', Column('gid')), 485 | Property('name', Column('url')), 486 | ], 487 | ), 488 | ] 489 | + 490 | [make_link_entity(e0, e1) for (e0, e1) in linked_entities] 491 | ) 492 | 493 | -------------------------------------------------------------------------------- /run_batchimport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Copyright 2013 Paul Tremberth, Newlynn Labs 4 | # See LICENSE for details. 5 | 6 | import optparse 7 | import ConfigParser 8 | import os 9 | import traceback 10 | 11 | # ---------------------------------------------------------------------- 12 | 13 | def main(): 14 | 15 | option_parser = optparse.OptionParser() 16 | option_parser.add_option("-c", "--config", dest="configfile", help="configuration file", default=None) 17 | (options, args) = option_parser.parse_args() 18 | 19 | config_parser = ConfigParser.RawConfigParser() 20 | if options.configfile: 21 | config_parser.read(options.configfile) 22 | else: 23 | print "you must provide a config file" 24 | sys.exit(0) 25 | 26 | if config_parser.has_section('INDEX_FILES'): 27 | index_files = dict(( 28 | (index_name, index_file) 29 | for index_name, index_file in config_parser.items('INDEX_FILES') 30 | )) 31 | else: 32 | print "no INDEX_FILES section" 33 | index_files = None 34 | 35 | args = ['java'] 36 | args.append('-server') 37 | 38 | args.append('-Xmx1G') 39 | 40 | if config_parser.has_option('BATCHIMPORT_SETTINGS', 'jar_location'): 41 | args.append('-jar') 42 | args.append(config_parser.get('BATCHIMPORT_SETTINGS', 'jar_location')) 43 | 44 | if config_parser.has_option('BATCHIMPORT_SETTINGS', 'dbfile_location'): 45 | args.append(config_parser.get('BATCHIMPORT_SETTINGS', 'dbfile_location')) 46 | 47 | if config_parser.has_option('BATCHIMPORT_SETTINGS', 'nodes_file'): 48 | args.append(config_parser.get('BATCHIMPORT_SETTINGS', 'nodes_file')) 49 | if config_parser.has_option('BATCHIMPORT_SETTINGS', 'relations_file'): 50 | args.append(config_parser.get('BATCHIMPORT_SETTINGS', 'relations_file')) 51 | if config_parser.has_section('INDEX_FILES'): 52 | for index_name, index_file in config_parser.items('INDEX_FILES'): 53 | args.append('node_index') 54 | args.append(index_name) 55 | args.append('fulltext') 56 | args.append(index_file) 57 | 58 | # check what's being executed 59 | print " ".join(args) 60 | 61 | os.execvp("java", args) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | 67 | -------------------------------------------------------------------------------- /sql2graph.conf.default: -------------------------------------------------------------------------------- 1 | # Example configuration file for sql2graph 2 | 3 | # define all entities that you want to import 4 | # and tjeir corresponding CSV dump file 5 | # entity_name=dump_file 6 | [TABLE_DUMPS] 7 | car=/home/user/sql2graph/examples/cars/dumps/cars.csv 8 | manufacturer=/home/user/sql2graph/examples/cars/dumps/manufacturers.csv 9 | 10 | # define in what order the dump file/entities should be processed 11 | # if an entity2 has relations to another entity1, put entity2 after entity1 12 | [IMPORT_ORDER] 13 | order=manufacturer, 14 | car 15 | 16 | # Optional index files (for Neo4J batch-import) 17 | # index_name=index_file 18 | # (index_name is the index defined in the schema's entity fields) 19 | [INDEX_FILES] 20 | cars=cars_index.csv 21 | manufacturers=manufacturers_index.csv 22 | 23 | # Optional parameters for running Neo4J batch-import 24 | # See https://github.com/jexp/batch-import 25 | [BATCHIMPORT_SETTINGS] 26 | jar_location=/home/user/tmp/neo4j-batch-import/target/batch-import-jar-with-dependencies.jar 27 | dbfile_location=/home/user/downloads/neo4j-community-x.x.x/data/graph.db 28 | nodes_file=cars_nodes.csv 29 | relations_file=cars_rels.csv 30 | -------------------------------------------------------------------------------- /sql2graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redapple/sql2graph/b2792618f0f5345200a444f4d1b119c9605877cf/sql2graph/__init__.py -------------------------------------------------------------------------------- /sql2graph/export.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2013 Paul Tremberth, Newlynn Labs 4 | # See LICENSE for details. 5 | 6 | import graph 7 | import schema 8 | import bz2 9 | import csv 10 | import traceback 11 | import sys 12 | 13 | # ---------------------------------------------------------------------- 14 | MERGED = '***MERGED***' 15 | 16 | class CsvBatchWriter(object): 17 | 18 | CSV_BATCH_SIZE = 100000 19 | DEBUG = False 20 | 21 | def __init__(self, filename, batch_size=CSV_BATCH_SIZE): 22 | self.filename = filename 23 | self.fp = None 24 | self.csvwriter = None 25 | self.batch_size = batch_size 26 | self.output = [] 27 | self.so_far = 0 28 | 29 | def initialize(self, header_fields): 30 | self.fp = open(self.filename, 'wb') 31 | self.csvwriter = csv.DictWriter(self.fp, header_fields, dialect="excel-tab") 32 | self.csvwriter.writeheader() 33 | 34 | def append(self, elem): 35 | self.output.append(elem) 36 | self.test_flush() 37 | 38 | def extend(self, elems): 39 | self.output.extend(elems) 40 | self.test_flush() 41 | 42 | def test_flush(self): 43 | if len(self.output) >= self.batch_size: 44 | self.flush() 45 | 46 | def flush(self, delete_elements=False): 47 | if self.output: 48 | 49 | self.csvwriter.writerows(self.output) 50 | self.so_far += len(self.output) 51 | if self.DEBUG: 52 | print " == %d rows written to %s (total=%d) ==" % ( 53 | len(self.output), self.filename, self.so_far) 54 | 55 | if delete_elements: 56 | for n in self.output: 57 | del n 58 | self.output = [] 59 | 60 | def close(self): 61 | if self.fp: 62 | self.fp.close() 63 | 64 | 65 | class GraphExporter(object): 66 | 67 | CSV_BATCH_SIZE = 100000 68 | SUPPORTED_OUTPUT_FORMATS = ['neo4j'] 69 | DEBUG = True 70 | 71 | def __init__(self, schema, format, dialect=csv.excel_tab, pretend=False): 72 | 73 | # only supported format for now is Neo4j batch-import 74 | # see: https://github.com/jexp/batch-import 75 | if not self.supported_format(format): 76 | raise ValueError 77 | self.format = format 78 | self.dialect = dialect 79 | 80 | self.node_list = graph.NodeList() 81 | self.relation_list = graph.RelationList() 82 | self.nodes_csv_fields = None # used as CSV header column names 83 | self.rels_csv_fields = None # used as CSV header column names 84 | 85 | self.schema = dict((entity.name, entity) for entity in schema) 86 | 87 | self.dumpfiles = {} 88 | self.dumpfile_fields = {} 89 | self.entity_order = [] 90 | 91 | self.output_nodes_files = {} 92 | self.output_relations_files = {} 93 | self.output_indexes_files = {} 94 | 95 | self.pretend = pretend 96 | 97 | def supported_format(self, format): 98 | return format.lower() in [f.lower() for f in self.SUPPORTED_OUTPUT_FORMATS] 99 | 100 | def feed_dumpfile(self, entity, filename, fields=None): 101 | self.dumpfiles[entity] = filename 102 | if fields: 103 | self.dumpfile_fields[entity] = fields 104 | self.entity_order.append(entity) 105 | 106 | def set_output_nodes_file(self, entity, filename): 107 | self.output_nodes_files[entity] = filename 108 | 109 | def set_output_relations_file(self, entity, filename): 110 | self.output_relations_files[entity] = filename 111 | 112 | def set_output_indexes_file(self, entity, filename): 113 | self.output_indexes_files[entity] = filename 114 | 115 | def run(self): 116 | self.read_schema() 117 | #self.step_set_CSV_header_fields() 118 | self.export() 119 | 120 | def read_schema(self): 121 | self.read_nodes_csv_fields() 122 | self.read_rels_csv_fields() 123 | 124 | def read_nodes_csv_fields(self): 125 | # all Nodes SHOULD have their entity as a property 126 | fields_begin = ['kind'] 127 | 128 | node_properties = [] 129 | 130 | for entity_name, entity in self.schema.iteritems(): 131 | if entity_name not in self.entity_order: 132 | continue 133 | if entity.fields: 134 | for field in entity.fields: 135 | # the following could be used to add a column type to CSV header fields 136 | #node_properties.append( 137 | #"%s%s" % ( 138 | #field.name, 139 | #":%s" % field.db_field_type 140 | #if field.db_field_type else '')) 141 | node_properties.append(field.name) 142 | self.nodes_csv_fields = fields_begin + list(set(node_properties) - set(fields_begin)) 143 | 144 | def read_rels_csv_fields(self): 145 | fields_begin = ['start', 'end', 'rel_type'] 146 | rels_properties = [] 147 | for entity_name, entity in self.schema.iteritems(): 148 | if entity_name not in self.entity_order: 149 | continue 150 | if entity.relations: 151 | for rel in entity.relations: 152 | rels_properties.extend([prop.name for prop in rel.properties]) 153 | self.rels_csv_fields = fields_begin + list( 154 | set(rels_properties) - set(fields_begin)) 155 | 156 | def export(self): 157 | """ 158 | Read dump files and write nodes and relations at the same time 159 | """ 160 | # write all nodes in ONE file and all relations in ONE file 161 | # (works ONLY for Neo4j batch-import format) 162 | 163 | onodes_filename = self.output_nodes_files.get(MERGED) 164 | orels_filename = self.output_relations_files.get(MERGED) 165 | 166 | nodes_csv_writer, rels_csv_writer = None, None 167 | 168 | if onodes_filename: 169 | if not self.pretend: 170 | nodes_writer = CsvBatchWriter(onodes_filename, self.CSV_BATCH_SIZE) 171 | nodes_writer.initialize(self.nodes_csv_fields) 172 | 173 | if orels_filename: 174 | if not self.pretend: 175 | rels_writer = CsvBatchWriter(orels_filename, self.CSV_BATCH_SIZE) 176 | rels_writer.initialize(self.rels_csv_fields) 177 | 178 | index_writers = {} 179 | 180 | # loop on dump files in order 181 | if not self.entity_order: 182 | self.entity_order = list(self.dumpfiles.iterkeys()) 183 | 184 | for entity_name in self.entity_order: 185 | if not self.dumpfiles.get(entity_name) or not self.schema.get(entity_name): 186 | if self.DEBUG: 187 | print "no dump file or not schema configured for entity", entity_name 188 | continue 189 | 190 | if self.DEBUG: 191 | print "--- processing file", self.dumpfiles[entity_name] 192 | entity = self.schema.get(entity_name) 193 | with self.open_dumpfile(self.dumpfiles[entity_name]) as dumpfile: 194 | 195 | self.create_index_writers_if_needed(entity, index_writers) 196 | 197 | self.export_tabledump(entity, dumpfile, 198 | nodes_writer, rels_writer, index_writers) 199 | 200 | # pending relations if any 201 | if not self.pretend: 202 | self.export_rels_csv(writer=rels_csv_writer) 203 | 204 | # close all CSV writers 205 | if nodes_csv_writer: 206 | nodes_writer.close() 207 | 208 | if rels_csv_writer: 209 | rels_writer.close() 210 | 211 | for w in index_writers.itervalues(): 212 | w.close() 213 | 214 | 215 | def create_index_writers_if_needed(self, entity, index_writers): 216 | indexes = entity.get_indexed_fields() 217 | if indexes: 218 | for index_name, indexed_fields in indexes.iteritems(): 219 | if index_name not in index_writers: 220 | 221 | # check if output file has been configured for this index 222 | index_filename = self.output_indexes_files.get(index_name) 223 | if not index_filename: 224 | print "no output file for index %s" % index_name 225 | continue 226 | 227 | # add a "node id" field 228 | header_fields = ['node_id'] + [field.name for field in indexed_fields] 229 | 230 | index_writer = CsvBatchWriter(index_filename, self.CSV_BATCH_SIZE) 231 | index_writer.initialize(header_fields) 232 | 233 | index_writers[index_name] = index_writer 234 | 235 | def export_tabledump(self, entity, fp, 236 | nodes_writer, rels_writer, index_writers): 237 | 238 | stats = {'nodes': 0, 'rels': 0, 'indexed': 0} 239 | if not entity: 240 | print "know nothing about %s" % entity.name 241 | return 242 | 243 | PRINT_FREQUENCY = 25000 244 | 245 | # should we write something to one or more indexes? 246 | if index_writers: 247 | indexes = entity.get_indexed_fields() 248 | else: 249 | indexes = None 250 | 251 | node_id = 0 252 | 253 | # read CSV file line by line 254 | #print self.dialect 255 | csvreader = csv.DictReader(fp, dialect=self.dialect) 256 | for cnt, record in enumerate(csvreader, start=1): 257 | 258 | node = None 259 | 260 | # create a new node 261 | primary_key_field = entity.get_primary_key_field() 262 | if primary_key_field: 263 | 264 | node = graph.Node(record, entity) 265 | node_id = self.node_list.add_node(node) 266 | if not node_id: 267 | # FIXME: find something better 268 | raise LookupError 269 | 270 | # add it to the write queue 271 | nodes_writer.append(node.get_dict(self.nodes_csv_fields)) 272 | 273 | stats['nodes'] += 1 274 | 275 | if indexes: 276 | for index_name, indexed_fields in indexes.iteritems(): 277 | index_writers.get(index_name).append( 278 | node.get_dict( 279 | ['node_id'] + [field.name for field in indexed_fields])) 280 | stats['indexed'] += 1 281 | 282 | # add relations if needed 283 | new_rels = [rel.get_dict(self.rels_csv_fields) 284 | for rel in self.iter_relations(entity, record)] 285 | rels_writer.extend(new_rels) 286 | stats['rels'] += len(new_rels) 287 | 288 | # hint to gc; there's surely something prettier 289 | if node: 290 | del node 291 | del record 292 | 293 | if self.DEBUG: 294 | if not (cnt % PRINT_FREQUENCY): 295 | print "\r line %8d - nodes(%8d), rels(%8d), idx(%8d) -- last node ID %d" % ( 296 | cnt, stats['nodes'], stats['rels'], stats['indexed'], node_id), 297 | sys.stdout.flush() 298 | 299 | if self.DEBUG: 300 | print 301 | print " --> Finished with %8d of entity %s" % (cnt, entity.name) 302 | print "nodes(%8d), rels(%8d), idx(%8d) -- last node ID %d" % ( 303 | stats['nodes'], stats['rels'], stats['indexed'], node_id) 304 | 305 | # write everything that's pending 306 | writers = [nodes_writer, rels_writer] + list(index_writers.itervalues()) 307 | for w in writers: 308 | if w: 309 | w.flush() 310 | 311 | def iter_relations(self, entity, record): 312 | relation_definitions = entity.relations 313 | if not relation_definitions: 314 | return 315 | 316 | for reldef in relation_definitions: 317 | try: 318 | origin_column = record.get(reldef.origin.db_column) 319 | target_column = record.get(reldef.target.db_column) 320 | if not origin_column and reldef.origin.null: 321 | continue 322 | if not target_column and reldef.target.null: 323 | continue 324 | 325 | origin_node_pos, target_node_pos = None, None 326 | origin_node_pos = self.node_list.lookup_node_pos( 327 | reldef.origin.entity, 328 | int(origin_column)) 329 | target_node_pos = self.node_list.lookup_node_pos( 330 | reldef.target.entity, 331 | int(target_column)) 332 | 333 | if not origin_node_pos or not target_node_pos: 334 | continue 335 | # else: 336 | # FIXME: store an unresolved relation 337 | 338 | properties = {} 339 | for prop in reldef.properties: 340 | if isinstance(prop.value, schema.Column): 341 | properties[prop.name] = record.get(prop.value.name) 342 | else: 343 | properties[prop.name] = prop.value 344 | 345 | yield graph.Relation( 346 | origin_node_pos, 347 | target_node_pos, 348 | properties) 349 | except Exception, e: 350 | traceback.print_exc() 351 | raise e 352 | 353 | def resolve_relation(self, r): 354 | if not r.end: 355 | target_node_pos = self.node_list.lookup_node_pos( 356 | r.start_target_entity, r.start_fk) 357 | if target_node_pos: 358 | r.end = target_node_pos 359 | 360 | 361 | def export_rels_csv(self, fp=None, writer=None): 362 | BATCH_SIZE = 10000 363 | if not writer and fp: 364 | writer = csv.DictWriter(fp, self.rels_csv_fields, dialect="excel-tab") 365 | writer.writeheader() 366 | size = len(self.relation_list.relation_list) 367 | print "%d relations to write" % size 368 | output_relations = [] 369 | for cnt, rel in enumerate(self.relation_list.iter_rels(), start=1): 370 | print "\r %8d/%8d (%.1f%%)" % (cnt, size, 100*cnt/size), 371 | if not rel.end: 372 | self.resolve_relation(rel) 373 | output_relations.append(rel.get_dict()) 374 | del rel 375 | if not (cnt % BATCH_SIZE): 376 | self._flush_rows(writer, output_relations) 377 | output_relations = [] 378 | if output_relations: 379 | self._flush_rows(writer, output_relations) 380 | print 381 | 382 | @classmethod 383 | def open_dumpfile(cls, filename): 384 | if filename.endswith(('bz2',)): 385 | return bz2.BZ2File(filename, 'rb') 386 | else: 387 | return open(filename, 'rb') 388 | -------------------------------------------------------------------------------- /sql2graph/export2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from sql2graph.schema2 import generate_iter_query 5 | from sql2graph.schema2 import SchemaHelper, Schema, Column, ForeignColumn, \ 6 | Property, IntegerProperty, Relation, Entity, Reference 7 | from sql2graph.schema2 import indent, generate_union_query 8 | 9 | class SQL2GraphExporter(object): 10 | 11 | # to change the TSV header line 12 | nodes_header_override = None 13 | rels_header_override = None # not used currently 14 | output_encoding = 'UTF8' 15 | 16 | def __init__(self, schema, entities, strict=True): 17 | self.cfg = None 18 | self.db = None 19 | self.strict = strict 20 | 21 | self.schema = SchemaHelper(schema, entities, strict=self.strict) 22 | self.entity_limit = None 23 | self.nodes_filename = None 24 | self.relations_filename = None 25 | 26 | self.all_properties = self.schema.fetch_all_fields(self.cfg, self.db) 27 | self.all_relations_properties = self.schema.fetch_all_relations_properties(self.cfg, self.db) 28 | 29 | self.check_nodes_header_override() 30 | 31 | def check_nodes_header_override(self): 32 | 33 | all_column_names = [cname for cname, ctype in self.all_properties] + ["kind"] 34 | for incols, outcols in self.nodes_header_override.items(): 35 | # simple column renaming 36 | if isinstance(incols, (str,)): 37 | if outcols is not None: 38 | if incols not in all_column_names: 39 | del self.nodes_header_override[incols] 40 | # merging columns 41 | elif isinstance(incols, (tuple,)): 42 | valid_columns = [c for c in incols if c in all_column_names] 43 | self.nodes_header_override[tuple(valid_columns)] = self.nodes_header_override.pop(incols) 44 | 45 | def set_nodes_filename(self, filename): 46 | self.nodes_filename = filename 47 | 48 | def set_rels_filename(self, filename): 49 | self.relations_filename = filename 50 | 51 | def set_entity_export_limit(self, limit): 52 | if limit: 53 | self.entity_limit = limit 54 | 55 | @classmethod 56 | def generate_tsvfile_output_query(cls, query, output_filename, modify_headers={}): 57 | 58 | if modify_headers: 59 | select_lines = [] 60 | 61 | for incols, outcols in modify_headers.items(): 62 | # simple column renaming 63 | if isinstance(incols, (str,)): 64 | if outcols is not None: 65 | select_lines.append("wrapped.%s AS %s" % (incols, outcols)) 66 | # merging columns 67 | elif isinstance(incols, (tuple,)): 68 | infunc, outname = outcols 69 | k = infunc(*["wrapped.%s::text" % c for c in incols]) 70 | select_lines.append("%s AS %s" % (k, outname)) 71 | 72 | select_lines = ",\n".join(select_lines) 73 | 74 | query= """ 75 | SELECT 76 | %(fields)s 77 | FROM ( 78 | %(query)s 79 | ) 80 | AS wrapped 81 | """ % dict(query=indent(query, ' '), fields=indent(select_lines, ' ')) 82 | 83 | return """ 84 | COPY( 85 | %(query)s 86 | ) 87 | TO '%(filename)s' CSV HEADER 88 | DELIMITER E'\\t' 89 | ENCODING '%(encoding)s'; 90 | """ % dict(query=indent(query, ' '), filename=output_filename, 91 | encoding=cls.output_encoding) 92 | 93 | 94 | # --- create temporary mapping table 95 | def create_mapping_table_query(self, multiple=False): 96 | print """ 97 | -- Create the mapping table 98 | -- between (entity, pk) tuples and incrementing node IDs 99 | """ 100 | node_queries = [] 101 | for columns, joins in self.schema.fetch_all(self.cfg, self.db, 102 | [(n,t) for n, t in self.all_properties if n in ('kind', 'pk')]): 103 | if columns and joins: 104 | node_queries.append(generate_iter_query(columns, joins, 105 | limit=self.entity_limit, order_by='pk')) 106 | 107 | if multiple: 108 | 109 | query = """ 110 | CREATE TEMPORARY TABLE entity_mapping 111 | ( 112 | node_id SERIAL, 113 | entity TEXT, 114 | pk BIGINT 115 | ); 116 | """ 117 | 118 | insert_entity_query = """ 119 | INSERT INTO entity_mapping 120 | (entity, pk) 121 | %s 122 | ORDER BY pk;\n""" 123 | for q in node_queries: 124 | query += insert_entity_query % indent(q, ' ') 125 | 126 | query += """-- create index to speedup lookups 127 | CREATE INDEX ON entity_mapping (entity, pk); 128 | 129 | ANALYZE entity_mapping; 130 | """ 131 | return query 132 | 133 | else: 134 | 135 | mapping_query = """ 136 | SELECT 137 | kind AS entity, 138 | pk, 139 | row_number() OVER (ORDER BY kind, pk) as node_id 140 | FROM 141 | ( 142 | %s 143 | ) 144 | AS entity_union \n""" % indent(generate_union_query(node_queries), ' ') 145 | 146 | temp_mapping_table = """ 147 | DROP TABLE IF EXISTS entity_mapping; 148 | 149 | CREATE TEMPORARY TABLE entity_mapping AS 150 | ( 151 | %s 152 | ); 153 | 154 | -- create index to speedup lookups 155 | CREATE INDEX ON entity_mapping (entity, pk); 156 | 157 | ANALYZE entity_mapping; 158 | 159 | """ % indent(mapping_query, ' ') 160 | 161 | return temp_mapping_table 162 | 163 | 164 | # --- save the full nodes tables to file 165 | def create_nodes_query(self, multiple=False): 166 | 167 | node_queries = [] 168 | for columns, joins in self.schema.fetch_all(self.cfg, self.db, 169 | self.all_properties if not multiple else []): 170 | if columns and joins: 171 | node_queries.append(generate_iter_query(columns, joins, 172 | limit=self.entity_limit, order_by='pk')) 173 | 174 | #node_queries = ["""\n%s\nORDER BY pk\n""" % q for q in node_queries] 175 | headers = None 176 | 177 | if self.nodes_header_override: 178 | # start with 1-to-1 name map 179 | headers = dict([(name, name) for (name, maptype) in self.all_properties]) 180 | 181 | # fix some headers 182 | headers.update(self.nodes_header_override) 183 | 184 | if multiple: 185 | qs = [] 186 | for i, q in enumerate(node_queries, start=1): 187 | qs.append( 188 | self.generate_tsvfile_output_query( 189 | """\n%s\nORDER BY pk\n""" % q, 190 | self.nodes_filename.replace('.csv', '.%04d.csv' % i), 191 | headers) 192 | ) 193 | return "\n".join(qs) 194 | else: 195 | #ordered_union_query = """\n%s\nORDER BY kind, pk\n""" % generate_union_query(node_queries) 196 | ordered_union_query = """\n%s\nORDER BY kind, pk\n""" % generate_union_query(node_queries) 197 | 198 | return self.generate_tsvfile_output_query( 199 | ordered_union_query, 200 | self.nodes_filename, 201 | headers) 202 | 203 | 204 | def create_relationships_query(self, multiple=False): 205 | 206 | rels_queries = [] 207 | 208 | if multiple: 209 | for relations in self.schema.fetch_all_relations(self.cfg, self.db): 210 | if not relations: 211 | continue 212 | for columns, joins in relations: 213 | rels_queries.append(generate_iter_query(columns, joins)) 214 | qs = [] 215 | for i, q in enumerate(rels_queries, start=1): 216 | qs.append( 217 | self.generate_tsvfile_output_query(q, 218 | self.relations_filename.replace('.csv', '.%04d.csv' % i))) 219 | return "\n".join(qs) 220 | else: 221 | for relations in self.schema.fetch_all_relations(self.cfg, self.db, self.all_relations_properties): 222 | if not relations: 223 | continue 224 | for columns, joins in relations: 225 | rels_queries.append(generate_iter_query(columns, joins)) 226 | return self.generate_tsvfile_output_query( 227 | generate_union_query(rels_queries), 228 | self.relations_filename) 229 | 230 | -------------------------------------------------------------------------------- /sql2graph/graph.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2013 Paul Tremberth, Newlynn Labs 4 | # See LICENSE for details. 5 | 6 | import collections 7 | import schema 8 | 9 | # ---------------------------------------------------------------------- 10 | 11 | class DictLookup(object): 12 | def __init__(self): 13 | self.container = {} 14 | 15 | def append(self, entity, entity_pk, node_position): 16 | if not self.container.get(entity): 17 | self.container[entity] = {} 18 | self.container[entity][entity_pk] = node_position 19 | 20 | def lookup(self, entity, entity_pk): 21 | if self.container.get(entity): 22 | if self.container[entity].get(entity_pk): 23 | return self.container[entity][entity_pk] 24 | 25 | def start_entity(self, entity): 26 | pass 27 | 28 | def stop_entity(self, entity): 29 | pass 30 | 31 | 32 | class Node(object): 33 | def __init__(self, record, entity): 34 | if not isinstance(record, dict): 35 | raise TypeError 36 | self.record = record 37 | 38 | if not isinstance(entity, schema.Entity) or not entity.get_primary_key_field(): 39 | raise TypeError 40 | self.entity = entity 41 | 42 | # convert record to properties per entity schema definition 43 | self.properties = {} 44 | for field in entity.fields: 45 | if field.column and isinstance(field.column, schema.Column): 46 | self.properties[field.name] = record.get(field.column.name) 47 | else: 48 | self.properties[field.name] = field.value 49 | 50 | #self.pk_column = pk_column 51 | #self.entity_name = entity.name 52 | #print entity.get_primary_key_field().column.name 53 | self.entity_pk = int(record.get(entity.get_primary_key_field().column.name)) 54 | 55 | self.properties['kind'] = self.entity.name 56 | self.properties['node_id'] = None 57 | 58 | def get_dict(self, fields): 59 | return dict((e, self.properties.get(e, '')) for e in fields) 60 | 61 | def save_node_id(self, node_id): 62 | self.properties['node_id'] = node_id 63 | 64 | def get_node_id(self): 65 | return self.properties['node_id'] 66 | 67 | def get_entity_pk(self): 68 | return self.entity_pk 69 | 70 | def __repr__(self): 71 | return "entity [%s/pk=%s]: %s" % (self.entity.name, self.entity_pk, str(self.properties)) 72 | 73 | 74 | class Relation(object): 75 | def __init__(self, start, end, properties): 76 | self.start = start 77 | self.end = end 78 | self.properties = properties 79 | 80 | # used when we could not resolve on the fly 81 | self.start_fk = None 82 | self.start_target_entity = None 83 | 84 | def get_dict(self, fields): 85 | 86 | if self.end: 87 | values = dict(( 88 | ('start', self.start), 89 | ('end', self.end), 90 | )) 91 | values.update(self.properties) 92 | return dict((e, values.get(e, '')) for e in fields) 93 | else: 94 | return None 95 | 96 | def set_deferred_resolution(self, fk, target_entity): 97 | self.start_fk = fk 98 | self.start_target_entity = target_entity 99 | 100 | def __repr__(self): 101 | return "(%d)-[%s]->(%d)" % ( 102 | self.start, self.property, self.end or 0,) 103 | 104 | 105 | class NodeList(object): 106 | 107 | def __init__(self): 108 | self.node_list_size = 0 109 | 110 | self.reverse_lookup = DictLookup() 111 | 112 | self.entity_fields = set() 113 | 114 | self.last_lookup = None 115 | self.last_lookup_result = None 116 | 117 | def get_all_fields(self): 118 | return self.entity_fields 119 | 120 | def update_entity_fields(self, record): 121 | self.entity_fields.update(record.iterkeys()) 122 | 123 | def add_node(self, node): 124 | """ 125 | Add a node to the node list 126 | Returns the node position in the list 127 | """ 128 | # self.node_list.append(node) 129 | # do not actually store the Node, 130 | # just pretend it is at position X (current size of list) 131 | self.node_list_size += 1 132 | 133 | node.save_node_id(self.node_list_size) 134 | 135 | node_pk = node.get_entity_pk() 136 | if node_pk: 137 | self.reverse_lookup.append(node.entity.name, node_pk, node.get_node_id()) 138 | else: 139 | print node, "has no entity PK" 140 | raise RuntimeError 141 | 142 | # reset last lookup cache 143 | self.last_lookup, self.last_lookup_result = None, None 144 | 145 | return node.get_node_id() 146 | 147 | def lookup_node_pos(self, entity_name, entity_pk): 148 | if self.last_lookup == (entity_name, entity_pk): 149 | return self.last_lookup_result 150 | else: 151 | result = self.reverse_lookup.lookup(entity_name, entity_pk) 152 | self.last_lookup = (entity_name, entity_pk) 153 | self.last_lookup_result = result 154 | 155 | return result 156 | 157 | def iter_nodes(self): 158 | while True: 159 | try: 160 | yield self.node_list.popleft() 161 | except: 162 | break 163 | 164 | 165 | class RelationList(object): 166 | 167 | def __init__(self): 168 | self.relation_list = collections.deque() 169 | 170 | def add_relation(self, relation): 171 | self.relation_list.append(relation) 172 | 173 | def iter_rels(self): 174 | while True: 175 | try: 176 | yield self.relation_list.popleft() 177 | except: 178 | break 179 | 180 | 181 | -------------------------------------------------------------------------------- /sql2graph/schema.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2013 Paul Tremberth, Newlynn Labs 4 | # See LICENSE for details. 5 | 6 | # inspired by https://bitbucket.org/lalinsky/mbslave 7 | # mbslave/search.py 8 | 9 | class Column(object): 10 | 11 | def __init__(self, name, default=None): 12 | self.name = name 13 | self.default = default 14 | 15 | def __repr__(self): 16 | return "Column(%s, default=%s)" % (self.name, self.default) 17 | 18 | class Field(object): 19 | 20 | db_field_type = None 21 | 22 | def __init__(self, name, value, primary_key=False, index=None): 23 | self.name = name 24 | if isinstance(value, Column): 25 | self.column = value 26 | self.value = None 27 | else: 28 | self.value = value 29 | self.primary_key = primary_key 30 | self.index=index 31 | 32 | def __repr__(self): 33 | if self.value: 34 | return "Field(%s, value=%s)" % (self.name, self.value) 35 | elif self.column: 36 | return "Field(%s, column=%s)" % (self.name, self.column) 37 | 38 | 39 | class IntField(Field): 40 | db_field_type = 'int' 41 | 42 | 43 | class BoolField(Field): 44 | db_field_type = 'bool' 45 | 46 | 47 | class Property(object): 48 | 49 | def __init__(self, name, value, index=None): 50 | self.name = name 51 | self.value = value 52 | self.index = index 53 | 54 | 55 | class Reference(object): 56 | 57 | def __init__(self, entity, db_column, null=False): 58 | self.entity = entity 59 | self.db_column = db_column 60 | self.null = null 61 | 62 | def __repr__(self): 63 | return "Reference(entity(%s), column(%s))" % (self.entity, self.db_column) 64 | 65 | 66 | class Relation(object): 67 | 68 | def __init__(self, origin, target, properties): 69 | self.origin = origin 70 | self.target = target 71 | self.properties = properties 72 | 73 | def __repr__(self): 74 | return "Relation(origin(%s), target(%s))" % (self.origin, self.target) 75 | 76 | 77 | class Entity(object): 78 | 79 | def __init__(self, name, fields=None, relations=None): 80 | self.name = name 81 | self.fields = fields 82 | self.relations = relations 83 | self.primary_key_field = None 84 | self.indexed_fields = None 85 | 86 | def get_primary_key_field(self): 87 | if not self.primary_key_field: 88 | if self.fields: 89 | for field in self.fields: 90 | if field.primary_key: 91 | self.primary_key_field = field 92 | break 93 | 94 | return self.primary_key_field 95 | 96 | def get_indexed_fields(self): 97 | if not self.indexed_fields: 98 | indexes = {} 99 | if self.fields: 100 | for field in self.fields: 101 | if field.index: 102 | if field.index not in indexes: 103 | indexes[field.index] = [] 104 | indexes[field.index].append(field) 105 | self.indexed_fields = indexes 106 | 107 | return self.indexed_fields 108 | -------------------------------------------------------------------------------- /sql2graph/schema2.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from collections import namedtuple 3 | 4 | DefaultProperty = namedtuple('DefaultProperty', ['name', 'column']) 5 | Relation = namedtuple('Relation', ['rtype', 'start', 'end', 'properties']) 6 | Reference = namedtuple('Reference', ['entity', 'key_column']) 7 | 8 | class Property(DefaultProperty): 9 | maptype = str 10 | 11 | 12 | class IntegerProperty(Property): 13 | maptype = int 14 | 15 | 16 | class BooleanProperty(Property): 17 | maptype = bool 18 | 19 | 20 | class Schema(object): 21 | 22 | def __init__(self, entities): 23 | self.entities = entities 24 | self.entities_by_id = dict((e.name, e) for e in entities) 25 | 26 | def __getitem__(self, name): 27 | return self.entities_by_id[name] 28 | 29 | 30 | class Entity(object): 31 | 32 | def __init__(self, name, fields=[], relations=[]): 33 | self.name = name 34 | self.fields = fields 35 | self.relations = relations 36 | self.fields_by_name = dict(((field.name, field) for field in fields)) 37 | 38 | def iter_relations(self, name=None): 39 | for rel in self.relations: 40 | if name is not None and rel.name != name: 41 | continue 42 | yield rel 43 | 44 | def iter_single_fields(self, name=None): 45 | for field in self.fields: 46 | if isinstance(field, Property): 47 | if name is not None and field.name != name: 48 | continue 49 | yield field 50 | 51 | 52 | class Column(object): 53 | 54 | def __init__(self, name, foreign=None, function=None): 55 | self.name = name 56 | self.foreign = foreign 57 | self.function = function 58 | 59 | 60 | class ForeignColumn(Column): 61 | 62 | def __init__(self, table, name, foreign=None, null=False, backref=None): 63 | super(ForeignColumn, self).__init__(name, foreign=foreign) 64 | self.table = table 65 | self.null = null 66 | self.backref = backref 67 | 68 | 69 | 70 | def placeholders(ids): 71 | return ", ".join(["%s" for i in ids]) 72 | 73 | 74 | def generate_iter_query(columns, joins, ids=(), limit=None, order_by=None): 75 | if not columns or not joins: 76 | return "" 77 | 78 | id_column = columns[0] 79 | tpl = ["SELECT", "%(columns)s", "FROM", "%(joins)s"] 80 | if ids: 81 | tpl.append("WHERE %(id_column)s IN (%(ids)s)") 82 | #tpl.append("ORDER BY %(id_column)s") 83 | if order_by is not None: 84 | tpl.append(u"ORDER BY %s" % order_by) 85 | if limit is not None: 86 | tpl.append("LIMIT %d" % limit) 87 | 88 | sql_columns = ',\n'.join(' %s' % (i, ) for i in columns) 89 | sql_joins = '\n'.join(' ' + i for i in joins) 90 | sql = "\n".join(tpl) % dict(columns=sql_columns, joins=sql_joins, 91 | id_column=id_column, ids=placeholders(ids)) 92 | 93 | return "(%s)" % sql 94 | 95 | 96 | def indent(s, indentation): 97 | return "\n".join(["%s%s" % (indentation, line) for line in s.split("\n")]) 98 | 99 | 100 | def generate_union_query(queries): 101 | return "\n\nUNION\n\n""".join(queries) 102 | 103 | 104 | class SchemaError(RuntimeError): 105 | _dummy = True 106 | 107 | class SchemaHelper(object): 108 | 109 | def __init__(self, schema, entities, strict=True): 110 | self.schema = schema 111 | self.entities = entities 112 | self.strict = strict 113 | 114 | self.check_schema() 115 | 116 | def check_schema(self): 117 | for kind in self.entities: 118 | try: 119 | self.schema[kind] 120 | except: 121 | raise SchemaError("Not all entities have a defined schema: %s" % kind) 122 | 123 | rel_entities = [] 124 | for kind in self.entities: 125 | entity = self.schema[kind] 126 | for r in entity.iter_relations(): 127 | rel_entities.append(r.start.entity) 128 | rel_entities.append(r.end.entity) 129 | 130 | if self.strict: 131 | missing = set(rel_entities) - set(self.entities) 132 | if missing: 133 | raise SchemaError("Some relations need additional entities: %s" % str(missing)) 134 | 135 | def iter_entity_nodes(self, db, kind, properties=[]): 136 | entity = self.schema[kind] 137 | if not entity.fields: 138 | return [], [] 139 | joins = [kind] 140 | tables = set([kind]) 141 | #columns = ['%s.id' % (kind,)] 142 | columns = ["'%s' as kind" % (kind,)] 143 | 144 | if not properties: 145 | properties = [(f.name, f.maptype) for f in entity.iter_single_fields()] 146 | for p, ptype in properties: 147 | if not entity.fields_by_name.get(p): 148 | if ptype == int: 149 | columns.append('0 AS "%s"' % p) 150 | else: 151 | columns.append('NULL AS "%s"' % p) 152 | continue 153 | field = entity.fields_by_name.get(p) 154 | table = kind 155 | column = field.column 156 | while column.foreign is not None: 157 | foreign_table = table + '__' + column.name + '__' + column.foreign.table 158 | if foreign_table not in tables: 159 | join = 'LEFT JOIN' if column.foreign.null else 'JOIN' 160 | joins.append('%(join)s %(parent)s AS %(label)s ON %(label)s.id = %(child)s.%(child_column)s' % dict( 161 | join=join, parent=column.foreign.table, child=table, child_column=column.name, label=foreign_table)) 162 | tables.add(foreign_table) 163 | table = foreign_table 164 | column = column.foreign 165 | 166 | columns.append('%s.%s%s' % (table, column.name, ' AS "%s"' % field.name if field.name else '')) 167 | 168 | return columns, joins 169 | 170 | 171 | def iter_entity_relations(self, db, kind, properties=[]): 172 | 173 | entity = self.schema[kind] 174 | 175 | tables = set([kind]) 176 | fields = [] 177 | 178 | relations = list(entity.iter_relations()) 179 | 180 | output_relations = [] 181 | for rel in relations: 182 | 183 | columns = [] 184 | joins = [kind] 185 | 186 | columns.append('start_entity.node_id AS start') 187 | columns.append('end_entity.node_id AS end') 188 | 189 | if isinstance(rel.rtype, Column): 190 | table = kind 191 | column = rel.rtype 192 | 193 | # do we need to convert the column value somehow? 194 | function = rel.rtype.function 195 | 196 | while column.foreign is not None: 197 | foreign_table = table + '__' + column.name + '__' + column.foreign.table 198 | if foreign_table not in tables: 199 | #join = 'LEFT JOIN' if column.foreign.null else 'JOIN' 200 | join = 'JOIN' 201 | joins.append('%(join)s %(parent)s AS %(label)s ON %(label)s.id = %(child)s.%(child_column)s' % dict( 202 | join=join, parent=column.foreign.table, child=table, child_column=column.name, label=foreign_table)) 203 | tables.add(foreign_table) 204 | table = foreign_table 205 | column = column.foreign 206 | 207 | if function: 208 | columns.append("%s AS rel_type" % function('%s.%s' % (table, column.name))) 209 | else: 210 | columns.append('%s.%s AS rel_type' % (table, column.name)) 211 | else: 212 | columns.append("'%s' AS rel_type" % rel.rtype) 213 | 214 | 215 | # FIXME: that's really ugly... 216 | relation_properties = [(p.name, p) for p in rel.properties] 217 | if not properties: 218 | properties = relation_properties 219 | 220 | relation_properties = dict(relation_properties) 221 | for prop_name, prop_type in properties: 222 | 223 | if prop_name not in relation_properties.keys(): 224 | if prop_type == int: 225 | columns.append('0 AS "%s"' % prop_name) 226 | else: 227 | columns.append('NULL AS "%s"' % prop_name) 228 | continue 229 | 230 | table = kind 231 | column = relation_properties[prop_name].column 232 | while column.foreign is not None: 233 | foreign_table = table + '__' + column.name + '__' + column.foreign.table 234 | if foreign_table not in tables: 235 | join = 'LEFT JOIN' if column.foreign.null else 'JOIN' 236 | joins.append('%(join)s %(parent)s AS %(label)s ON %(label)s.id = %(child)s.%(child_column)s' % dict( 237 | join=join, parent=column.foreign.table, child=table, child_column=column.name, label=foreign_table)) 238 | tables.add(foreign_table) 239 | table = foreign_table 240 | column = column.foreign 241 | 242 | columns.append('%s.%s%s' % (table, column.name, ' AS "%s"' % prop_name if prop_name else '')) 243 | 244 | # start/end entities 245 | start, end = rel.start, rel.end 246 | 247 | start_entity, end_entity = start.entity, end.entity 248 | start_column, end_column = start.key_column, end.key_column 249 | 250 | joins.append(""" 251 | JOIN entity_mapping start_entity 252 | ON ( 253 | start_entity.pk = %(kind)s.%(start_column)s 254 | AND 255 | start_entity.entity = '%(start_entity)s' 256 | )""" % dict(start_entity=start_entity, start_column=start_column.name, kind=kind)) 257 | joins.append(""" 258 | JOIN entity_mapping end_entity 259 | ON ( 260 | end_entity.pk = %(kind)s.%(end_column)s 261 | AND 262 | end_entity.entity = '%(end_entity)s' 263 | )""" % dict(end_entity=end_entity, end_column=end_column.name, kind=kind)) 264 | 265 | output_relations.append((columns, joins)) 266 | 267 | return output_relations 268 | 269 | def fetch_entities(self, db, kind, properties=[]): 270 | return self.iter_entity_nodes(db, kind, properties) 271 | 272 | def fetch_relations(self, db, kind, properties=[]): 273 | return self.iter_entity_relations(db, kind, properties) 274 | 275 | def fetch_all_fields(self, cfg, db): 276 | fields = set([]) 277 | for kind in self.entities: 278 | entity = self.schema[kind] 279 | fields.update( 280 | set([(f.name, f.maptype) for f in entity.iter_single_fields()]) 281 | ) 282 | return list(fields) 283 | 284 | def fetch_all_relations_properties(self, cfg, db): 285 | relations = set([]) 286 | for kind in self.entities: 287 | entity = self.schema[kind] 288 | relations.update( 289 | set([(p.name, p.maptype) for r in entity.iter_relations() for p in r.properties]) 290 | ) 291 | return list(relations) 292 | 293 | def fetch_all(self, cfg, db, properties): 294 | for entity in self.entities: 295 | yield self.fetch_entities(db, entity, properties) 296 | 297 | def fetch_all_relations(self, cfg, db, properties=[]): 298 | for entity in self.entities: 299 | yield self.fetch_relations(db, entity, properties) 300 | 301 | --------------------------------------------------------------------------------