├── .gitignore ├── README.adoc ├── bloom └── bloom.perspective ├── code ├── csharp │ └── Example.cs ├── go │ ├── example.go │ ├── example_aura.go │ ├── go.mod │ └── go.sum ├── graphql │ └── example.js ├── java │ └── Example.java ├── javascript │ └── example.js ├── python │ └── example.py └── test-code.sh ├── data ├── stackoverflow-43.dump ├── stackoverflow-50.dump └── stackoverflow-data-importer.zip ├── documentation ├── img │ ├── example.png │ ├── icon.svg │ ├── model.svg │ ├── so_logo.svg │ └── user-provided-answer.png ├── stackoverflow.adoc ├── stackoverflow.neo4j-browser-guide └── stackoverflow.workspace.adoc ├── relate.project-install.json └── scripts └── import.cypher /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | *.class 4 | code/csharp/bin 5 | code/csharp/debug 6 | code/csharp/obj 7 | code/javascript/node_modules 8 | code/java/target 9 | pythonenv3.8 10 | /.idea 11 | .vscode 12 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | //Stack Overflow 2 | :name: stackoverflow 3 | //graph example description 4 | :long-name: Stack Overflow Questions, Answers, Tags, and Comments 5 | :description: Stack Overflow questions, answers, tags, and comments and the relationships between them. 6 | //icon representing graph example 7 | :icon: documentation/img/icon.svg 8 | //associated search tags, separate multiple tags with comma 9 | :tags: stackoverflow-example-data, stackoverflow-dataset, stackoverflow-template-data, stackoverflow-example, stackoverflow-template, stackoverflow-model 10 | //graph example author 11 | :author: Neo4j Devrel 12 | //true if this example is available on the demodb. Set to false if not used 13 | :demodb: false 14 | //data flat files (csv, json, etc). Set to false if not used 15 | :data: false 16 | //use a script to generate/process data? 
Set to either path for script, or false if not used 17 | :use-load-script: false 18 | //use a graph dump file for initial data set? Set to either path for dump file, or false if not used 19 | :use-dump-file: data/stackoverflow-43.dump 20 | //if a zip file exists for the data, specify here. Set to false if not used 21 | :zip-file: false 22 | //use a plugin for the database, separate multiple plugins with comma. 'public' plugins are apoc, graph-algorithms. 23 | //other algorithms are specified by path, e.g. apoc,graph-algorithms; Set to false if not used 24 | :use-plugin: apoc 25 | //target version of the database this example should run on 26 | :target-db-version: 4.0,4.3 27 | //specify a Bloom perspective, or false if not used 28 | :bloom-perspective: false 29 | //guide for the graph example. Should be friendly enough to be converted into various document formats 30 | :guide: documentation/stackoverflow.neo4j-browser-guide 31 | //temporary for rendered guides 32 | :rendered-guide: https://guides.neo4j.com/sandbox/{name} 33 | //guide for modeling decisions. 
Should be friendly enough to be converted into various document formats 34 | :model: documentation/img/model.svg 35 | :example: documentation/img/example.png 36 | :nodes: 6193 37 | :relationships: 11540 38 | 39 | image::{icon}[width=100] 40 | 41 | == {long-name} Graph Example 42 | 43 | Description: _{description}_ 44 | 45 | ifeval::[{todo} != false] 46 | To Do: {todo} 47 | endif::[] 48 | 49 | Nodes {nodes} Relationships {relationships} 50 | 51 | .Model 52 | image::{model}[] 53 | 54 | .Example 55 | image::{example}[width=600] 56 | 57 | .Example Query: 58 | 59 | :param-name: tagName 60 | :param-value: neo4j 61 | 62 | [source,cypher,role=query-example-params,subs=+attributes] 63 | ---- 64 | :param {param-name} => "{param-value}" 65 | ---- 66 | 67 | [source,cypher,role=query-example,param-name={param-name},param-value={param-value},result-column=answerer,expected-result="A value"] 68 | ---- 69 | MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User) 70 | RETURN u.display_name as answerer LIMIT 5 71 | ---- 72 | 73 | === Setup 74 | 75 | This is for Neo4j version: {target-db-version} 76 | 77 | ifeval::[{use-plugin} != false] 78 | Required plugins: {use-plugin} 79 | endif::[] 80 | 81 | ifeval::[{demodb} != false] 82 | The database is also available on https://demo.neo4jlabs.com:7473 83 | 84 | Username "{name}", password: "{name}", database: "{name}" 85 | endif::[] 86 | 87 | Rendered guide available via: `:play {rendered-guide}` 88 | 89 | Unrendered guide: link:{guide}[] 90 | 91 | Load graph data via the following: 92 | 93 | ifeval::[{data} != false] 94 | ==== Data files: `{data}` 95 | 96 | Import flat files (csv, json, etc) using Cypher's https://neo4j.com/docs/cypher-manual/current/clauses/load-csv/[`LOAD CSV`], https://neo4j.com/labs/apoc/[APOC library], or https://neo4j.com/developer/data-import/[other methods]. 
97 | endif::[] 98 | 99 | ifeval::[{use-dump-file} != false] 100 | ==== Dump file: `{use-dump-file}` 101 | 102 | * Drop the file into the `Files` section of a project in Neo4j Desktop. Then choose the `Create new DBMS from dump` option from the file options. 103 | 104 | * Use the neo4j-admin tool to load data from the command line with the command below. 105 | 106 | [source,shell,subs=attributes] 107 | ---- 108 | bin/neo4j-admin load --from {use-dump-file} [--database "database"] 109 | ---- 110 | 111 | * Upload the dump file to Neo4j Aura via https://console.neo4j.io/#import-instructions 112 | endif::[] 113 | 114 | ifeval::[{use-load-script} != false] 115 | ==== Data load script: `{use-load-script}` 116 | 117 | [source,shell,subs=attributes] 118 | ---- 119 | bin/cypher-shell -u neo4j -p "password" -f {use-load-script} [-d "database"] 120 | ---- 121 | 122 | Or import in Neo4j Browser by dragging or pasting the content of {use-load-script}. 123 | endif::[] 124 | 125 | ifeval::[{zip-file} != false] 126 | ==== Zip file 127 | 128 | Download the zip file link:{repo}/raw/master/{name}.zip[{name}.zip] and add it as "project from file" to https://neo4j.com/developer/neo4j-desktop[Neo4j Desktop^]. 129 | endif::[] 130 | 131 | === Code Examples 132 | 133 | * link:code/javascript/example.js[JavaScript] 134 | * link:code/java/Example.java[Java] 135 | * link:code/csharp/Example.cs[C#] 136 | * link:code/python/example.py[Python] 137 | * link:code/go/example.go[Go] 138 | 139 | === Feedback 140 | 141 | Feel free to submit issues or pull requests for improvement on this repository. 
142 | -------------------------------------------------------------------------------- /bloom/bloom.perspective: -------------------------------------------------------------------------------- 1 | {"name":"Movie Graph","id":"d7a7df30-8473-11ea-a6ec-0b5e71841762","categories":[{"id":0,"name":"Other","color":"#6B6B6B","size":1,"icon":"no-icon","labels":[],"properties":[],"hiddenLabels":[],"caption":[""]},{"id":1,"name":"Person","color":"#FFE081","size":1,"icon":"F91CCD45-2C41-40B8-ADF6-5CBED4BD6C6E","labels":["Person"],"properties":[{"name":"name","exclude":false,"isCaption":true,"dataType":"string"},{"name":"born","exclude":false,"isCaption":false,"dataType":"bigint"}],"hiddenLabels":[],"caption":[""],"createdAt":"Wed Apr 22 2020","lastEditedAt":"Wed Apr 22 2020"},{"id":2,"name":"Movie","color":"#C990C0","size":1,"icon":"B0C159D8-F7B2-40A9-9840-1C7DE4F54B63","labels":["Movie"],"properties":[{"name":"title","exclude":false,"isCaption":true,"dataType":"string"},{"name":"tagline","exclude":false,"isCaption":false,"dataType":"string"},{"name":"released","exclude":false,"isCaption":false,"dataType":"bigint"}],"hiddenLabels":[],"caption":[""],"createdAt":"Wed Apr 22 2020","lastEditedAt":"Wed Apr 22 
2020"}],"categoryIndex":2,"relationshipTypes":[{"properties":[{"propertyKey":"roles","type":"ACTED_IN","dataType":"array"}],"name":"ACTED_IN","id":"ACTED_IN","size":1,"color":"#F79767"},{"name":"DIRECTED","id":"DIRECTED","color":"#57C7E3","size":1},{"name":"PRODUCED","id":"PRODUCED","color":"#569480"},{"name":"WROTE","id":"WROTE","color":"#e9b5ec"},{"name":"FOLLOWS","id":"FOLLOWS","color":"#F16667"},{"properties":[{"propertyKey":"summary","type":"REVIEWED","dataType":"string"},{"propertyKey":"rating","type":"REVIEWED","dataType":"bigint"}],"name":"REVIEWED","id":"REVIEWED","color":"#4C8EDA"}],"palette":{"colors":["#FFE081","#C990C0","#F79767","#57C7E3","#F16667","#D9C8AE","#8DCC93","#ECB5C9","#4C8EDA","#FFC454","#DA7194","#569480","#848484","#D9D9D9"],"currentIndex":2},"createdAt":"Wed Apr 22 2020","lastEditedAt":"Wed Apr 22 2020","templates":[{"name":"Find the co-actors for a given person","id":"tmpl:1587545670728","createdAt":"Wed Apr 22 2020","text":"coactors of $person","cypher":"MATCH (p:Person)-[r1]->()<-[r2:ACTED_IN]-(p2)\nWHERE toLower(p.name) STARTS WITH toLower($person)\nRETURN *","params":[{"name":"$person","dataType":"String","suggestionLabel":"Person","suggestionProp":"name","cypher":null}],"hasCypherErrors":false},{"name":"Four hops from a person","id":"tmpl:1587546227251","createdAt":"Wed Apr 22 2020","text":"Four hops from $person","cypher":"MATCH (p:Person)-[r*1..4]-(hops)\nWHERE toLower(p.name) = toLower($person)\nRETURN *","params":[{"name":"$person","dataType":"String","suggestionLabel":"Person","suggestionProp":"name","cypher":null}],"hasCypherErrors":false},{"name":"Shortest path between two people","id":"tmpl:1587546502335","createdAt":"Wed Apr 22 2020","text":"Shortest path between $person1 and $person2","cypher":"MATCH p=shortestPath(\n(bacon:Person {name:$person1})-[*]-(meg:Person {name:$person2})\n)\nRETURN 
p","params":[{"name":"$person1","dataType":"String","suggestionLabel":"Person","suggestionProp":"name","cypher":null},{"name":"$person2","dataType":"String","suggestionLabel":"Person","suggestionProp":"name","cypher":null}],"hasCypherErrors":false},{"name":"Recommend 5 new actors for a person","id":"tmpl:1587546623511","createdAt":"Wed Apr 22 2020","text":"Recommend new coactors for $person","cypher":"MATCH (actor:Person {name:$person})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors),\n (coActors)-[:ACTED_IN]->(m2)<-[:ACTED_IN]-(cocoActors)\nWHERE NOT (actor)-[:ACTED_IN]->()<-[:ACTED_IN]-(cocoActors) AND actor <> cocoActors\nWITH cocoActors, count(*) AS Strength\nRETURN cocoActors ORDER BY Strength DESC LIMIT 5","params":[{"name":"$person","dataType":"String","suggestionLabel":"Person","suggestionProp":"name","cypher":null}],"hasCypherErrors":false}],"hiddenRelationshipTypes":[],"hiddenCategories":[],"hideUncategorisedData":false,"version":"1.2.1"} -------------------------------------------------------------------------------- /code/csharp/Example.cs: -------------------------------------------------------------------------------- 1 | // install dotnet core on your system 2 | // dotnet new console -o . 
3 | // dotnet add package Neo4j.Driver 4 | // paste in this code into Program.cs 5 | // dotnet run 6 | 7 | using System; 8 | using System.Collections.Generic; 9 | using System.Text; 10 | using System.Threading.Tasks; 11 | using Neo4j.Driver; 12 | 13 | namespace dotnet { 14 | class Example { 15 | static async Task Main() { 16 | var driver = GraphDatabase.Driver("bolt://:", 17 | AuthTokens.Basic("", "")); 18 | 19 | var cypherQuery = 20 | @" 21 | MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User) 22 | RETURN u.display_name as answerer LIMIT 5 23 | "; 24 | 25 | var session = driver.AsyncSession(o => o.WithDatabase("neo4j")); 26 | var result = await session.ReadTransactionAsync(async tx => { 27 | var r = await tx.RunAsync(cypherQuery, 28 | new { tagName="neo4j"}); 29 | return await r.ToListAsync(); 30 | }); 31 | 32 | await session?.CloseAsync(); 33 | foreach (var row in result) 34 | Console.WriteLine(row["answerer"].As()); 35 | 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /code/go/example.go: -------------------------------------------------------------------------------- 1 | // go mod init main 2 | // go run example.go 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "github.com/neo4j/neo4j-go-driver/v4/neo4j" 8 | "io" 9 | "reflect" 10 | ) 11 | 12 | func main() { 13 | results, err := runQuery("bolt://:", "neo4j", "", "") 14 | if err != nil { 15 | panic(err) 16 | } 17 | for _, result := range results { 18 | fmt.Println(result) 19 | } 20 | } 21 | 22 | func runQuery(uri, database, username, password string) (result []string, err error) { 23 | driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) 24 | if err != nil { 25 | return nil, err 26 | } 27 | defer func() {err = handleClose(driver, err)}() 28 | session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead, DatabaseName: database}) 29 | defer func() 
{err = handleClose(session, err)}() 30 | results, err := session.ReadTransaction(func(transaction neo4j.Transaction) (interface{}, error) { 31 | result, err := transaction.Run( 32 | ` 33 | MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User) 34 | RETURN u.display_name as answerer LIMIT 5 35 | `, map[string]interface{}{ 36 | "tagName": "neo4j", 37 | }) 38 | if err != nil { 39 | return nil, err 40 | } 41 | var arr []string 42 | for result.Next() { 43 | value, found := result.Record().Get("answerer") 44 | if found { 45 | arr = append(arr, value.(string)) 46 | } 47 | } 48 | if err = result.Err(); err != nil { 49 | return nil, err 50 | } 51 | return arr, nil 52 | }) 53 | if err != nil { 54 | return nil, err 55 | } 56 | result = results.([]string) 57 | return result, err 58 | } 59 | 60 | func handleClose(closer io.Closer, previousError error) error { 61 | err := closer.Close() 62 | if err == nil { 63 | return previousError 64 | } 65 | if previousError == nil { 66 | return err 67 | } 68 | return fmt.Errorf("%v closure error occurred:\n%s\ninitial error was:\n%w", reflect.TypeOf(closer), err.Error(), previousError) 69 | } 70 | -------------------------------------------------------------------------------- /code/go/example_aura.go: -------------------------------------------------------------------------------- 1 | package main 2 | import ( 3 | "fmt" 4 | "github.com/neo4j/neo4j-go-driver/neo4j" 5 | ) 6 | func main() { 7 | var driver neo4j.Driver 8 | var err error 9 | // Aura requires you to use "bolt+routing" protocol, and process your queries using an encrypted connection 10 | // (You may need to replace your connection details, username and password) 11 | boltURL := "bolt+routing://" 12 | auth := neo4j.BasicAuth("", "", "") 13 | 14 | configurers := []func(*neo4j.Config){ 15 | func (config *neo4j.Config) { 16 | config.Encrypted = true 17 | }, 18 | } 19 | if driver, err = neo4j.NewDriver(boltURL, auth, 
configurers...); err != nil { 20 | panic(err) 21 | } 22 | 23 | // Don't forget to close the driver connection when you are finished with it 24 | defer driver.Close() 25 | 26 | var writeSession neo4j.Session 27 | // Using write transactions allow the driver to handle retries and transient errors for you 28 | if writeSession, err = driver.Session(neo4j.AccessModeWrite); err != nil { 29 | panic(err) 30 | } 31 | defer writeSession.Close() 32 | 33 | // To learn more about the Cypher syntax, see https://neo4j.com/docs/cypher-manual/current/ 34 | // The Reference Card is also a good resource for keywords https://neo4j.com/docs/cypher-refcard/current/ 35 | createRelationshipBetweenPeopleQuery := ` 36 | MERGE (p1:Person { name: $person1_name }) 37 | MERGE (p2:Person { name: $person2_name }) 38 | MERGE (p1)-[:KNOWS]->(p2) 39 | RETURN p1, p2` 40 | 41 | var result neo4j.Result 42 | result, err = writeSession.Run(createRelationshipBetweenPeopleQuery, map[string]interface{}{ 43 | "person1_name": "Alice", 44 | "person2_name": "David", 45 | }) 46 | 47 | if err != nil { 48 | panic(err) 49 | } 50 | 51 | // You should capture any errors along with the query and data for traceability 52 | if result.Err() != nil { 53 | panic(result.Err()) 54 | } 55 | 56 | for result.Next() { 57 | firstPerson := result.Record().GetByIndex(0).(neo4j.Node) 58 | fmt.Printf("First: '%s'\n", firstPerson.Props()["name"].(string)) 59 | secondPerson := result.Record().GetByIndex(1).(neo4j.Node) 60 | fmt.Printf("Second: '%s'\n", secondPerson.Props()["name"].(string)) 61 | } 62 | 63 | var readSession neo4j.Session 64 | 65 | if readSession, err = driver.Session(neo4j.AccessModeRead); err != nil { 66 | panic(err) 67 | } 68 | defer readSession.Close() 69 | 70 | readPersonByName := ` 71 | MATCH (p:Person) 72 | WHERE p.name = $person_name 73 | RETURN p.name AS name` 74 | 75 | result, err = readSession.Run(readPersonByName, map[string]interface{}{"person_name": "Alice"}) 76 | 77 | if err != nil { 78 | panic(err) 79 | } 
80 | 81 | if result.Err() != nil { 82 | panic(result.Err()) 83 | } 84 | 85 | for result.Next() { 86 | fmt.Printf("Person name: '%s' \n", result.Record().GetByIndex(0).(string)) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /code/go/go.mod: -------------------------------------------------------------------------------- 1 | module example 2 | 3 | go 1.14 4 | 5 | require github.com/neo4j/neo4j-go-driver v1.8.0 6 | -------------------------------------------------------------------------------- /code/go/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 3 | github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= 4 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 5 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 6 | github.com/neo4j/neo4j-go-driver v1.8.0 h1:YRp9jsFcF9k/AnvbcqFCN9OMeIT2XTJgxOpp2Puq7OE= 7 | github.com/neo4j/neo4j-go-driver v1.8.0/go.mod h1:0A49wIv0oP3uQdnbceK7Kc+snlY5B0F6dmtYArM0ltk= 8 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 9 | github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= 10 | github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= 11 | github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= 12 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 13 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 14 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 15 | github.com/stretchr/testify v1.5.1/go.mod 
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 16 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 17 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 18 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 19 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 20 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 21 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 22 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 23 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 24 | golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 25 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 26 | golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 27 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 28 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 29 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 30 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 31 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 32 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 33 | rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= 34 | rsc.io/sampler v1.3.0/go.mod 
h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= 35 | -------------------------------------------------------------------------------- /code/graphql/example.js: -------------------------------------------------------------------------------- 1 | const { Neo4jGraphQL } = require("@neo4j/graphql"); 2 | const { ApolloServer } = require("apollo-server"); 3 | const neo4j = require("neo4j-driver"); 4 | 5 | const driver = neo4j.driver( 6 | "bolt://:", 7 | neo4j.auth.basic("", "") 8 | ); 9 | 10 | let typeDefs; 11 | 12 | // Create executable GraphQL schema from GraphQL type definitions, 13 | // using @neo4j/graphql to autogenerate resolvers 14 | const neoSchema = new Neo4jGraphQL({ 15 | typeDefs, 16 | debug: true, 17 | }); 18 | 19 | // Create ApolloServer instance that will serve GraphQL schema created above 20 | // Inject Neo4j driver instance into the context object, which will be passed 21 | // into each (autogenerated) resolver 22 | const server = new ApolloServer({ 23 | context: { driver }, 24 | schema: neoSchema.schema, 25 | introspection: true, 26 | playground: true, 27 | }); 28 | 29 | // Start ApolloServer 30 | server.listen().then(({ url }) => { 31 | console.log(`GraphQL server ready at ${url}`); 32 | }); 33 | -------------------------------------------------------------------------------- /code/java/Example.java: -------------------------------------------------------------------------------- 1 | // Add your the driver dependency to your pom.xml build.gradle etc. 
2 | // Java Driver Dependency: http://search.maven.org/#artifactdetails|org.neo4j.driver|neo4j-java-driver|4.0.1|jar 3 | // Reactive Streams http://search.maven.org/#artifactdetails|org.reactivestreams|reactive-streams|1.0.3|jar 4 | // download jars into current directory 5 | // java -cp "*" Example.java 6 | 7 | import org.neo4j.driver.*; 8 | import static org.neo4j.driver.Values.parameters; 9 | 10 | public class Example { 11 | 12 | public static void main(String...args) { 13 | 14 | Driver driver = GraphDatabase.driver("bolt://:", 15 | AuthTokens.basic("","")); 16 | 17 | try (Session session = driver.session(SessionConfig.forDatabase("neo4j"))) { 18 | 19 | String cypherQuery = 20 | "MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User)\n" + 21 | "RETURN u.display_name as answerer LIMIT 5"; 22 | 23 | var result = session.readTransaction( 24 | tx -> tx.run(cypherQuery, 25 | parameters("tagName","neo4j")) 26 | .list()); 27 | 28 | for (Record record : result) { 29 | System.out.println(record.get("answerer").asString()); 30 | } 31 | } 32 | driver.close(); 33 | } 34 | } 35 | 36 | 37 | -------------------------------------------------------------------------------- /code/javascript/example.js: -------------------------------------------------------------------------------- 1 | // npm install --save neo4j-driver 2 | // node example.js 3 | const neo4j = require('neo4j-driver'); 4 | const driver = neo4j.driver('bolt://:', 5 | neo4j.auth.basic('', ''), 6 | {/* encrypted: 'ENCRYPTION_OFF' */}); 7 | 8 | const query = 9 | ` 10 | MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User) 11 | RETURN u.display_name as answerer LIMIT 5 12 | `; 13 | 14 | const params = {"tagName": "neo4j"}; 15 | 16 | const session = driver.session({database:"neo4j"}); 17 | 18 | session.run(query, params) 19 | .then((result) => { 20 | result.records.forEach((record) => { 21 | 
console.log(record.get('answerer')); 22 | }); 23 | session.close(); 24 | driver.close(); 25 | }) 26 | .catch((error) => { 27 | console.error(error); 28 | }); 29 | -------------------------------------------------------------------------------- /code/python/example.py: -------------------------------------------------------------------------------- 1 | # pip3 install neo4j-driver 2 | # python3 example.py 3 | 4 | from neo4j import GraphDatabase, basic_auth 5 | 6 | driver = GraphDatabase.driver( 7 | "bolt://:", 8 | auth=basic_auth("", "")) 9 | 10 | cypher_query = ''' 11 | MATCH (t:Tag {name:$tagName})<-[:TAGGED]-(q:Question)<-[:ANSWERED]-(a:Answer {is_accepted:true})<-[:PROVIDED]-(u:User) 12 | RETURN u.display_name as answerer LIMIT 5 13 | ''' 14 | 15 | with driver.session(database="neo4j") as session: 16 | results = session.read_transaction( 17 | lambda tx: tx.run(cypher_query, 18 | tagName="neo4j").data()) 19 | for record in results: 20 | print(record['answerer']) 21 | 22 | driver.close() 23 | -------------------------------------------------------------------------------- /code/test-code.sh: -------------------------------------------------------------------------------- 1 | #/bin/sh 2 | NAME=${1-stackoverflow} 3 | TARGETPATH=${2-/tmp} 4 | TARGET="$TARGETPATH/$NAME" 5 | if [ ! 
-d $TARGET ]; then 6 | git clone https://github.com/neo4j-graph-examples/$name $TARGET 7 | fi 8 | 9 | QUERY=`grep -e '^\(:query:\| .*\+$\)' $TARGET/README.adoc | cut -d' ' -f2- | sed -e 's/\+$//g'` 10 | EXPECT=`grep :expected-result: $TARGET/README.adoc | cut -d' ' -f2-` 11 | PARAMNAME=`grep :param-name: $TARGET/README.adoc | cut -d' ' -f2-` 12 | PARAMVALUE=`grep :param-value: $TARGET/README.adoc | cut -d' ' -f2-` 13 | RESULTCOLUMN=`grep :result-column: $TARGET/README.adoc | cut -d' ' -f2-` 14 | 15 | # "Cloud Atlas" 16 | echo For example \"$NAME\" running 17 | echo $QUERY 18 | echo Expecting \"$EXPECT\" with {\"$PARAMNAME\": \"$PARAMVALUE\"} returning \"$RESULTCOLUMN\" 19 | 20 | BOLTPORT=7687 21 | HOST=localhost 22 | USERNAME=neo4j 23 | PASSWORD=secret 24 | JAVA_DRIVER_VERSION=4.0.1 25 | RX_VERSION=1.0.3 26 | 27 | # todo enterprise 28 | DOCKER_ID=`docker run -d -p $BOLTPORT:$BOLTPORT -v $TARGET:/repo --env NEO4J_AUTH=$USERNAME/$PASSWORD neo4j:3.5` 29 | 30 | echo $DOCKER_ID 31 | docker exec $DOCKER_ID sh -c "ls /repo" 32 | # sleep 10 33 | 34 | docker logs $DOCKER_ID 35 | 36 | # todo also handle dump files 37 | exit 38 | 39 | docker exec $DOCKER_ID sh -c "cat /repo/scripts/import.cypher | cypher-shell -u $USERNAME -p $PASSWORD" 40 | 41 | echo "Node-Count:" 42 | docker exec $DOCKER_ID cypher-shell -u $USERNAME -p $PASSWORD 'MATCH (n) RETURN count(*);' 43 | 44 | CODE=/tmp/code 45 | mkdir -p $CODE 46 | pushd $CODE 47 | 48 | npm install --save neo4j-driver 49 | sed -e "s//$BOLTPORT/g" -e "s//$HOST/g" -e "s/mUser/$USERNAME/g" -e "s/s3cr3t/$PASSWORD/g" $TARGET/code/javascript/example.js > example.js 50 | node example.js | grep "$EXPECT" || echo "JAVASCRIPT FAIL" 51 | 52 | pip install neo4j-driver 53 | sed -e "s//$BOLTPORT/g" -e "s//$HOST/g" -e "s/mUser/$USERNAME/g" -e "s/s3cr3t/$PASSWORD/g" $TARGET/code/python/example.py > example.py 54 | python example.py | grep "$EXPECT" || echo "PYTHON FAIL" 55 | 56 | curl -sOL 
https://repo1.maven.org/maven2/org/reactivestreams/reactive-streams/${RX_VERSION}/reactive-streams-${RX_VERSION}.jar 57 | curl -sOL https://repo1.maven.org/maven2/org/neo4j/driver/neo4j-java-driver/${JAVA_DRIVER_VERSION}/neo4j-java-driver-${JAVA_DRIVER_VERSION}.jar 58 | 59 | sed -e "s//$BOLTPORT/g" -e "s//$HOST/g" -e "s/mUser/$USERNAME/g" -e "s/s3cr3t/$PASSWORD/g" $TARGET/code/java/Example.java > Example.java 60 | 61 | javac -cp neo4j-java-driver-${JAVA_DRIVER_VERSION}.jar Example.java 62 | java -cp neo4j-java-driver-${JAVA_DRIVER_VERSION}.jar:reactive-streams-${RX_VERSION}.jar:. Example | grep "$EXPECT" || echo "JAVA FAIL" 63 | 64 | sed -e "s//$BOLTPORT/g" -e "s//$HOST/g" -e "s/mUser/$USERNAME/g" -e "s/s3cr3t/$PASSWORD/g" $TARGET/code/go/example.go > example.go 65 | go mod init main 66 | go run example.go | grep "$EXPECT" || echo "GO FAIL" 67 | 68 | popd 69 | 70 | echo rm -rf $TMP 71 | docker rm -f $DOCKER_ID -------------------------------------------------------------------------------- /data/stackoverflow-43.dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/stackoverflow/7f0fa206ae9a0a4514a49879b3f497927f2beee0/data/stackoverflow-43.dump -------------------------------------------------------------------------------- /data/stackoverflow-50.dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/stackoverflow/7f0fa206ae9a0a4514a49879b3f497927f2beee0/data/stackoverflow-50.dump -------------------------------------------------------------------------------- /data/stackoverflow-data-importer.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/stackoverflow/7f0fa206ae9a0a4514a49879b3f497927f2beee0/data/stackoverflow-data-importer.zip 
-------------------------------------------------------------------------------- /documentation/img/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/stackoverflow/7f0fa206ae9a0a4514a49879b3f497927f2beee0/documentation/img/example.png -------------------------------------------------------------------------------- /documentation/img/icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /documentation/img/model.svg: -------------------------------------------------------------------------------- 1 | Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)TAGGEDASKEDANSWEREDCOMMENTEDCOMMENTED_ONPROVIDED Answer Comment User Question Tag -------------------------------------------------------------------------------- /documentation/img/so_logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /documentation/img/user-provided-answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/stackoverflow/7f0fa206ae9a0a4514a49879b3f497927f2beee0/documentation/img/user-provided-answer.png -------------------------------------------------------------------------------- /documentation/stackoverflow.adoc: -------------------------------------------------------------------------------- 1 | == Exploring Stackoverflow 2 | :images: {img} 3 | 4 | image::{img}/so_logo.svg[float=right,width=200px] 5 | 6 | Every developer has a tab open https://stackoverflow.com/[Stack Overflow^], the massively popular question-and-answer site. 
This dataset explores users, questions, answers, comments, and tags related to the `neo4j`- and `cypher`- tagged questions on Stack Overflow. 7 | 8 | This guide will show you how to: 9 | 10 | * Create: load questions, answers, comments, tags, and users into the graph 11 | * Find: Find unanswered questions 12 | * Query: discover the most engaged users and most popular questions 13 | 14 | Throughout the guide you'll find Cypher statements that you can execute, by clicking on them and then executing them by hitting the run button. 15 | 16 | == The Model 17 | 18 | image::{images}/model.svg[width=100%] 19 | 20 | === Nodes 21 | 22 | We model the `Users`, `Questions`, `Answers`, `Comments`, and `Tags` as nodes. 23 | 24 | === Relationships 25 | 26 | Users ask questions, comment, and provide answers. 27 | 28 | `(:User)-[:ASKED]->(:Question)` 29 | `(:User)-[:COMMENTED]->(:Comment)` 30 | `(:User)-[:PROVIDED]->(:Answer)` 31 | 32 | Answers attempt to answer questions, comments are made on questions, and questions are associated with tags. 33 | 34 | `(:Answer)-[:ANSWERED]->(:Question)` 35 | `(:Comment)-[:COMMENTED_ON]->(:Question)` 36 | `(:Question)-[:TAGGED]->(:Tag)` 37 | 38 | If you want to see it yourself, run: 39 | 40 | [source,cypher] 41 | ---- 42 | CALL db.schema.visualization; 43 | ---- 44 | 45 | == Load JSON Import 46 | 47 | There is already data preloaded in this graph, if you want to extend (more pages or other tags) or update it with the most recent questions, please modify and run the statement below. 48 | 49 | Update this dataset using `apoc.load.json`. Run the query as-is to update or add a tag to the `tags` array to extend. 
50 | 51 | //setup 52 | [source,cypher] 53 | ---- 54 | // look for several pages of questions 55 | WITH ["neo4j","cypher"] as tags 56 | UNWIND tags as tagName 57 | UNWIND range(1,2) as page 58 | 59 | WITH "https://api.stackexchange.com/2.3/questions?page="+page+"&pagesize=25&order=desc&sort=creation&tagged="+tagName+"&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" as url 60 | 61 | CALL apoc.load.json(url) YIELD value 62 | CALL apoc.util.sleep(250) // careful with throttling 63 | 64 | UNWIND value.items AS q 65 | 66 | // create the questions 67 | MERGE (question:Question {uuid:q.question_id}) 68 | ON CREATE SET question.title = q.title, 69 | question.link = q.share_link, 70 | question.creation_date = q.creation_date, 71 | question.accepted_answer_id=q.accepted_answer_id, 72 | question.view_count=q.view_count, 73 | question.answer_count=q.answer_count, 74 | question.body_markdown=q.body_markdown 75 | 76 | // who asked the question 77 | MERGE (owner:User {uuid:coalesce(q.owner.user_id,'deleted')}) 78 | ON CREATE SET owner.display_name = q.owner.display_name 79 | MERGE (owner)-[:ASKED]->(question) 80 | 81 | // what tags do the questions have 82 | FOREACH (tagName IN q.tags | 83 | MERGE (tag:Tag {name:tagName}) 84 | ON CREATE SET tag.link = "https://stackoverflow.com/questions/tagged/" + tag.name 85 | MERGE (question)-[:TAGGED]->(tag)) 86 | 87 | // who answered the questions? 
88 | FOREACH (a IN q.answers | 89 | MERGE (question)<-[:ANSWERED]-(answer:Answer {uuid:a.answer_id}) 90 | ON CREATE SET answer.is_accepted = a.is_accepted, 91 | answer.link=a.share_link, 92 | answer.title=a.title, 93 | answer.body_markdown=a.body_markdown, 94 | answer.score=a.score, 95 | answer.favorite_score=a.favorite_score, 96 | answer.view_count=a.view_count 97 | MERGE (answerer:User {uuid:coalesce(a.owner.user_id,'deleted')}) 98 | ON CREATE SET answerer.display_name = a.owner.display_name 99 | MERGE (answer)<-[:PROVIDED]-(answerer) 100 | ) 101 | 102 | // who commented on the question 103 | FOREACH (c in q.comments | 104 | MERGE (question)<-[:COMMENTED_ON]-(comment:Comment {uuid:c.comment_id}) 105 | ON CREATE SET comment.link=c.link, comment.score=c.score 106 | MERGE (commenter:User {uuid:coalesce(c.owner.user_id,'deleted')}) 107 | ON CREATE SET commenter.display_name = c.owner.display_name 108 | MERGE (comment)<-[:COMMENTED]-(commenter) 109 | ); 110 | ---- 111 | 112 | Read More: https://neo4j.com/labs/apoc/4.1/import/load-json/#load-json-examples-stackoverflow[Import from StackOverflow API^] 113 | 114 | == Basic Queries 115 | 116 | Labels in the graph and counts for each: 117 | 118 | [source,cypher] 119 | ---- 120 | MATCH (n) 121 | RETURN labels(n) as label, count(*) as freq 122 | ORDER BY freq DESC; 123 | ---- 124 | 125 | Relationship types in the graph and counts for each: 126 | 127 | [source,cypher] 128 | ---- 129 | MATCH ()-[r]->() 130 | RETURN type(r) as type, count(*) as freq 131 | ORDER BY freq DESC; 132 | ---- 133 | 134 | == Top Tags 135 | 136 | Which are the most popular tags? 137 | 138 | [source,cypher] 139 | ---- 140 | MATCH (q:Question)-[:TAGGED]->(t:Tag) 141 | RETURN t.name, count(q) AS questions 142 | ORDER BY questions DESC 143 | LIMIT 5; 144 | ---- 145 | 146 | == Exploring Users 147 | 148 | Who are the top users asking questions?
149 | 150 | [source,cypher] 151 | ---- 152 | MATCH (u:User)-[:ASKED]->(q:Question) 153 | RETURN u.display_name, count(*) AS questions 154 | ORDER by questions DESC 155 | LIMIT 10; 156 | ---- 157 | 158 | Who's answering? 159 | 160 | Ordered by number of answers 161 | 162 | [source,cypher] 163 | ---- 164 | MATCH (u:User)-[:PROVIDED]->(a:Answer)-[:ANSWERED]->(q:Question) 165 | RETURN u.display_name as user,COUNT(a) AS answers, avg(a.score) as avg_score 166 | ORDER BY answers DESC LIMIT 10; 167 | ---- 168 | 169 | Ordered by max score, filtered for a particular tag 170 | 171 | [source,cypher] 172 | ---- 173 | MATCH (u:User)-[:PROVIDED]->(a:Answer)-[:ANSWERED]-> 174 | (q:Question)-[:TAGGED]->(:Tag {name:"cypher"}) 175 | RETURN u.display_name as user,COUNT(a) AS answers, max(a.score) as max_score 176 | ORDER BY max_score DESC LIMIT 10; 177 | ---- 178 | 179 | What's the shortest path between users? 180 | 181 | [source,cypher] 182 | ---- 183 | MATCH path = allShortestPaths( 184 | (u1:User {display_name:"alexanoid"})-[*]-(u2:User {display_name:"InverseFalcon"}) 185 | ) 186 | RETURN path LIMIT 1; 187 | ---- 188 | 189 | == User Engagement 190 | 191 | User engagement over time: 192 | 193 | [source,cypher] 194 | ---- 195 | MATCH (u:User)-[:PROVIDED]->()-[:ANSWERED]-> 196 | (q:Question)-[:TAGGED]->(t:Tag) 197 | WHERE u.display_name = "InverseFalcon" 198 | RETURN apoc.date.format(q.creation_date,'s','yyyy-MM') as month, 199 | count(distinct q) as count, collect(distinct t.name) as tags 200 | ORDER BY month asc 201 | ---- 202 | 203 | == Unanswered Questions 204 | 205 | What are the tags for unanswered questions? 206 | 207 | [source,cypher] 208 | ---- 209 | MATCH (q:Question)-[:TAGGED]->(t:Tag) 210 | WHERE NOT t.name IN ['neo4j','cypher'] 211 | AND NOT (q)<-[:ANSWERED]-() 212 | RETURN t.name as tag, count(q) AS questions 213 | ORDER BY questions DESC LIMIT 10; 214 | ---- 215 | 216 | == How are tags related to other tags? 
217 | 218 | Tag correlations: 219 | 220 | [source,cypher] 221 | ---- 222 | MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag) 223 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j' 224 | RETURN t1.name, t2.name,count(*) as freq 225 | ORDER BY freq desc LIMIT 10; 226 | ---- 227 | 228 | == Virtual Graphs: Tags 229 | 230 | Project tags via co-occurrence with virtual relationships. 231 | 232 | [source,cypher] 233 | ---- 234 | MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag) 235 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j' 236 | WITH t1, t2,count(*) as freq where freq > 3 237 | RETURN t1,t2, apoc.create.vRelationship(t1,'OCCURRED',{freq:freq},t2) as rel 238 | ---- 239 | 240 | If you want to you can also materialize those relationships in the graph and then explore the data in Browser or Bloom visually forming clusters. 241 | 242 | [source,cypher] 243 | ---- 244 | MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag) 245 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j' 246 | WITH t1, t2,count(*) as freq where freq > 3 247 | MERGE (t1)-[r:OCCURRED]-(t2) SET r.freq=freq 248 | RETURN count(*) 249 | ---- 250 | 251 | == Virtual Graphs: Social Network 252 | 253 | We can do something similar for commenters - what users keep on running into each other in the comments? 
254 | 255 | [source,cypher] 256 | ---- 257 | MATCH p1=(u1:User)-[:COMMENTED]->(c1:Comment)-[:COMMENTED_ON]-(q:Question) 258 | MATCH p2=(u2:User)-[:COMMENTED]->(c2:Comment)-[:COMMENTED_ON]-(q) 259 | WHERE id(u1) < id(u2) 260 | WITH u1, u2, count(distinct q) as freq 261 | WHERE freq > 2 262 | RETURN u1, u2, apoc.create.vRelationship(u1,'OCCURRED',{freq:freq},u2) as rel 263 | ---- 264 | 265 | == Further Reading 266 | 267 | * https://neo4j.com/labs/apoc/4.1/import/load-json/#load-json-examples-stackoverflow[Import from StackOverflow API^] 268 | * https://neo4j.com/videos/exploring-stackoverflow-data-with-michael-hunger-twitch-stream/[Exploring StackOverflow data with Michael Hunger – Twitch stream^] 269 | * https://neo4j.com/blog/import-10m-stack-overflow-questions/[Import 10M Stack Overflow Questions into Neo4j In Just 3 Minutes^] 270 | -------------------------------------------------------------------------------- /documentation/stackoverflow.neo4j-browser-guide: -------------------------------------------------------------------------------- 1 | 21 | 24 |
25 | 26 | 44 | 45 | 46 | 47 | 48 |
49 |

Exploring Stackoverflow

50 |
51 |
52 |
53 |
54 | so logo 55 |
56 |
57 |
58 |

Every developer has a tab open to Stack Overflow, the massively popular question-and-answer site. This dataset explores users, questions, answers, comments, and tags related to the neo4j- and cypher-tagged questions on Stack Overflow.

59 |
60 |
61 |

This guide will show you how to:

62 |
63 |
64 |
    65 |
  • 66 |

    Create: load questions, answers, comments, tags, and users into the graph

    67 |
  • 68 |
  • 69 |

    Find: Find unanswered questions

    70 |
  • 71 |
  • 72 |

    Query: discover the most engaged users and most popular questions

    73 |
  • 74 |
75 |
76 |
77 |

Throughout the guide you’ll find Cypher statements that you can execute, by clicking on them and then executing them by hitting the run button.

78 |
79 |
80 |
81 |
82 | 83 | 84 | 85 | 86 |
87 |

The Model

88 |
89 |
90 |
91 |
92 | model 93 |
94 |
95 | 96 | 97 | 98 |

Nodes

99 |
100 |

We model the Users, Questions, Answers, Comments, and Tags as nodes.

101 |
102 | 103 | 104 | 105 |

Relationships

106 |
107 |

Users ask questions, comment, and provide answers.

108 |
109 |
110 |

(:User)-[:ASKED]→(:Question) 111 | (:User)-[:COMMENTED]→(:Comment) 112 | (:User)-[:PROVIDED]→(:Answer)

113 |
114 |
115 |

Answers attempt to answer questions, comments are made on questions, and questions are associated with tags.

116 |
117 |
118 |

(:Answer)-[:ANSWERED]→(:Question) 119 | (:Comment)-[:COMMENTED_ON]→(:Question) 120 | (:Question)-[:TAGGED]→(:Tag)

121 |
122 |
123 |

If you want to see it yourself, run:

124 |
125 |
126 |
127 |
CALL db.schema.visualization;
128 |
129 |
130 |
131 |
132 |
133 | 134 | 135 | 136 | 137 |
138 |

Load JSON Import

139 |
140 |
141 |
142 |

There is already data preloaded in this graph. If you want to extend it (more pages or other tags) or update it with the most recent questions, modify and run the statement below.

143 |
144 |
145 |

Update this dataset using apoc.load.json. Run the query as-is to update the existing data, or add a tag to the tags array to extend the dataset.

146 |
147 |
148 |
149 |
// look for several pages of questions
150 | WITH ["neo4j","cypher"] as tags
151 | UNWIND tags as tagName
152 | UNWIND range(1,2) as page
153 | 
154 | WITH "https://api.stackexchange.com/2.3/questions?page="+page+"&pagesize=25&order=desc&sort=creation&tagged="+tagName+"&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" as url
155 | 
156 | CALL apoc.load.json(url) YIELD value
157 | CALL apoc.util.sleep(250)  // careful with throttling
158 | 
159 | UNWIND value.items AS q
160 | 
161 | // create the questions
162 | MERGE (question:Question {uuid:q.question_id})
163 |   ON CREATE SET question.title = q.title,
164 |   	question.link = q.share_link,
165 |   	question.creation_date = q.creation_date,
166 |   	question.accepted_answer_id=q.accepted_answer_id,
167 |   	question.view_count=q.view_count,
168 |    	question.answer_count=q.answer_count,
169 |    	question.body_markdown=q.body_markdown
170 | 
171 | // who asked the question
172 | MERGE (owner:User {uuid:coalesce(q.owner.user_id,'deleted')})
173 |   ON CREATE SET owner.display_name = q.owner.display_name
174 | MERGE (owner)-[:ASKED]->(question)
175 | 
176 | // what tags do the questions have
177 | FOREACH (tagName IN q.tags |
178 |   MERGE (tag:Tag {name:tagName})
179 |     ON CREATE SET tag.link = "https://stackoverflow.com/questions/tagged/" + tag.name
180 |   MERGE (question)-[:TAGGED]->(tag))
181 | 
182 | // who answered the questions?
183 | FOREACH (a IN q.answers |
184 |    MERGE (question)<-[:ANSWERED]-(answer:Answer {uuid:a.answer_id})
185 |     ON CREATE SET answer.is_accepted = a.is_accepted,
186 |     answer.link=a.share_link,
187 |     answer.title=a.title,
188 |     answer.body_markdown=a.body_markdown,
189 |     answer.score=a.score,
190 |    	answer.favorite_score=a.favorite_score,
191 |    	answer.view_count=a.view_count
192 |    MERGE (answerer:User {uuid:coalesce(a.owner.user_id,'deleted')})
193 |     ON CREATE SET answerer.display_name = a.owner.display_name
194 |    MERGE (answer)<-[:PROVIDED]-(answerer)
195 | )
196 | 
197 | // who commented on the question
198 | FOREACH (c in q.comments |
199 |   MERGE (question)<-[:COMMENTED_ON]-(comment:Comment {uuid:c.comment_id})
200 |     ON CREATE SET comment.link=c.link, comment.score=c.score
201 |   MERGE (commenter:User {uuid:coalesce(c.owner.user_id,'deleted')})
202 |     ON CREATE SET commenter.display_name = c.owner.display_name
203 |   MERGE (comment)<-[:COMMENTED]-(commenter)
204 | );
205 |
206 |
207 |
208 |

Read More: Import from StackOverflow API

209 |
210 |
211 |
212 |
213 | 214 | 215 | 216 | 217 |
218 |

Basic Queries

219 |
220 |
221 |
222 |

Labels in the graph and counts for each:

223 |
224 |
225 |
226 |
MATCH (n)
227 | RETURN labels(n) as label, count(*) as freq
228 | ORDER BY freq DESC;
229 |
230 |
231 |
232 |

Relationship types in the graph and counts for each:

233 |
234 |
235 |
236 |
MATCH ()-[r]->()
237 | RETURN type(r) as type, count(*) as freq
238 | ORDER BY freq DESC;
239 |
240 |
241 |
242 |
243 |
244 | 245 | 246 | 247 | 248 |
249 |

Top Tags

250 |
251 |
252 |
253 |

Which are the most popular tags?

254 |
255 |
256 |
257 |
MATCH (q:Question)-[:TAGGED]->(t:Tag)
258 | RETURN t.name,  count(q) AS questions
259 | ORDER BY questions DESC
260 | LIMIT 5;
261 |
262 |
263 |
264 |
265 |
266 | 267 | 268 | 269 | 270 |
271 |

Exploring Users

272 |
273 |
274 |
275 |

Who are the top users asking questions?

276 |
277 |
278 |
279 |
MATCH (u:User)-[:ASKED]->(q:Question)
280 | RETURN u.display_name, count(*) AS questions
281 | ORDER by questions DESC
282 | LIMIT 10;
283 |
284 |
285 |
286 |

Who’s answering?

287 |
288 |
289 |

Ordered by number of answers

290 |
291 |
292 |
293 |
MATCH (u:User)-[:PROVIDED]->(a:Answer)-[:ANSWERED]->(q:Question)
294 | RETURN u.display_name as user,COUNT(a) AS answers, avg(a.score) as avg_score
295 | ORDER BY answers DESC LIMIT 10;
296 |
297 |
298 |
299 |

Ordered by max score, filtered for a particular tag

300 |
301 |
302 |
303 |
MATCH (u:User)-[:PROVIDED]->(a:Answer)-[:ANSWERED]->
304 |       (q:Question)-[:TAGGED]->(:Tag {name:"cypher"})
305 | RETURN u.display_name as user,COUNT(a) AS answers, max(a.score) as max_score
306 | ORDER BY max_score DESC LIMIT 10;
307 |
308 |
309 |
310 |

What’s the shortest path between users?

311 |
312 |
313 |
314 |
MATCH path = allShortestPaths(
315 |   (u1:User {display_name:"alexanoid"})-[*]-(u2:User {display_name:"InverseFalcon"})
316 | )
317 | RETURN path LIMIT 1;
318 |
319 |
320 |
321 |
322 |
323 | 324 | 325 | 326 | 327 |
328 |

User Engagement

329 |
330 |
331 |
332 |

User engagement over time:

333 |
334 |
335 |
336 |
MATCH (u:User)-[:PROVIDED]->()-[:ANSWERED]->
337 |       (q:Question)-[:TAGGED]->(t:Tag)
338 | WHERE u.display_name = "InverseFalcon"
339 | RETURN apoc.date.format(q.creation_date,'s','yyyy-MM') as month,
340 |        count(distinct q) as count, collect(distinct t.name) as tags
341 | ORDER BY month asc
342 |
343 |
344 |
345 |
346 |
347 | 348 | 349 | 350 | 351 |
352 |

Unanswered Questions

353 |
354 |
355 |
356 |

What are the tags for unanswered questions?

357 |
358 |
359 |
360 |
MATCH (q:Question)-[:TAGGED]->(t:Tag)
361 | WHERE NOT t.name IN ['neo4j','cypher']
362 |   AND NOT (q)<-[:ANSWERED]-()
363 | RETURN t.name as tag, count(q) AS questions
364 | ORDER BY questions DESC LIMIT 10;
365 |
366 |
367 |
368 |
369 |
370 | 371 | 372 | 373 | 374 |
375 |

How are tags related to other tags?

376 |
377 |
378 |
379 |

Tag correlations:

380 |
381 |
382 |
383 |
MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag)
384 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j'
385 | RETURN t1.name, t2.name,count(*) as freq
386 | ORDER BY freq desc LIMIT 10;
387 |
388 |
389 |
390 |
391 |
392 | 393 | 394 | 395 | 396 |
397 |

Virtual Graphs: Tags

398 |
399 |
400 |
401 |

Project tags via co-occurrence with virtual relationships.

402 |
403 |
404 |
405 |
MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag)
406 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j'
407 | WITH t1, t2,count(*) as freq  where freq > 3
408 | RETURN t1,t2, apoc.create.vRelationship(t1,'OCCURRED',{freq:freq},t2) as rel
409 |
410 |
411 |
412 |

If you want to, you can also materialize those relationships in the graph and then explore the data visually in Browser or Bloom, where the tags form clusters.

413 |
414 |
415 |
416 |
MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag)
417 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j'
418 | WITH t1, t2,count(*) as freq  where freq > 3
419 | MERGE (t1)-[r:OCCURRED]-(t2) SET r.freq=freq
420 | RETURN count(*)
421 |
422 |
423 |
424 |
425 |
426 | 427 | 428 | 429 | 430 |
431 |

Virtual Graphs: Social Network

432 |
433 |
434 |
435 |

We can do something similar for commenters - which users keep running into each other in the comments?

436 |
437 |
438 |
439 |
MATCH p1=(u1:User)-[:COMMENTED]->(c1:Comment)-[:COMMENTED_ON]-(q:Question)
440 | MATCH p2=(u2:User)-[:COMMENTED]->(c2:Comment)-[:COMMENTED_ON]-(q)
441 | WHERE id(u1) < id(u2)
442 | WITH u1, u2, count(distinct q) as freq
443 | WHERE freq > 2
444 | RETURN u1, u2, apoc.create.vRelationship(u1,'OCCURRED',{freq:freq},u2) as rel
445 |
446 |
447 |
448 |
449 |
450 | 451 | 452 | 453 | 454 |
455 |

Further Reading

456 |
457 | 472 |
473 |
474 |
475 |
-------------------------------------------------------------------------------- /documentation/stackoverflow.workspace.adoc: -------------------------------------------------------------------------------- 1 | == Social Network Analysis 2 | :images: {img} 3 | 4 | image::{img}/so_logo.svg[float=right,width=200px] 5 | 6 | https://stackoverflow.com/[Stack Overflow^], the massively popular question-and-answer site for developers. 7 | 8 | In this guide, you will learn: 9 | 10 | * How you can model social data as graphs 11 | * How to query the graph and answer questions using Cypher 12 | * How to use shortest path algorithms to understand relationships 13 | 14 | In the next section, you will import the Stack Overflow dataset into Neo4j. 15 | 16 | == Stack Overflow graph model 17 | [role=NX_TAB_NAV,tab=import] 18 | pagelaunch::[] 19 | 20 | The model contains data about users' questions, answers, and comments about Neo4j and Cypher. 21 | 22 | Nodes represent: 23 | 24 | * `Users` 25 | * `Questions` 26 | * `Answers` 27 | * `Comments` 28 | * `Tags` 29 | 30 | image::{img}/model.svg[width=100%] 31 | 32 | The relationships between these nodes show: 33 | 34 | . What `User` `ASKED` a `Question` 35 | . What `User` `PROVIDED` an `Answer` 36 | . What `User` `COMMENTED` to a `Comment` 37 | . What `Answer` `ANSWERED` a `Question` 38 | . What `Comment` is `COMMENTED_ON` a `Question` 39 | . What `Question` is `TAGGED` with a `Tag` 40 | 41 | button::Import the StackOverflow dataset[role=NX_IMPORT_LOAD,endpoint=https://neo4j-graph-examples.github.io/stackoverflow/data/stackoverflow-data-importer.zip] 42 | 43 | Click the highlight:import/import-run-import-button[Run import] button to import the data into Neo4j. 44 | 45 | button::Explore the data[role=NX_EXPLORE_SEARCH,search=User any Question any Answer] 46 | 47 | Take a minute to _Explore_ the data: 48 | 49 | . Find a `Question` node 50 | . Follow the `ASKED` relationship to the `User` who asked the question 51 | . 
Find an `Answer` node and follow the `ANSWERED` relationship to the `Question` it answered 52 | 53 | You can zoom in to see more detail and double-click on nodes to see their properties. 54 | 55 | [TIP] 56 | ==== 57 | You can also see a visual representation of the graph data model by running this Cypher query: 58 | 59 | [source,cypher] 60 | ---- 61 | CALL db.schema.visualization(); 62 | ---- 63 | ==== 64 | 65 | In the next section, you will use Cypher to query the graph. 66 | 67 | == Users and Questions 68 | [role=NX_TAB_NAV,tab=query] 69 | pagelaunch::[] 70 | 71 | Using the data model and Cypher, you can answer questions such as: 72 | 73 | * Which users asked the most questions? 74 | * Which users provided the most answers? 75 | * How are users related to each other? 76 | * Which questions are unanswered? 77 | * What answers are the most popular? 78 | 79 | The following query uses the `ASKED` relationship to find `User` and `Question` nodes. 80 | 81 | .Match User and Question nodes 82 | [source,cypher] 83 | ---- 84 | MATCH (u:User)-[a:ASKED]->(q:Question) 85 | RETURN u,a,q 86 | ---- 87 | 88 | [NOTE] 89 | ==== 90 | The arrow button icon:ArrowIcon[] copies the query to the clipboard. 91 | 92 | The play button icon:PlayIcon[] executes the query and returns the results. 93 | ==== 94 | 95 | Run the query and observe the results. 96 | You should note that the `ASKED` relationship is in the direction `User` to `Question`. 97 | 98 | [NOTE] 99 | .Challenge 100 | ==== 101 | Can you complete this query to find `User` and `Answer` nodes using the `PROVIDED` relationship: 102 | 103 | .Replace the `?`'s to complete the query 104 | [source,cypher] 105 | ---- 106 | MATCH (u:User)-[?]->(?) 107 | RETURN u,?,? 
108 | ---- 109 | ==== 110 | 111 | [%collapsible] 112 | .Reveal the solution 113 | ==== 114 | [source,cypher] 115 | ---- 116 | MATCH (u:User)-[p:PROVIDED]->(a:Answer) 117 | RETURN u,p,a 118 | ---- 119 | ==== 120 | 121 | The query returns this graph of `User` and `Answer` nodes: 122 | 123 | image::{img}/user-provided-answer.png[A graph of User and Answer nodes. There are clear clusters of users and answers.] 124 | 125 | The distribution of nodes in the graph is not uniform - you can see that a few users have provided many answers. 126 | 127 | In the next section, you will write Cypher to discover who those users are. 128 | 129 | == Exploring Users 130 | 131 | As well as returning nodes, you can also return properties of nodes and aggregations (e.g. counts). 132 | 133 | .How many questions did users ask? 134 | [source,cypher] 135 | ---- 136 | MATCH (u:User)-[:ASKED]->(q:Question) 137 | RETURN u.display_name, count(*) AS questions 138 | ---- 139 | 140 | Adding an order and limiting the number of rows returns a top 10 list of users who asked the most questions. 141 | 142 | [source,cypher] 143 | ---- 144 | MATCH (u:User)-[:ASKED]->(q:Question) 145 | RETURN u.display_name, count(*) AS questions 146 | ORDER by questions DESC 147 | LIMIT 10 148 | ---- 149 | 150 | You can create a similar query for users who provided the most answers. 151 | 152 | .How many answers did users provide? 153 | [source,cypher] 154 | ---- 155 | MATCH (u:User)-[:PROVIDED]->(a:Answer) 156 | RETURN u.display_name, COUNT(a) AS answers 157 | ORDER BY answers DESC 158 | LIMIT 10 159 | ---- 160 | 161 | Users score answers on Stack Overflow. 162 | You can find each user's average score using the `avg` function.
163 | 164 | [source,cypher] 165 | ---- 166 | MATCH (u:User)-[:PROVIDED]->(a:Answer) 167 | RETURN u.display_name, COUNT(a) AS answers, avg(a.score) as avg_score 168 | ORDER BY answers DESC 169 | LIMIT 10 170 | ---- 171 | 172 | [NOTE] 173 | .Challenge 174 | ==== 175 | Can you modify the query to find: 176 | 177 | . The users with the highest average score by changing the `ORDER`. 178 | . The users with the highest maximum score by using the `max` function. 179 | ==== 180 | 181 | [%collapsible] 182 | .Reveal the solution 183 | ==== 184 | .Users with the highest average score 185 | [source,cypher] 186 | ---- 187 | MATCH (u:User)-[:PROVIDED]->(a:Answer) 188 | RETURN u.display_name, COUNT(a) AS answers, avg(a.score) as avg_score 189 | ORDER BY avg_score DESC 190 | LIMIT 10 191 | ---- 192 | 193 | .Users with the highest maximum score 194 | [source,cypher] 195 | ---- 196 | MATCH (u:User)-[:PROVIDED]->(a:Answer) 197 | RETURN u.display_name, COUNT(a) AS answers, max(a.score) as max_score 198 | ORDER BY max_score DESC 199 | LIMIT 10 200 | ---- 201 | ==== 202 | 203 | The graph also contains the comments that users make on questions. The `COMMENTED` relationship connects `User` and `Comment` nodes. 204 | 205 | Can you complete this query to use the `COMMENTED` relationship to find the users who commented the most? 206 | 207 | .Replace the `?`'s to complete the query 208 | [source,cypher] 209 | ---- 210 | MATCH (u:User)-[:?????????]->(c:Comment) 211 | RETURN u.display_name, COUNT(?) AS comments 212 | ORDER BY ???????? DESC 213 | LIMIT 10 214 | ---- 215 | 216 | [%collapsible] 217 | .Reveal the solution 218 | ==== 219 | [source,cypher] 220 | ---- 221 | MATCH (u:User)-[:COMMENTED]->(c:Comment) 222 | RETURN u.display_name, COUNT(c) AS comments 223 | ORDER BY comments DESC 224 | LIMIT 10 225 | ---- 226 | ==== 227 | 228 | In the next section, you will use Cypher to filter the results returned by a query. 
229 | 230 | == Filtering Results 231 | 232 | In the previous section, you used Cypher to find the users who asked the most questions and provided the most answers. 233 | 234 | The user `"A. L"` asked the most questions, and `"cybersam"` provided the most answers. 235 | 236 | You can filter the results using a filter on `MATCH` or by using a `WHERE` clause. 237 | 238 | For example, find all the questions asked by `"A. L"`. 239 | 240 | .Filter on MATCH 241 | [source,cypher] 242 | ---- 243 | MATCH (u:User {display_name: "A. L"})-[a:ASKED]->(q:Question) 244 | RETURN u,a,q 245 | ---- 246 | 247 | .Using a WHERE clause 248 | [source,cypher] 249 | ---- 250 | MATCH (u:User)-[a:ASKED]->(q:Question) 251 | WHERE u.display_name = "A. L" 252 | RETURN u,a,q 253 | ---- 254 | 255 | Extending the query to include the `Tag` nodes through the `TAGGED` relationship allows you to filter for questions with a particular tag. 256 | 257 | [source,cypher] 258 | ---- 259 | MATCH (u:User)-[a:ASKED]->(q:Question)-[tg:TAGGED]->(t:Tag) 260 | WHERE u.display_name = "A. L" AND t.name = "cypher" 261 | RETURN u,a,q,tg,t 262 | ---- 263 | 264 | [NOTE] 265 | .Challenge 266 | ==== 267 | Can you find comments made by `"cybersam"` on questions tagged with `"neo4j"`? 268 | 269 | You will have to traverse the graph using the `User`, `Comment`, `Question`, and `Tag` nodes, through the `COMMENTED`, `COMMENTED_ON`, and `TAGGED` relationships. 270 | ==== 271 | 272 | [%collapsible] 273 | .Reveal the solution 274 | ==== 275 | .Comments made by "cybersam" on questions tagged with "neo4j" 276 | [source,cypher] 277 | ---- 278 | MATCH (u:User)-[ct:COMMENTED]->(c:Comment)-[co:COMMENTED_ON]->(q:Question)-[tg:TAGGED]->(t:Tag) 279 | WHERE u.display_name = "cybersam" AND t.name = "neo4j" 280 | RETURN u,ct,c,co,q,tg,t 281 | ---- 282 | ==== 283 | 284 | In the next section, you will use shortest path functions to understand how users relate to each other. 
285 | 286 | == Shortest paths 287 | 288 | As you have seen, user nodes are related to each other through the questions they ask, the answers they provide, and the comments they make. 289 | 290 | You can use shortest path functions to find the shortest path between users. 291 | 292 | For example, you can use `shortestPath` function to find the shortest route between the user `"A. L"` who asked the most questions, and `"cybersam"` who provided the most answers. 293 | 294 | [source,cypher] 295 | ---- 296 | MATCH path = shortestPath( 297 | (u1:User {display_name:"A. L"})-[*]-(u2:User {display_name:"cybersam"}) 298 | ) 299 | RETURN path; 300 | ---- 301 | 302 | The query is defined as the path `(u1:User {display_name:"A. L"})-[*]-(u2:User {display_name:"cybersam"})`. 303 | 304 | The `[*]` is a wildcard that matches any relationship type. 305 | 306 | The query returns the first shortest path it finds. 307 | You can return *all* the shortest paths using the `allShortestPaths` function. 308 | 309 | [source,cypher] 310 | ---- 311 | MATCH path = allShortestPaths( 312 | (u1:User {display_name:"A. L"})-[*]-(u2:User {display_name:"cybersam"}) 313 | ) 314 | RETURN path; 315 | ---- 316 | 317 | [NOTE] 318 | .Challenge 319 | ==== 320 | Can you identify two other users in the graph and find the shortest path between them? 321 | ==== 322 | 323 | In the next section, you will explore some Cypher queries and review the results. 324 | 325 | == Exploring the Graph 326 | 327 | In this section, you will review and run some Cypher queries which explore the graph. 328 | 329 | Review each Cypher query before running and observing the results. 330 | 331 | === Unanswered Questions 332 | 333 | What questions remain unanswered? 334 | In the graph, unanswered questions do not have an `ANSWERED` relationship. 335 | 336 | The absence of a relationship is a valid path in Cypher. 337 | You can use the `NOT` keyword to negate a pattern. 
338 | 339 | [source,cypher] 340 | ---- 341 | MATCH (q:Question) 342 | WHERE NOT (q)<-[:ANSWERED]-() 343 | RETURN q 344 | ---- 345 | 346 | What type of questions relating to "neo4j" and "cypher" remain unanswered? 347 | 348 | This query aggregates the results by the `Tag` node to show which tags have the most unanswered questions. 349 | 350 | [source,cypher] 351 | ---- 352 | MATCH (q:Question)-[:TAGGED]->(t:Tag) 353 | WHERE NOT t.name IN ['neo4j','cypher'] 354 | AND NOT (q)<-[:ANSWERED]-() 355 | RETURN t.name as tag, count(q) AS questions 356 | ORDER BY questions DESC LIMIT 10; 357 | ---- 358 | 359 | === User Engagement 360 | 361 | By aggregating when users interacted with Stack Overflow, you can see how active they are. 362 | 363 | For example, you can aggregate the number of questions answered by month. 364 | 365 | [source,cypher] 366 | ---- 367 | MATCH (u:User)-[:PROVIDED]->()-[:ANSWERED]-> 368 | (q:Question)-[:TAGGED]->(t:Tag) 369 | WHERE u.display_name = "cybersam" 370 | RETURN apoc.date.format(q.creation_date,'s','yyyy-MM') as month, 371 | count(distinct q) as count, collect(distinct t.name) as tags 372 | ORDER BY month asc 373 | ---- 374 | 375 | === How are tags related to other tags? 376 | 377 | Understanding how users tag questions can help you see patterns. 378 | 379 | For example, how tags correlate to each other: 380 | 381 | [source,cypher] 382 | ---- 383 | MATCH (t1:Tag)<-[:TAGGED]-()-[:TAGGED]->(t2:Tag) 384 | WHERE id(t1) < id(t2) and t1.name <> 'neo4j' and t2.name <> 'neo4j' 385 | RETURN t1.name, t2.name,count(*) as freq 386 | ORDER BY freq desc LIMIT 10; 387 | ---- 388 | 389 | In the next section, you will find more resources to continue learning about Neo4j. 390 | 391 | == Next steps 392 | 393 | Congratulations on completing this guide! 
394 | 395 | You can continue your Neo4j learning journey with these resources: 396 | 397 | link:https://neo4j.com/graphacademy[GraphAcademy - completely free online courses^] 398 | 399 | link:https://neo4j.com/videos/exploring-stackoverflow-data-with-michael-hunger-twitch-stream/[Exploring StackOverflow data^] with Michael Hunger – Twitch stream. 400 | 401 | link:https://towardsdatascience.com/tagoverflow-correlating-tags-in-stackoverflow-66e2b0e1117b[TagOverflow^] — Correlating Tags in StackOverflow. 402 | 403 | The source code and data dumps for this guide are available in the github repository - link:https://github.com/neo4j-graph-examples/stackoverflow[neo4j-graph-examples/stackoverflow^]. 404 | -------------------------------------------------------------------------------- /relate.project-install.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "stackoverflow", 3 | "icon": "documentation/img/icon.svg", 4 | "title": "StackOverflow Data", 5 | "description":"StackOverflow Analysis - Questions, Answers, Tags, Comments", 6 | "dbms": [ 7 | { 8 | "scriptFile":"scripts/import.cypher", 9 | "targetNeo4jVersion": "^4.3", 10 | "plugins": ["apoc"] 11 | }, 12 | { 13 | "dumpFile": "data/stackoverflow-43.dump", 14 | "targetNeo4jVersion":">=4.3.0 <5.0.0", 15 | "plugins": ["apoc"] 16 | }, 17 | { 18 | "dumpFile": "data/stackoverflow-50.dump", 19 | "targetNeo4jVersion":">=5.0.0 <6.0.0", 20 | "plugins": ["apoc"] 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /scripts/import.cypher: -------------------------------------------------------------------------------- 1 | // cypher import for stackoverflow dataset. 
2 | // note that stackoverflow considers > 30 requests/sec per IP to be very abusive and will throttle the IP making such a request 3 | // the import query looks for results related to the "neo4j" and "cypher" tags 4 | // The query has an added filter which allows us to get the comments and the answers (&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf) 5 | 6 | CREATE CONSTRAINT FOR (q:Question) REQUIRE q.uuid IS UNIQUE; 7 | CREATE CONSTRAINT FOR (t:Tag) REQUIRE t.name IS UNIQUE; 8 | CREATE CONSTRAINT FOR (u:User) REQUIRE u.uuid IS UNIQUE; 9 | CREATE CONSTRAINT FOR (a:Answer) REQUIRE a.uuid IS UNIQUE; 10 | CREATE CONSTRAINT FOR (c:Comment) REQUIRE c.uuid IS UNIQUE; 11 | 12 | 13 | // look for several pages of questions 14 | WITH ["neo4j","cypher"] as tags 15 | UNWIND tags as tagName 16 | UNWIND range(1,10) as page // careful with throttling 17 | WITH "https://api.stackexchange.com/2.3/questions?page="+page+"&pagesize=100&order=desc&sort=creation&tagged="+tagName+"&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" as url 18 | CALL apoc.load.json(url) YIELD value 19 | CALL apoc.util.sleep(250) 20 | UNWIND value.items AS q 21 | // create the questions 22 | MERGE (question:Question {uuid:q.question_id}) 23 | ON CREATE SET question.title = q.title, 24 | question.link = q.share_link, 25 | question.creation_date = q.creation_date, 26 | question.accepted_answer_id=q.accepted_answer_id, 27 | question.view_count=q.view_count, 28 | question.answer_count=q.answer_count, 29 | question.body_markdown=q.body_markdown 30 | // who asked the question 31 | MERGE (owner:User {uuid:coalesce(q.owner.user_id,'deleted')}) 32 | ON CREATE SET owner.display_name = q.owner.display_name 33 | MERGE (owner)-[:ASKED]->(question) 34 | // what tags do the questions have 35 | FOREACH (tagName IN q.tags | 36 | MERGE (tag:Tag {name:tagName}) 37 | ON CREATE SET tag.link = "https://stackoverflow.com/questions/tagged/" + tag.name 38 | MERGE (question)-[:TAGGED]->(tag)) 39 | // who answered the
questions? 40 | FOREACH (a IN q.answers | 41 | MERGE (question)<-[:ANSWERED]-(answer:Answer {uuid:a.answer_id}) 42 | ON CREATE SET answer.is_accepted = a.is_accepted, 43 | answer.link=a.share_link, 44 | answer.title=a.title, 45 | answer.body_markdown=a.body_markdown, 46 | answer.score=a.score, 47 | answer.favorite_score=a.favorite_score, 48 | answer.view_count=a.view_count 49 | MERGE (answerer:User {uuid:coalesce(a.owner.user_id,'deleted')}) 50 | ON CREATE SET answerer.display_name = a.owner.display_name 51 | MERGE (answer)<-[:PROVIDED]-(answerer) 52 | ) 53 | FOREACH (c in q.comments | 54 | MERGE (question)<-[:COMMENTED_ON]-(comment:Comment {uuid:c.comment_id}) 55 | ON CREATE SET comment.link=c.link, comment.score=c.score 56 | MERGE (commenter:User {uuid:coalesce(c.owner.user_id,'deleted')}) 57 | ON CREATE SET commenter.display_name = c.owner.display_name 58 | MERGE (comment)<-[:COMMENTED]-(commenter) 59 | ); 60 | --------------------------------------------------------------------------------